{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T21:01:18Z","timestamp":1751662878695},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,5,8]],"date-time":"2023-05-08T00:00:00Z","timestamp":1683504000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,5,8]],"date-time":"2023-05-08T00:00:00Z","timestamp":1683504000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s42514-023-00140-4","type":"journal-article","created":{"date-parts":[[2023,5,8]],"date-time":"2023-05-08T11:01:57Z","timestamp":1683543717000},"page":"210-227","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Sgap: towards efficient sparse tensor algebra compilation for GPU"],"prefix":"10.1007","volume":"5","author":[{"given":"Genghan","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Yuetong","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Yanting","family":"Tao","sequence":"additional","affiliation":[]},{"given":"Zhongming","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Guohao","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Sitao","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Yuan","family":"Wen","sequence":"additional","affiliation":[]},{"given":"Pavlos","family":"Petoumenos","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,5,8]]},"reference":[{"key":"140_CR1","doi-asserted-by":"crossref","unstructured":"Asgari, B., Hadidi, R., Cao, J., Lim, S.-K., Kim, H., et al.: Fafnir: Accelerating sparse gathering by using efficient near-memory intelligent reduction. In: 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp. 908\u2013920 (2021). IEEE","DOI":"10.1109\/HPCA51647.2021.00080"},{"key":"140_CR2","doi-asserted-by":"crossref","unstructured":"Bell, N., Garland, M.: Implementing sparse matrix-vector multiplication on throughput-oriented processors. In: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis, pp. 1\u201311 (2009)","DOI":"10.1145\/1654059.1654078"},{"issue":"4","key":"140_CR3","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1137\/110838844","volume":"34","author":"N Bell","year":"2012","unstructured":"Bell, N., Dalton, S., Olson, L.N.: Exposing fine-grained parallelism in algebraic multigrid methods. SIAM J. Sci. Comput. 34(4), 123\u2013152 (2012)","journal-title":"SIAM J. Sci. Comput."},{"key":"140_CR4","doi-asserted-by":"crossref","unstructured":"Bik, A.J., Koanantakool, P., Shpeisman, T., Vasilache, N., Zheng, B., Kjolstad, F.: Compiler support for sparse tensor computations in mlir. arXiv:2202.04305 (2022)","DOI":"10.1145\/3544559"},{"key":"140_CR5","doi-asserted-by":"crossref","unstructured":"Bik, A.J., Wijshoff, H.A.: Compilation techniques for sparse matrix computations. In: Proceedings of the 7th International Conference on Supercomputing, pp. 416\u2013424 (1993)","DOI":"10.1145\/165939.166023"},{"key":"140_CR6","unstructured":"Chen, T., Moreau, T., Jiang, Z., Zheng, L., Yan, E., Shen, H., Cowan, M., Wang, L., Hu, Y., Ceze, L., et al.: Tvm: An automated end-to-end optimizing compiler for deep learning. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pp. 578\u2013594 (2018)"},{"key":"140_CR7","doi-asserted-by":"publisher","unstructured":"Chou, S., Kjolstad, F., Amarasinghe, S.: Format abstraction for sparse tensor algebra compilers. Proc. ACM Program. Lang. 2(OOPSLA), 123\u2013112330 (2018). https:\/\/doi.org\/10.1145\/3276493","DOI":"10.1145\/3276493"},{"issue":"6","key":"140_CR8","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1145\/362384.362685","volume":"13","author":"EF Codd","year":"1970","unstructured":"Codd, E.F.: A relational model of data for large shared data banks. Commun. ACM 13(6), 377\u2013387 (1970)","journal-title":"Commun. ACM"},{"key":"140_CR9","doi-asserted-by":"crossref","unstructured":"Dai, G., Huang, G., Yang, S., Yu, Z., Zhang, H., Ding, Y., Xie, Y., Yang, H., Wang, Y.: Heuristic adaptability to input dynamics for spmm on gpus. arXiv:2202.08556 (2022)","DOI":"10.1145\/3489517.3530508"},{"key":"140_CR10","doi-asserted-by":"crossref","unstructured":"Feng, S., Hou, B., Jin, H., Lin, W., Shao, J., Lai, R., Ye, Z., Zheng, L., Yu, C.H., Yu, Y., et al.: Tensorir: An abstraction for automatic tensorized program optimization. arXiv:2207.04296 (2022)","DOI":"10.1145\/3575693.3576933"},{"key":"140_CR11","unstructured":"Guennebaud, G., Jacob, B., et al.: Eigen. 3 http:\/\/eigen. tuxfamily.org (2010)"},{"key":"140_CR12","unstructured":"Hamilton, W., Ying, Z., Leskovec, J.: Inductive representation learning on large graphs. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"issue":"3","key":"140_CR13","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1145\/3007787.3001163","volume":"44","author":"S Han","year":"2016","unstructured":"Han, S., Liu, X., Mao, H., Pu, J., Pedram, A., Horowitz, M.A., Dally, W.J.: Eie: efficient inference engine on compressed deep neural network. ACM SIGARCH Comput. Archit. News 44(3), 243\u2013254 (2016)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"key":"140_CR14","doi-asserted-by":"crossref","unstructured":"Hidayeto\u011flu, M., Pearson, C., Mailthody, V.S., Ebrahimi, E., Xiong, J., Nagi, R., Hwu, W.-m.: At-scale sparse deep neural network inference with efficient gpu implementation. In: 2020 IEEE High Performance Extreme Computing Conference (HPEC), pp. 1\u20137 (2020). IEEE","DOI":"10.1109\/HPEC43674.2020.9286206"},{"key":"140_CR15","doi-asserted-by":"crossref","unstructured":"Hong, C., Sukumaran-Rajam, A., Nisa, I., Singh, K., Sadayappan, P.: Adaptive sparse tiling for sparse matrix multiplication. In: Proceedings of the 24th Symposium on Principles and Practice of Parallel Programming, pp. 300\u2013314 (2019)","DOI":"10.1145\/3293883.3295712"},{"key":"140_CR16","doi-asserted-by":"crossref","unstructured":"Huang, G., Dai, G., Wang, Y., Yang, H.: Ge-spmm: general-purpose sparse matrix-matrix multiplication on gpus for graph neural networks. In: SC20: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201312 (2020). IEEE","DOI":"10.1109\/SC41405.2020.00076"},{"key":"140_CR17","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. arXiv:1609.02907 (2016)"},{"key":"140_CR18","doi-asserted-by":"crossref","unstructured":"Kjolstad, F., Ahrens, P., Kamil, S., Amarasinghe, S.: Tensor algebra compilation with workspaces, 180\u2013192 (2019)","DOI":"10.1109\/CGO.2019.8661185"},{"key":"140_CR19","doi-asserted-by":"publisher","unstructured":"Kjolstad, F., Kamil, S., Chou, S., Lugato, D., Amarasinghe, S.: The tensor algebra compiler. Proc. ACM Program. Lang. 1(OOPSLA), 77\u201317729 (2017). https:\/\/doi.org\/10.1145\/3133901","DOI":"10.1145\/3133901"},{"key":"140_CR20","unstructured":"Kjolstad, F.: Sparse tensor algebra compilation. Ph.d. thesis, Massachusetts Institute of Technology, Cambridge, MA (2020). http:\/\/tensor-compiler.org\/files\/kjolstad-phd-thesis-taco-compiler.pdf"},{"issue":"3","key":"140_CR21","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1137\/07070111X","volume":"51","author":"TG Kolda","year":"2009","unstructured":"Kolda, T.G., Bader, B.W.: Tensor decompositions and applications. SIAM Rev. 51(3), 455\u2013500 (2009)","journal-title":"SIAM Rev."},{"key":"140_CR22","doi-asserted-by":"crossref","unstructured":"Kurt, S.E., Raje, S., Sukumaran-Rajam, A., Sadayappan, P.: Sparsity-aware tensor decomposition. In: 2022 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 952\u2013962 (2022). IEEE","DOI":"10.1109\/IPDPS53621.2022.00097"},{"key":"140_CR23","unstructured":"Lin, C.-Y., Luo, L., Ceze, L.: Accelerating spmm kernel with cache-first edge sampling for graph neural networks. arXiv:2104.10716 (2021)"},{"key":"140_CR24","unstructured":"Liu, B., Wang, M., Foroosh, H., Tappen, M., Pensky, M.: Sparse convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 806\u2013814 (2015)"},{"key":"140_CR25","doi-asserted-by":"crossref","unstructured":"Mehrabi, A., Lee, D., Chatterjee, N., Sorin, D.J., Lee, B.C., O\u2019Connor, M.: Learning sparse matrix row permutations for efficient spmm on gpu architectures. In: 2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), pp. 48\u201358 (2021). IEEE","DOI":"10.1109\/ISPASS51385.2021.00016"},{"key":"140_CR26","unstructured":"Naumov, M., Chien, L., Vandermersch, P., Kapasi, U.: Cusparse library. In: GPU Technology Conference (2010)"},{"key":"140_CR27","doi-asserted-by":"crossref","unstructured":"Nisa, I., Li, J., Sukumaran-Rajam, A., Vuduc, R., Sadayappan, P.: Load-balanced sparse mttkrp on gpus. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 123\u2013133 (2019). IEEE","DOI":"10.1109\/IPDPS.2019.00023"},{"key":"140_CR28","doi-asserted-by":"crossref","unstructured":"Popoola, T., Shankar, R., Rift, A., Singh, S., Davis, E.C., Strout, M.M., Olschanowsky, C.: An object-oriented interface to the sparse polyhedral library. In: 2021 IEEE 45th Annual Computers, Software, and Applications Conference (COMPSAC), pp. 1825\u20131831 (2021). IEEE","DOI":"10.1109\/COMPSAC51774.2021.00275"},{"key":"140_CR29","unstructured":"Qin, E., Garg, R., Bambhaniya, A., Pellauer, M., Parashar, A., Rajamanickam, S., Hao, C., Krishna, T.: Enabling flexibility for sparse tensor acceleration via heterogeneity. arXiv:2201.08916 (2022)"},{"key":"140_CR30","doi-asserted-by":"publisher","unstructured":"Senanayake, R., Hong, C., Wang, Z., Wilson, A., Chou, S., Kamil, S., Amarasinghe, S., Kjolstad, F.: A sparse iteration space transformation framework for sparse tensor algebra. Proc. ACM Program. Lang. 4(OOPSLA) (2020). https:\/\/doi.org\/10.1145\/3428226","DOI":"10.1145\/3428226"},{"key":"140_CR31","doi-asserted-by":"crossref","unstructured":"Shantharam, M., Srinivasmurthy, S., Raghavan, P.: Characterizing the impact of soft errors on iterative methods in scientific computing. In: Proceedings of the International Conference on Supercomputing, pp. 152\u2013161 (2011)","DOI":"10.1145\/1995896.1995922"},{"issue":"11","key":"140_CR32","doi-asserted-by":"publisher","first-page":"1921","DOI":"10.1109\/JPROC.2018.2857721","volume":"106","author":"MM Strout","year":"2018","unstructured":"Strout, M.M., Hall, M., Olschanowsky, C.: The sparse polyhedral framework: composing compiler-generated inspector-executor code. Proc. IEEE 106(11), 1921\u20131934 (2018)","journal-title":"Proc. IEEE"},{"issue":"6","key":"140_CR33","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1145\/2813885.2738003","volume":"50","author":"A Venkat","year":"2015","unstructured":"Venkat, A., Hall, M., Strout, M.: Loop and data transformations for sparse matrix code. ACM SIGPLAN Not. 50(6), 521\u2013532 (2015)","journal-title":"ACM SIGPLAN Not."},{"key":"140_CR34","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wohlwend, J., Lei, T.: Structured pruning of large language models. arXiv:1910.04732 (2019)","DOI":"10.18653\/v1\/2020.emnlp-main.496"},{"key":"140_CR35","doi-asserted-by":"crossref","unstructured":"Wang, E., Zhang, Q., Shen, B., Zhang, G., Lu, X., Wu, Q., Wang, Y.: Intel math kernel library. In: High-Performance Computing on the Intel\u00ae Xeon Phi$$^{{\\rm TM}}$$, pp. 167\u2013188. Springer, Cham (2014)","DOI":"10.1007\/978-3-319-06486-4_7"},{"key":"140_CR36","doi-asserted-by":"crossref","unstructured":"Xin, J., Ye, X., Zheng, L., Wang, Q., Huang, Y., Yao, P., Yu, L., Liao, X., Jin, H.: Fast sparse deep neural network inference with flexible spmm optimization space exploration. In: 2021 IEEE High Performance Extreme Computing Conference (HPEC), pp. 1\u20137 (2021). IEEE","DOI":"10.1109\/HPEC49654.2021.9622791"},{"key":"140_CR37","doi-asserted-by":"crossref","unstructured":"Yang, C., Bulu\u00e7, A., Owens, J.D.: Design principles for sparse matrix multiplication on the gpu. In: European Conference on Parallel Processing, pp. 672\u2013687 (2018). Springer","DOI":"10.1007\/978-3-319-96983-1_48"},{"key":"140_CR38","unstructured":"Ye, Z., Lai, R., Shao, J., Chen, T., Ceze, L.: Sparsetir: composable abstractions for sparse compilation in deep learning"},{"key":"140_CR39","doi-asserted-by":"crossref","unstructured":"Yu, Z., Dai, G., Huang, G., Wang, Y., Yang, H.: Exploiting online locality and reduction parallelism for sampled dense matrix multiplication on gpus. In: 2021 IEEE 39th International Conference on Computer Design (ICCD), pp. 567\u2013574 (2021). IEEE","DOI":"10.1109\/ICCD53106.2021.00092"},{"key":"140_CR40","unstructured":"Yuster, R., Zwick, U.: Detecting short directed cycles using rectangular matrix multiplication and dynamic programming. In: SODA, vol. 4, pp. 254\u2013260 (2004). Citeseer"}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-023-00140-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-023-00140-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-023-00140-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,26]],"date-time":"2023-06-26T09:14:29Z","timestamp":1687770869000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-023-00140-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,8]]},"references-count":40,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["140"],"URL":"https:\/\/doi.org\/10.1007\/s42514-023-00140-4","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"value":"2524-4922","type":"print"},{"value":"2524-4930","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,5,8]]},"assertion":[{"value":"3 September 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 March 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 May 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}