{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T22:24:08Z","timestamp":1775082248296,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":24,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819584048","type":"print"},{"value":"9789819584055","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-8405-5_28","type":"book-chapter","created":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T20:14:37Z","timestamp":1775074477000},"page":"518-536","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Efficient SpMV: A Multi-Aware Optimization Framework for\u00a0Heterogeneous Architecture"],"prefix":"10.1007","author":[{"given":"Yang","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zexin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zenghui","family":"Ren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yonghua","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,2]]},"reference":[{"key":"28_CR1","doi-asserted-by":"publisher","first-page":"8883","DOI":"10.1007\/s11227-020-03186-1","volume":"76","author":"M Barreda","year":"2020","unstructured":"Barreda, M., Dolz, M.F., Castano, M.A., Alonso-Jord\u00e1, P., Quintana-Orti, E.S.: Performance modeling of the sparse matrix-vector product via convolutional neural networks. J. Supercomput. 76, 8883\u20138900 (2020)","journal-title":"J. Supercomput."},{"issue":"5\u20136","key":"28_CR2","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1016\/j.parco.2014.03.012","volume":"40","author":"U Bor\u0161tnik","year":"2014","unstructured":"Bor\u0161tnik, U., VandeVondele, J., Weber, V., Hutter, J.: Sparse matrix multiplication: the distributed block-compressed sparse row library. Parallel Comput. 40(5\u20136), 47\u201358 (2014)","journal-title":"Parallel Comput."},{"issue":"7","key":"28_CR3","doi-asserted-by":"publisher","first-page":"1422","DOI":"10.1080\/00207160.2014.942298","volume":"92","author":"Z Chen","year":"2015","unstructured":"Chen, Z., Liu, H., Yang, B.: Accelerating iterative linear solvers using multiple graphical processing units. Int. J. Comput. Math. 92(7), 1422\u20131438 (2015)","journal-title":"Int. J. Comput. Math."},{"issue":"1","key":"28_CR4","first-page":"1","volume":"38","author":"TA Davis","year":"2011","unstructured":"Davis, T.A., Hu, Y.: The university of florida sparse matrix collection. ACM Trans. Math. Softw. (TOMS) 38(1), 1\u201325 (2011)","journal-title":"ACM Trans. Math. Softw. (TOMS)"},{"key":"28_CR5","doi-asserted-by":"crossref","unstructured":"Du, Z., Li, J., Wang, Y., Li, X., Tan, G., Sun, N.: Alphasparse: generating high performance spmv codes directly from sparse matrices. In: SC22: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201315. IEEE (2022)","DOI":"10.1109\/SC41404.2022.00071"},{"key":"28_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2023.104799","volume":"185","author":"J Gao","year":"2024","unstructured":"Gao, J., Ji, W., Liu, J., Wang, Y., Shi, F.: Revisiting thread configuration of SPMV kernels on GPU: a machine learning based approach. J. Parallel Distributed Comput. 185, 104799 (2024)","journal-title":"J. Parallel Distributed Comput."},{"key":"28_CR7","doi-asserted-by":"crossref","unstructured":"Huang, Y., Li, D.: Performance modeling for optimal data placement on GPU with heterogeneous memory systems. In: 2017 IEEE International Conference on Cluster Computing (CLUSTER), pp. 166\u2013177. IEEE (2017)","DOI":"10.1109\/CLUSTER.2017.42"},{"key":"28_CR8","doi-asserted-by":"crossref","unstructured":"Kabir, H., Booth, J.D., Raghavan, P.: A multilevel compressed sparse row format for efficient sparse computations on multicore processors. In: 2014 21st International Conference on High Performance Computing (HiPC), pp. 1\u201310. IEEE (2014)","DOI":"10.1109\/HiPC.2014.7116882"},{"key":"28_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2023.102997","volume":"115","author":"PA Lane","year":"2023","unstructured":"Lane, P.A., Booth, J.D.: Heterogeneous sparse matrix-vector multiplication via compressed sparse row format. Parallel Comput. 115, 102997 (2023)","journal-title":"Parallel Comput."},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Li, J., Tan, G., Chen, M., Sun, N.: Smat: an input adaptive auto-tuner for sparse matrix-vector multiplication. In: Proceedings of the 34th ACM SIGPLAN Conference on Programming Language Design and Implementation, pp. 117\u2013126 (2013)","DOI":"10.1145\/2491956.2462181"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Li, J., Xu, J., Li, S., Huang, S., Liu, J., Lian, Y., Dai, G.: Fast and efficient 2-bit LLM inference on GPU: 2\/4\/16-bit in a weight matrix with asynchronous dequantization. In: Proceedings of the 43rd IEEE\/ACM International Conference on Computer-Aided Design. ICCAD \u201924, Association for Computing Machinery, New York (2025)","DOI":"10.1145\/3676536.3676796"},{"issue":"7","key":"28_CR12","first-page":"1842","volume":"32","author":"M Li","year":"2020","unstructured":"Li, M., Ao, Y., Yang, C.: Adaptive SPMV\/SPMSPV on GPUS for input vectors of varied sparsity. IEEE Trans. Parallel Distrib. Syst. 32(7), 1842\u20131853 (2020)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"6","key":"28_CR13","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1007\/s42514-024-00195-x","volume":"6","author":"J Liu","year":"2024","unstructured":"Liu, J., Wang, Y., Gao, J., Ji, W.: PSPMV: precision-based sparse matrix partition and SPMV optimization. CCF Trans. High Perf. Comput. 6(6), 549\u2013565 (2024)","journal-title":"CCF Trans. High Perf. Comput."},{"key":"28_CR14","doi-asserted-by":"crossref","unstructured":"Liu, W., Vinter, B.: Csr5: an efficient storage format for cross-platform sparse matrix-vector multiplication. In: Proceedings of the 29th ACM on International Conference on Supercomputing, pp. 339\u2013350 (2015)","DOI":"10.1145\/2751205.2751209"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Lu, Y., Liu, W.: Dasp: specific dense matrix multiply-accumulate units accelerated general sparse matrix-vector multiplication. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201314 (2023)","DOI":"10.1145\/3581784.3607051"},{"key":"28_CR16","doi-asserted-by":"crossref","unstructured":"Pang, M., Fei, X., Qu, P., Zhang, Y., Li, Z.: A row decomposition-based approach for sparse matrix multiplication on GPUS. In: Proceedings of the 29th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming, pp. 377\u2013389 (2024)","DOI":"10.1145\/3627535.3638470"},{"key":"28_CR17","doi-asserted-by":"crossref","unstructured":"Qiu, H., et al.: A conflict-aware divide-and-conquer algorithm for symmetric sparse matrix-vector multiplication. In: SC24: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201315. IEEE (2024)","DOI":"10.1109\/SC41406.2024.00054"},{"key":"28_CR18","doi-asserted-by":"crossref","unstructured":"Sedaghati, N., Mu, T., Pouchet, L.N., Parthasarathy, S., Sadayappan, P.: Automatic selection of sparse matrix representation on GPUS. In: Proceedings of the 29th ACM on International Conference on Supercomputing, pp. 99\u2013108 (2015)","DOI":"10.1145\/2751205.2751244"},{"key":"28_CR19","doi-asserted-by":"crossref","unstructured":"Sim, J., Dasgupta, A., Kim, H., Vuduc, R.: A performance analysis framework for identifying potential benefits in GPGPU applications. In: Proceedings of the 17th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 11\u201322 (2012)","DOI":"10.1145\/2145816.2145819"},{"key":"28_CR20","doi-asserted-by":"crossref","unstructured":"Vuduc, R.W., Moon, H.J.: Fast sparse matrix-vector multiplication by exploiting variable block structure. In: High Performance Computing and Communications: First International Conference, HPCC 2005, pp. 807\u2013816, Sorrento, Italy, September 21-23, 2005. Proceedings 1. Springer (2005)","DOI":"10.1007\/11557654_91"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Xie, Z., Liu, J., Li, J., Li, D.: Merchandiser: data placement on heterogeneous memory for task-parallel HPC applications with load-balance awareness. In: Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming, pp. 204\u2013217 (2023)","DOI":"10.1145\/3572848.3577497"},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Yan, S., Li, C., Zhang, Y., Zhou, H.: yaspmv: yet another spmv framework on gpus. In: Proceedings of the 19th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming. p. 107\u2013118. PPoPP \u201914 (2014)","DOI":"10.1145\/2555243.2555255"},{"key":"28_CR23","doi-asserted-by":"crossref","unstructured":"Yesil, S., Heidarshenas, A., Morrison, A., Torrellas, J.: Wise: predicting the performance of sparse matrix vector multiplication with machine learning. In: Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming, pp. 329\u2013341 (2023)","DOI":"10.1145\/3572848.3577506"},{"key":"28_CR24","doi-asserted-by":"crossref","unstructured":"Zeng, S., et\u00a0al.: FLIGHTLLM: efficient large language model inference with a complete mapping flow on FPGAS. In: Proceedings of the 2024 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays, pp. 223\u2013234 (2024)","DOI":"10.1145\/3626202.3637562"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-8405-5_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T20:14:39Z","timestamp":1775074479000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-8405-5_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819584048","9789819584055"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-8405-5_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 April 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zhengzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ieee-cybermatics.org\/2025\/ica3pp\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}