{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:11:27Z","timestamp":1757617887825,"version":"3.44.0"},"reference-count":62,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T00:00:00Z","timestamp":1745798400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T00:00:00Z","timestamp":1745798400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cluster Comput"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s10586-024-04966-7","type":"journal-article","created":{"date-parts":[[2025,4,28]],"date-time":"2025-04-28T10:50:40Z","timestamp":1745837440000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Block strategy and adaptive storage for sparse matrix\u2013vector multiplication on GPU"],"prefix":"10.1007","volume":"28","author":[{"given":"Zhixiang","family":"Zhao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanxia","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guoyin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiqing","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haibo","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,4,28]]},"reference":[{"key":"4966_CR1","doi-asserted-by":"crossref","unstructured":"Riazi, M.S., Weinert, C., Tkachenko, O., Songhori, E.M., Schneider, T., Koushanfar, F.: Chameleon: a hybrid secure computation framework for machine learning applications. In: Proceedings of the 2018 on Asia Conference on Computer and Communications Security, pp. 707\u2013721 (2018)","DOI":"10.1145\/3196494.3196522"},{"key":"4966_CR2","doi-asserted-by":"crossref","unstructured":"Vasireddy, P., Kavi, K., Mehta, G.: Sparse-t: hardware accelerator thread for unstructured sparse data processing. In: Proceedings of the 41st IEEE\/ACM International Conference on Computer-Aided Design, pp. 1\u20138 (2022)","DOI":"10.1145\/3508352.3549441"},{"issue":"1","key":"4966_CR3","first-page":"1","volume":"1","author":"T Wu","year":"2023","unstructured":"Wu, T., Cheng, J., Zhang, C., Hou, J., Chen, G., Huang, Z., Zhang, W., Han, W., Bai, B.: ClipSim: a GPU-friendly parallel framework for single-source simrank with accuracy guarantee. Proc. ACM Manag. Data 1(1), 1\u201326 (2023)","journal-title":"Proc. ACM Manag. Data"},{"key":"4966_CR4","doi-asserted-by":"crossref","unstructured":"Horro, M., Pouchet, L.-N., Rodr\u00edguez, G., Touri\u00f1o, J.: Custom high-performance vector code generation for data-specific sparse computations. In: Proceedings of the International Conference on Parallel Architectures and Compilation Techniques, pp. 160\u2013171 (2022)","DOI":"10.1145\/3559009.3569668"},{"issue":"1","key":"4966_CR5","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/s12200-022-00009-4","volume":"15","author":"J Cheng","year":"2022","unstructured":"Cheng, J., Zhao, Y., Zhang, W., Zhou, H., Huang, D., Zhu, Q., Guo, Y., Xu, B., Dong, J., Zhang, X.: A small microring array that performs large complex-valued matrix\u2013vector multiplication. Front. Optoelectron. 15(1), 15 (2022)","journal-title":"Front. Optoelectron."},{"issue":"4","key":"4966_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3017994","volume":"43","author":"S Filippone","year":"2017","unstructured":"Filippone, S., Cardellini, V., Barbieri, D., Fanfarillo, A.: Sparse matrix\u2013vector multiplication on GPGPUs. ACM Trans. Math. Softw. 43(4), 1\u201349 (2017). https:\/\/doi.org\/10.1145\/3017994","journal-title":"ACM Trans. Math. Softw."},{"key":"4966_CR7","doi-asserted-by":"publisher","unstructured":"Mehmood, R.: Performance enhancement strategies for sparse matrix\u2013vector multiplication (SpMV) and iterative linear solvers. arXiv (2022). https:\/\/doi.org\/10.48550\/arXiv.2212.07490","DOI":"10.48550\/arXiv.2212.07490"},{"key":"4966_CR8","unstructured":"Gao, J., Liu, B., Ji, W., Huang, H.: A systematic literature survey of sparse matrix\u2013vector multiplication. arXiv (2024)"},{"issue":"1","key":"4966_CR9","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1007\/s11227-008-0251-8","volume":"50","author":"G Goumas","year":"2009","unstructured":"Goumas, G., Kourtis, K., Anastopoulos, N., Karakasis, V., Koziris, N.: Performance evaluation of the sparse matrix-vector multiplication on modern architectures. J. Supercomput. 50(1), 36\u201377 (2009). https:\/\/doi.org\/10.1007\/s11227-008-0251-8","journal-title":"J. Supercomput."},{"issue":"8","key":"4966_CR10","doi-asserted-by":"publisher","first-page":"2982","DOI":"10.1109\/TMAG.2010.2043511","volume":"46","author":"MM Dehnavi","year":"2010","unstructured":"Dehnavi, M.M., Fernandez, D.M., Giannacopoulos, D.: Finite-element sparse matrix vector multiplication on graphic processing units. IEEE Trans. Magn. 46(8), 2982\u20132985 (2010). https:\/\/doi.org\/10.1109\/TMAG.2010.2043511","journal-title":"IEEE Trans. Magn."},{"issue":"4","key":"4966_CR11","doi-asserted-by":"publisher","first-page":"231","DOI":"10.14778\/1938545.1938548","volume":"4","author":"X Yang","year":"2011","unstructured":"Yang, X., Parthasarathy, S., Sadayappan, P.: Fast sparse matrix\u2013vector multiplication on GPUs: implications for graph mining. Proc. VLDB Endow. 4(4), 231\u2013242 (2011). https:\/\/doi.org\/10.14778\/1938545.1938548","journal-title":"Proc. VLDB Endow."},{"issue":"8","key":"4966_CR12","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1016\/j.parco.2011.08.003","volume":"38","author":"F V\u00e1zquez","year":"2012","unstructured":"V\u00e1zquez, F., Fern\u00e1ndez, J.J., Garz\u00f3n, E.M.: Automatic tuning of the sparse matrix vector product on GPUs based on the ELLR-T approach. Parallel Comput. 38(8), 408\u2013420 (2012). https:\/\/doi.org\/10.1016\/j.parco.2011.08.003","journal-title":"Parallel Comput."},{"issue":"5","key":"4966_CR13","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1137\/130930352","volume":"36","author":"M Kreutzer","year":"2014","unstructured":"Kreutzer, M., Hager, G., Wellein, G., Fehske, H., Bishop, A.R.: A unified sparse matrix data format for efficient general sparse matrix\u2013vector multiplication on modern processors with wide SIMD units. SIAM J. Sci. Comput. 36(5), 401\u2013423 (2014). https:\/\/doi.org\/10.1137\/130930352","journal-title":"SIAM J. Sci. Comput."},{"issue":"3","key":"4966_CR14","doi-asserted-by":"publisher","first-page":"431","DOI":"10.1007\/s11704-014-4127-1","volume":"9","author":"CC Yan","year":"2015","unstructured":"Yan, C.C., Yu, H., Xu, W., Zhang, Y., Chen, B., Tian, Z., Wang, Y., Yin, J.: Memory bandwidth optimization of SpMV on GPGPUs. Front. Comput. Sci. 9(3), 431\u2013441 (2015). https:\/\/doi.org\/10.1007\/s11704-014-4127-1","journal-title":"Front. Comput. Sci."},{"key":"4966_CR15","doi-asserted-by":"publisher","first-page":"490","DOI":"10.1016\/j.future.2015.03.005","volume":"54","author":"J Zhang","year":"2016","unstructured":"Zhang, J., Wan, J., Li, F., Mao, J., Zhuang, L., Yuan, J., Liu, E., Yu, Z.: Efficient sparse matrix\u2013vector multiplication using cache oblivious extension quadtree storage format. Futur. Gener. Comput. Syst. 54, 490\u2013500 (2016). https:\/\/doi.org\/10.1016\/j.future.2015.03.005","journal-title":"Futur. Gener. Comput. Syst."},{"issue":"1","key":"4966_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3480935","volume":"48","author":"H Anzt","year":"2020","unstructured":"Anzt, H., Cojean, T., Flegar, G., G\u00f6bel, F., Gr\u00fctzmacher, T., Nayak, P., Ribizel, T., Tsai, Y.M., Quintana-Ort\u00ed, E.S.: Ginkgo: a modern linear operator algebra framework for high performance computing. ACM Trans. Math. Softw. 48(1), 1\u201333 (2020). https:\/\/doi.org\/10.1145\/3480935","journal-title":"ACM Trans. Math. Softw."},{"key":"4966_CR17","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1016\/j.future.2020.02.076","volume":"108","author":"Z Tan","year":"2020","unstructured":"Tan, Z., Ji, W., Gao, J., Zhao, Y., Benatia, A., Wang, Y., Shi, F.: MMSparse: 2D partitioning of sparse matrix based on mathematical morphology. Futur. Gener. Comput. Syst. 108, 521\u2013532 (2020). https:\/\/doi.org\/10.1016\/j.future.2020.02.076","journal-title":"Futur. Gener. Comput. Syst."},{"issue":"1","key":"4966_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3380930","volume":"7","author":"H Anzt","year":"2020","unstructured":"Anzt, H., Cojean, T., Yen-Chen, C., Dongarra, J., Flegar, G., Nayak, P., Tomov, S., Tsai, Y.M., Wang, W.: Load-balancing sparse matrix vector product kernels on GPUs. ACM Trans. Parallel Comput. 7(1), 1\u201326 (2020). https:\/\/doi.org\/10.1145\/3380930","journal-title":"ACM Trans. Parallel Comput."},{"key":"4966_CR19","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1016\/j.jpdc.2021.07.007","volume":"157","author":"J Gao","year":"2021","unstructured":"Gao, J., Xia, Y., Yin, R., He, G.: Adaptive diagonal sparse matrix\u2013vector multiplication on GPU. J. Parallel Distrib. Comput. 157, 287\u2013302 (2021). https:\/\/doi.org\/10.1016\/j.jpdc.2021.07.007","journal-title":"J. Parallel Distrib. Comput."},{"key":"4966_CR20","doi-asserted-by":"publisher","unstructured":"Niu, Y., Lu, Z., Dong, M., Jin, Z., Liu, W., Tan, G.: TileSpMV: a tiled algorithm for sparse matrix\u2013vector multiplication on GPUs. In: 2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 68\u201378. IEEE, Portland (2021). https:\/\/doi.org\/10.1109\/ipdps49936.2021.00016","DOI":"10.1109\/ipdps49936.2021.00016"},{"issue":"5","key":"4966_CR21","doi-asserted-by":"publisher","first-page":"6318","DOI":"10.1007\/s11227-021-04123-6","volume":"78","author":"H Cui","year":"2022","unstructured":"Cui, H., Wang, N., Wang, Y., Han, Q., Xu, Y.: An effective SpMV based on block strategy and hybrid compression on GPU. J. Supercomput. 78(5), 6318\u20136339 (2022). https:\/\/doi.org\/10.1007\/s11227-021-04123-6","journal-title":"J. Supercomput."},{"issue":"12","key":"4966_CR22","doi-asserted-by":"publisher","first-page":"3977","DOI":"10.1109\/tpds.2022.3177291","volume":"33","author":"E Karimi","year":"2022","unstructured":"Karimi, E., Agostini, N.B., Dong, S., Kaeli, D.: VCSR: An efficient GPU memory-aware sparse format. IEEE Trans. Parallel Distrib. Syst. 33(12), 3977\u20133989 (2022). https:\/\/doi.org\/10.1109\/tpds.2022.3177291","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"1","key":"4966_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2049662.2049663","volume":"38","author":"TA Davis","year":"2011","unstructured":"Davis, T.A., Hu, Y.: The university of Florida sparse matrix collection. ACM Trans. Math. Softw. 38(1), 1\u201325 (2011). https:\/\/doi.org\/10.1145\/2049662.2049663","journal-title":"ACM Trans. Math. Softw."},{"key":"4966_CR24","unstructured":"NVIDIA: Turing Architecture Whitepaper (2018)"},{"key":"4966_CR25","unstructured":"NVIDIA: Ampere Architecture Whitepaper (2021)"},{"issue":"12","key":"4966_CR26","first-page":"379","volume":"7","author":"T Davis","year":"2007","unstructured":"Davis, T.: Wilkinson\u2019s sparse matrix definition. NA Digest 7(12), 379\u2013401 (2007)","journal-title":"NA Digest"},{"key":"4966_CR27","doi-asserted-by":"publisher","unstructured":"Bell, N., Garland, M.: Implementing sparse matrix-vector multiplication on throughput-oriented processors. In: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis, pp. 1\u201311. ACM, Portland Oregon (2009). https:\/\/doi.org\/10.1145\/1654059.1654078","DOI":"10.1145\/1654059.1654078"},{"key":"4966_CR28","doi-asserted-by":"publisher","unstructured":"Maggioni, M., Berger-Wolf, T., Liang, J.: GPU-based steady-state solution of the chemical master equation. In: 2013 IEEE International Symposium on Parallel & Distributed Processing, Workshops and PhD Forum, pp. 579\u2013588. IEEE, Cambridge (2013). https:\/\/doi.org\/10.1109\/IPDPSW.2013.271","DOI":"10.1109\/IPDPSW.2013.271"},{"key":"4966_CR29","unstructured":"Anzt, H., Tomov, S., Dongarra, J.: Implementing a sparse matrix vector product for the SELL-C\/SELL-C-$$\\sigma$$ formats on NVIDIA GPU. University of Tennessee, Tech. Rep. ut-eecs-14-727 (2014)"},{"key":"4966_CR30","doi-asserted-by":"publisher","first-page":"49","DOI":"10.2528\/PIER11031607","volume":"116","author":"A Dziekonski","year":"2011","unstructured":"Dziekonski, A., Lamecki, A., Mrozowski, M.: A memory efficient and fast sparse matrix vector product on a GPU. Prog. Electromagn. Res. 116, 49\u201363 (2011). https:\/\/doi.org\/10.2528\/PIER11031607","journal-title":"Prog. Electromagn. Res."},{"key":"4966_CR31","doi-asserted-by":"publisher","unstructured":"Xu, W., Zhang, H., Jiao, S., Wang, D., Song, F., Liu, Z.: Optimizing sparse matrix vector multiplication using cache blocking method on fermi GPU. In: 2012 13th ACIS International Conference on Software Engineering, Artificial Intelligence, Networking and Parallel\/Distributed Computing, pp. 231\u2013235. IEEE, Kyoto (2012). https:\/\/doi.org\/10.1109\/SNPD.2012.20","DOI":"10.1109\/SNPD.2012.20"},{"issue":"12","key":"4966_CR32","doi-asserted-by":"publisher","first-page":"1784","DOI":"10.1002\/nme.4865","volume":"102","author":"J Wong","year":"2015","unstructured":"Wong, J., Kuhl, E., Darve, E.: A new sparse matrix vector multiplication graphics processing unit algorithm designed for finite element problems. Int. J. Numer. Methods Eng. 102(12), 1784\u20131814 (2015). https:\/\/doi.org\/10.1002\/nme.4865","journal-title":"Int. J. Numer. Methods Eng."},{"key":"4966_CR33","doi-asserted-by":"publisher","unstructured":"Muro, R., Fujii, A., Tanaka, T.: Acceleration of symmetric sparse matrix\u2013vector product using improved hierarchical diagonal blocking format. In: Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region, pp. 63\u201370. ACM, Guangzhou (2019). https:\/\/doi.org\/10.1145\/3293320.3293332","DOI":"10.1145\/3293320.3293332"},{"issue":"3","key":"4966_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3226228","volume":"15","author":"A Benatia","year":"2018","unstructured":"Benatia, A., Ji, W., Wang, Y., Shi, F.: BestSF: A sparse meta-format for optimizing SpMV on GPU. ACM Trans. Archit. Code Optim. 15(3), 1\u201327 (2018). https:\/\/doi.org\/10.1145\/3226228","journal-title":"ACM Trans. Archit. Code Optim."},{"key":"4966_CR35","doi-asserted-by":"publisher","first-page":"81279","DOI":"10.1109\/ACCESS.2019.2923565","volume":"7","author":"S Usman","year":"2019","unstructured":"Usman, S., Mehmood, R., Katib, I., Albeshri, A.: ZAKI+: A machine learning based process mapping tool for SpMV computations on distributed memory architectures. IEEE Access 7, 81279\u201381296 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2923565","journal-title":"IEEE Access"},{"issue":"1","key":"4966_CR36","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1177\/1094342019886628","volume":"34","author":"A Benatia","year":"2020","unstructured":"Benatia, A., Ji, W., Wang, Y., Shi, F.: Sparse matrix partitioning for optimizing SpMV on CPU-GPU heterogeneous platforms. Int. J. High Perform. Comput. Appl. 34(1), 66\u201380 (2020). https:\/\/doi.org\/10.1177\/1094342019886628","journal-title":"Int. J. High Perform. Comput. Appl."},{"issue":"6","key":"4966_CR37","doi-asserted-by":"publisher","first-page":"6313","DOI":"10.1007\/s11227-020-03489-3","volume":"77","author":"T Mohammed","year":"2021","unstructured":"Mohammed, T., Albeshri, A., Katib, I., Mehmood, R.: DIESEL: A novel deep learning-based tool for SpMV computations and solving sparse linear equation systems. J. Supercomput. 77(6), 6313\u20136355 (2021). https:\/\/doi.org\/10.1007\/s11227-020-03489-3","journal-title":"J. Supercomput."},{"key":"4966_CR38","doi-asserted-by":"publisher","unstructured":"Greathouse, J.L., Daga, M.: Efficient sparse matrix\u2013vector multiplication on GPUs using the CSR storage format. In: SC14: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 769\u2013780. IEEE, New Orleans (2014). https:\/\/doi.org\/10.1109\/SC.2014.68","DOI":"10.1109\/SC.2014.68"},{"key":"4966_CR39","doi-asserted-by":"crossref","unstructured":"Liu, W., Vinter, B.: CSR5: An efficient storage format for cross-platform sparse matrix\u2013vector multiplication. In: Proceedings of the 29th ACM on International Conference on Supercomputing, pp. 339\u2013350 (2015)","DOI":"10.1145\/2751205.2751209"},{"key":"4966_CR40","doi-asserted-by":"publisher","unstructured":"Merrill, D., Garland, M.: Merge-based parallel sparse matrix\u2013vector multiplication. In: SC16: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 678\u2013689. IEEE, Salt Lake City (2016). https:\/\/doi.org\/10.1109\/SC.2016.57","DOI":"10.1109\/SC.2016.57"},{"key":"4966_CR41","doi-asserted-by":"publisher","unstructured":"Steinberger, M., Zayer, R., Seidel, H.-P.: Globally homogeneous, locally adaptive sparse matrix\u2013vector multiplication on the GPU. In: Proceedings of the International Conference on Supercomputing, pp. 1\u201311. ACM, Chicago (2017). https:\/\/doi.org\/10.1145\/3079079.3079086","DOI":"10.1145\/3079079.3079086"},{"issue":"1","key":"4966_CR42","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/s11265-016-1216-4","volume":"90","author":"Y Liu","year":"2018","unstructured":"Liu, Y., Schmidt, B.: LightSpMV: faster CUDA-compatible sparse matrix\u2013vector multiplication using compressed sparse rows. J. Signal Process. Syst. 90(1), 69\u201386 (2018). https:\/\/doi.org\/10.1007\/s11265-016-1216-4","journal-title":"J. Signal Process. Syst."},{"issue":"7","key":"4966_CR43","doi-asserted-by":"publisher","first-page":"2639","DOI":"10.1016\/j.jpdc.2014.03.002","volume":"74","author":"C Zheng","year":"2014","unstructured":"Zheng, C., Gu, S., Gu, T.-X., Yang, B., Liu, X.-P.: BiELL: A bisection ELLPACK-based storage format for optimizing SpMV on GPUs. J. Parallel Distrib. Comput. 74(7), 2639\u20132647 (2014). https:\/\/doi.org\/10.1016\/j.jpdc.2014.03.002","journal-title":"J. Parallel Distrib. Comput."},{"key":"4966_CR44","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.jpdc.2014.11.001","volume":"76","author":"A Ashari","year":"2015","unstructured":"Ashari, A., Sedaghati, N., Eisenlohr, J., Sadayappan, P.: A model-driven blocking strategy for load balanced sparse matrix-vector multiplication on GPUs. J. Parallel Distrib. Comput. 76, 3\u201315 (2015). https:\/\/doi.org\/10.1016\/j.jpdc.2014.11.001","journal-title":"J. Parallel Distrib. Comput."},{"issue":"4","key":"4966_CR45","doi-asserted-by":"publisher","first-page":"3431","DOI":"10.1007\/s10586-021-03340-1","volume":"24","author":"H Bian","year":"2021","unstructured":"Bian, H., Huang, J., Dong, R., Guo, Y., Liu, L., Huang, D., Wang, X.: A simple and efficient storage format for SIMD-accelerated SpMV. Clust. Comput. 24(4), 3431\u20133448 (2021). https:\/\/doi.org\/10.1007\/s10586-021-03340-1","journal-title":"Clust. Comput."},{"issue":"2","key":"4966_CR46","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1177\/1094342013501126","volume":"28","author":"W Yang","year":"2014","unstructured":"Yang, W., Li, K., Liu, Y., Shi, L., Wan, L.: Optimization of quasi-diagonal matrix\u2013vector multiplication on GPU. Int. J. High Perform. Comput. Appl. 28(2), 183\u2013195 (2014). https:\/\/doi.org\/10.1177\/1094342013501126","journal-title":"Int. J. High Perform. Comput. Appl."},{"issue":"1","key":"4966_CR47","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1002\/cpe.2978","volume":"26","author":"X Feng","year":"2014","unstructured":"Feng, X., Jin, H., Zheng, R., Shao, Z., Zhu, L.: A segment-based sparse matrix\u2013vector multiplication on CUDA. Concurr. Comput. Pract. Exp. 26(1), 271\u2013286 (2014). https:\/\/doi.org\/10.1002\/cpe.2978","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"4966_CR48","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1016\/j.jpdc.2016.12.023","volume":"104","author":"W Yang","year":"2017","unstructured":"Yang, W., Li, K., Li, K.: A hybrid computing method of SpMV on CPU\u2013GPU heterogeneous computing systems. J. Parallel Distrib. Comput. 104, 49\u201360 (2017). https:\/\/doi.org\/10.1016\/j.jpdc.2016.12.023","journal-title":"J. Parallel Distrib. Comput."},{"key":"4966_CR49","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1016\/j.jcss.2017.09.010","volume":"92","author":"W Yang","year":"2018","unstructured":"Yang, W., Li, K.: A parallel computing method using blocked format with optimal partitioning for SpMV on GPU. J. Comput. Syst. Sci. 92, 152\u2013170 (2018)","journal-title":"J. Comput. Syst. Sci."},{"key":"4966_CR50","doi-asserted-by":"publisher","unstructured":"Gkountouvas, T., Karakasis, V., Kourtis, K., Goumas, G., Koziris, N.: Improving the performance of the symmetric sparse matrix\u2013vector multiplication in multicore. In: 2013 IEEE 27th International Symposium on Parallel and Distributed Processing, pp. 273\u2013283. IEEE, Cambridge (2013). https:\/\/doi.org\/10.1109\/IPDPS.2013.43","DOI":"10.1109\/IPDPS.2013.43"},{"issue":"6","key":"4966_CR51","doi-asserted-by":"publisher","first-page":"643","DOI":"10.1137\/14097135X","volume":"37","author":"P Mironowicz","year":"2015","unstructured":"Mironowicz, P., Dziekonski, A., Mrozowski, M.: A task-scheduling approach for efficient sparse symmetric matrix\u2013vector multiplication on a GPU. SIAM J. Sci. Comput. 37(6), 643\u2013666 (2015). https:\/\/doi.org\/10.1137\/14097135X","journal-title":"SIAM J. Sci. Comput."},{"issue":"3","key":"4966_CR52","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3399732","volume":"7","author":"C Alappat","year":"2020","unstructured":"Alappat, C., Basermann, A., Bishop, A.R., Fehske, H., Hager, G., Schenk, O., Thies, J., Wellein, G.: A recursive algebraic coloring technique for hardware-efficient symmetric sparse matrix\u2013vector multiplication. ACM Trans. Parallel Comput. 7(3), 1\u201337 (2020). https:\/\/doi.org\/10.1145\/3399732","journal-title":"ACM Trans. Parallel Comput."},{"key":"4966_CR53","unstructured":"NVIDIA: cuSPARSE Library (2021)"},{"key":"4966_CR54","doi-asserted-by":"publisher","unstructured":"Aliaga, J.I., Anzt, H., Gr\u00fctzmacher, T., Quintana-Ort\u00ed, E.S., Tom\u00e1s, A.E.: Compression and load balancing for efficient sparse matrix\u2013vector product on multicore processors and graphics processing units. Concurr. Comput. Pract. Expe. 34(14) (2022). https:\/\/doi.org\/10.1002\/cpe.6515","DOI":"10.1002\/cpe.6515"},{"issue":"1","key":"4966_CR55","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1137\/S1064827595287997","volume":"20","author":"G Karypis","year":"1998","unstructured":"Karypis, G., Kumar, V.: A fast and high quality multilevel scheme for partitioning irregular graphs. SIAM J. Sci. Comput. 20(1), 359\u2013392 (1998)","journal-title":"SIAM J. Sci. Comput."},{"key":"4966_CR56","doi-asserted-by":"crossref","unstructured":"Ashari, A., Sedaghati, N., Eisenlohr, J., Parthasarath, S., Sadayappan, P.: Fast sparse matrix\u2013vector multiplication on GPUs for graph applications. In: SC\u201914: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 781\u2013792. IEEE (2014)","DOI":"10.1109\/SC.2014.69"},{"key":"4966_CR57","doi-asserted-by":"crossref","unstructured":"Trotter, J.D., Ekmek\u00e7iba\u015f\u0131, S., Langguth, J., Torun, T., D\u00fczak\u0131n, E., Ilic, A., Unat, D.: Bringing order to sparsity: a sparse matrix reordering study on multicore CPUs. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201313 (2023)","DOI":"10.1145\/3581784.3607046"},{"key":"4966_CR58","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718003","volume-title":"Iterative Methods for Sparse Linear Systems","author":"Y Saad","year":"2003","unstructured":"Saad, Y.: Iterative Methods for Sparse Linear Systems. SIAM, Philadelphia (2003)"},{"issue":"21","key":"4966_CR59","doi-asserted-by":"publisher","first-page":"4014","DOI":"10.48550\/ARXIV.2210.03881","volume":"10","author":"C Cui","year":"2022","unstructured":"Cui, C., Jiang, K., Liu, Y., Shu, S.: Fourier Neural Solver for large sparse linear algebraic systems. Mathematics 10(21), 4014 (2022). https:\/\/doi.org\/10.48550\/ARXIV.2210.03881","journal-title":"Mathematics"},{"key":"4966_CR60","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898719581","volume-title":"Templates for the Solution of Algebraic Eigenvalue Problems: A Practical Guide","author":"Z Bai","year":"2000","unstructured":"Bai, Z., Demmel, J., Dongarra, J., Ruhe, A., van der Vorst, H.: Templates for the Solution of Algebraic Eigenvalue Problems: A Practical Guide. SIAM, Philadelphia (2000)"},{"key":"4966_CR61","doi-asserted-by":"publisher","unstructured":"Mehmood, R., Crowcroft, J., Elmirghani, J.M.H.: A parallel implicit method for the steady-state solution of CTMCs. In: 14th IEEE International Symposium on Modeling, Analysis, and Simulation, pp. 293\u2013302. IEEE, Monterey (2006). https:\/\/doi.org\/10.1109\/MASCOTS.2006.8","DOI":"10.1109\/MASCOTS.2006.8"},{"key":"4966_CR62","doi-asserted-by":"publisher","DOI":"10.1515\/9781400830329","volume-title":"Google\u2019s PageRank and Beyond: The Science of Search Engine Rankings","author":"AN Langville","year":"2006","unstructured":"Langville, A.N., Meyer, C.D.: Google\u2019s PageRank and Beyond: The Science of Search Engine Rankings. Princeton University Press, Princeton (2006)"}],"container-title":["Cluster Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-024-04966-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10586-024-04966-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-024-04966-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T12:25:31Z","timestamp":1757161531000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10586-024-04966-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,28]]},"references-count":62,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["4966"],"URL":"https:\/\/doi.org\/10.1007\/s10586-024-04966-7","relation":{},"ISSN":["1386-7857","1573-7543"],"issn-type":[{"type":"print","value":"1386-7857"},{"type":"electronic","value":"1573-7543"}],"subject":[],"published":{"date-parts":[[2025,4,28]]},"assertion":[{"value":"16 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 November 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 December 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 April 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"291"}}