{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T13:53:18Z","timestamp":1770990798178,"version":"3.50.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T00:00:00Z","timestamp":1634515200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T00:00:00Z","timestamp":1634515200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61672181"],"award-info":[{"award-number":["61672181"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2022,4]]},"DOI":"10.1007\/s11227-021-04123-6","type":"journal-article","created":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T00:07:34Z","timestamp":1634602054000},"page":"6318-6339","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["An effective SPMV based on block strategy and hybrid compression on GPU"],"prefix":"10.1007","volume":"78","author":[{"given":"Huanyu","family":"Cui","sequence":"first","affiliation":[]},{"given":"Nianbin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yuhua","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Qilong","family":"Han","sequence":"additional","affiliation":[]},{"given":"Yuezhu","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,10,18]]},"reference":[{"key":"4123_CR1","unstructured":"Ernesto D, Pablo E (2018) Solving sparse triangular linear systems in modern GPUs: a synchronization-free algorithm. In: 26th Euromicro International Conference on Parallel, Distributed and Network-based Processing (PDP), 1:196\u2013203"},{"key":"4123_CR2","first-page":"8883","volume":"76","author":"A Ahmadi","year":"2021","unstructured":"Ahmadi A, Manganiello F, Khademi A et al (2021) A parallel Jacobi-embedded Gauss-Seidel mMethod. IEEE Trans Parallel Distrib Syst 76:8883\u20138900","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"4123_CR3","doi-asserted-by":"publisher","first-page":"8883","DOI":"10.1007\/s11227-020-03186-1","volume":"76","author":"M Barreda","year":"2020","unstructured":"Barreda M, Dolz MF et al (2020) Performance modeling of the sparse matrix-vector product via convolutional neural networks. J Supercomput 76:8883\u20138900","journal-title":"J Supercomput"},{"key":"4123_CR4","doi-asserted-by":"crossref","unstructured":"Benatia A, Ji, WX, Wang, YZ (2016) Sparse matrix format selection with multiclass SVM for SPMV on GPU. In: 45th International Conference on Parallel Processing (ICPP), Comm. ACM 38(4):393\u2013422","DOI":"10.1109\/ICPP.2016.64"},{"key":"4123_CR5","unstructured":"Nathan B, Michael G (2009) Implementing sparse matrix-vector multiplication on throughput-oriented processors. In: Proceedings of the ACM\/IEEE Conference on High Performance Computing, SC 2009, pp. 1\u201311"},{"key":"4123_CR6","unstructured":"Francisco V, Ortega G, Jos\u00e9-Jes\u00fas F (2010) Improving the performance of the sparse matrix vector product with GPUs. In: 10th IEEE International Conference on Computer and Information Technology, CIT Bradford, West Yorkshire, UK, pp. 1146\u20131151"},{"key":"4123_CR7","unstructured":"Dominik G, Anton L (2011) Automatically generating and tuning GPU code for sparse matrix-vector multiplication from a high-level representation. In: Proceedings of 4th Workshop on General Purpose Processing on Graphics Processing Units, GPGPU, pp. 1\u20138"},{"issue":"2","key":"4123_CR8","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/j.micpro.2011.05.005","volume":"36","author":"C Juan","year":"2012","unstructured":"Juan C, Francisco F, Marcos F et al (2012) Optimization of sparse matrix-vector multiplication using reordering techniques on GPUs. Microprocess Microsyst 36(2):65\u201377","journal-title":"Microprocess Microsyst"},{"issue":"1","key":"4123_CR9","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1109\/TPDS.2013.31","volume":"25","author":"AJN Yzelman","year":"2013","unstructured":"Yzelman AJN, Roose D (2013) High-level strategies for parallel shared-memory sparse matrix-vector multiplication. IEEE Tras Parallel Distrib Syst 25(1):116\u2013125","journal-title":"IEEE Tras Parallel Distrib Syst"},{"key":"4123_CR10","doi-asserted-by":"crossref","unstructured":"Ashari A, Sedaghati N, Eisenlohr J et al (2014) An efficient two-dimensional blocking strategy for sparse matrix-vector multiplication on GPUs. In: Proceedings of the 28th ACM international conference on Supercomputing. ACM, pp. 273\u2013282","DOI":"10.1145\/2597652.2597678"},{"issue":"9","key":"4123_CR11","doi-asserted-by":"publisher","first-page":"2623","DOI":"10.1109\/TC.2014.2366731","volume":"64","author":"W Yang","year":"2015","unstructured":"Yang W, Li K, Mo Z et al (2015) Performance optimization using partitioned SPMV on GPUs and multicore CPUs. IEEE Trans Comput 64(9):2623\u20132636","journal-title":"IEEE Trans Comput"},{"issue":"08","key":"4123_CR12","first-page":"60","volume":"491","author":"K Cheng","year":"2018","unstructured":"Cheng K, Tian J, Ma RL (2018) Study on efficient storage format of sparse matrix based on GPU. Comput Eng 491(08):60\u201366","journal-title":"Comput Eng"},{"issue":"3","key":"4123_CR13","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1080\/17445760802337010","volume":"24","author":"L Buatois","year":"2009","unstructured":"Buatois L, Caumon G, Levy B (2009) Concurrent number cruncher: a GPU implementation of a general sparse linear solver. Int J Parallel Emerg Distrib Syst 24(3):205\u2013223","journal-title":"Int J Parallel Emerg Distrib Syst"},{"issue":"4","key":"4123_CR14","first-page":"447","volume":"56","author":"T Oberhuber","year":"2011","unstructured":"Oberhuber T, Suzuki A, Vacata J (2011) New row-grouped CSR format for storing the sparse matrices on GPU with implementation in CUDA. Acta Tech 56(4):447\u2013466","journal-title":"Acta Tech"},{"key":"4123_CR15","doi-asserted-by":"crossref","unstructured":"Belgin M, Back G, Ribbens CJ (2009) Pattern-based sparse matrix representation for memory-efficient SMVM kernels. In: Proceedings of the 23rd International Conference on Supercomputing, ACM, pp 100\u2013109","DOI":"10.1145\/1542275.1542294"},{"issue":"3","key":"4123_CR16","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1016\/j.parco.2008.12.006","volume":"35","author":"S Williams","year":"2009","unstructured":"Williams S, Oliker L, Vuduc R (2009) Optimization of sparse matrix-vector multiplication on emerging multicore platforms. Parallel Comput 35(3):178\u2013194","journal-title":"Parallel Comput"},{"key":"4123_CR17","doi-asserted-by":"crossref","unstructured":"Monakov A, Lokhmotov A, Avetisyan A (2010) Automatically tuning sparse matrix-vector multiplication for GPU architectures. In: International Conference on High-Performance Embedded Architectures and Compilers. DBLP, pp 111\u2013125","DOI":"10.1007\/978-3-642-11515-8_10"},{"key":"4123_CR18","doi-asserted-by":"crossref","unstructured":"Choi JW, Singh A (2010) Model-driven autotuning of sparse matrix vector multiply on GPUs. In: PPoPP \u201910: Proceedings of the 15th ACM SIGPLAN symposium on Principles and Practice of Parallel Programming, pp 115\u2013126","DOI":"10.1145\/1693453.1693471"},{"issue":"12","key":"4123_CR19","doi-asserted-by":"publisher","first-page":"806","DOI":"10.1016\/j.parco.2011.08.004","volume":"37","author":"AN Yzelman","year":"2011","unstructured":"Yzelman AN, Bisseling RH (2011) Two-dimensional cache-oblivious sparse matrix vector multiplication. Parallel Comput 37(12):806\u2013819","journal-title":"Parallel Comput"},{"issue":"2","key":"4123_CR20","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1109\/TPDS.2018.2864729","volume":"30","author":"NFT Abubaker","year":"2019","unstructured":"Abubaker NFT, Kadir A, Cevdet A (2019) Spatiotemporal graph and hypergraph partitioning models for sparse matrix-vector multiplication on many-core architectures. IEEE Trans Parallel Distrib Syst 30(2):445\u2013458","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"4123_CR21","doi-asserted-by":"crossref","unstructured":"Yang C, Aydin B, Owens J (2018) Design principles for sparse matrix multiplication on the GPU. In: Euro-Par, pp 1\u201316","DOI":"10.1007\/978-3-319-96983-1_48"},{"key":"4123_CR22","doi-asserted-by":"crossref","unstructured":"Yan SG, Li C, Zhang YQ (2014). yaSPMV: yet another SPMV framework on GPUs. In: Proceedings of the 19th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, ACM, pp 107\u2013118","DOI":"10.1145\/2692916.2555255"},{"key":"4123_CR23","doi-asserted-by":"crossref","unstructured":"Liu W, Vinter B (2015) CSR5: An efficient storage format for cross-platform sparse matrix-vector multiplication. In: The 29th ACM International Conference on Supercomputing (ICS \u201915). ACM, ACM, pp. 1\u201312","DOI":"10.1145\/2751205.2751209"},{"issue":"4","key":"4123_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3218823","volume":"44","author":"GM Tan","year":"2018","unstructured":"Tan GM, Liu JH, Li JJ (2018) Design and implementation of adaptive SPMV library for multicore and many-core architecture. ACM Trans Math Softw 44(4):1\u201325","journal-title":"ACM Trans Math Softw"},{"key":"4123_CR25","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1016\/j.jcss.2017.09.010","volume":"92","author":"W Yang","year":"2018","unstructured":"Yang W, Li K, Li K (2018) A parallel computing method using blocked format with optimal partitioning for SPMV on GPU. J Comput Syst Sci 92:152\u2013170","journal-title":"J Comput Syst Sci"},{"issue":"3","key":"4123_CR26","first-page":"1","volume":"15","author":"A Benatia","year":"2019","unstructured":"Benatia A, Ji W, Wang Y et al (2019) BestSF: A sparse meta-format for optimizing SPMV on GPU. ACM Trans Arch Code Optim 15(3):1\u201327","journal-title":"ACM Trans Arch Code Optim"},{"issue":"2","key":"4123_CR27","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1177\/1094342013501126","volume":"28","author":"WD Yang","year":"2014","unstructured":"Yang WD, Li KL, Liu YZ et al (2014) Optimization of Quasi - diagonal matrix-vector multiplication on GPU[J]. Int J High Perform Comput Appl 28(2):183\u2013195","journal-title":"Int J High Perform Comput Appl"},{"key":"4123_CR28","unstructured":"Fukaya T, Ishida K, Miura A et al (2021) Accelerating the SpMV kernel on standard CPUs by exploiting the partially diagonal structures[J]. Preprints"},{"issue":"4","key":"4123_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1002\/cpe.5484","volume":"2021","author":"G He","year":"2021","unstructured":"He G, Chen Q, Gao J (2021) A new diagonal storage for efficient implementation of sparse matrix-vector multiplication on graphics processing unit. Concurr Comput Pract Exp 2021(4):1\u201315","journal-title":"Concurr Comput Pract Exp"},{"issue":"11","key":"4123_CR30","first-page":"1","volume":"157","author":"J Gao","year":"2021","unstructured":"Gao J, Xia Y, Yin R et al (2021) Adaptive diagonal sparse matrix-vector multiplication on GPU[J]. J Parallel Distrib Comput 157(11):1\u201353","journal-title":"J Parallel Distrib Comput"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-04123-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-021-04123-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-04123-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,3,18]],"date-time":"2022-03-18T16:22:05Z","timestamp":1647620525000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-021-04123-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,18]]},"references-count":30,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2022,4]]}},"alternative-id":["4123"],"URL":"https:\/\/doi.org\/10.1007\/s11227-021-04123-6","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,10,18]]},"assertion":[{"value":"29 September 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 October 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}