{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:39:46Z","timestamp":1740123586629,"version":"3.37.3"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2015,11,28]],"date-time":"2015-11-28T00:00:00Z","timestamp":1448668800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100005071","name":"Shiraz University (IR)","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100005071","id-type":"DOI","asserted-by":"publisher"}]},{"name":"School of Computer, Institute for Research in Fundamental Sciences (IR)"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2016,9]]},"DOI":"10.1007\/s11227-015-1571-0","type":"journal-article","created":{"date-parts":[[2015,11,28]],"date-time":"2015-11-28T13:26:25Z","timestamp":1448717185000},"page":"3366-3386","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Adaptive sparse matrix representation for efficient matrix\u2013vector multiplication"],"prefix":"10.1007","volume":"72","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1004-6398","authenticated-orcid":false,"given":"Pantea","family":"Zardoshti","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Farshad","family":"Khunjush","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hamid","family":"Sarbazi-Azad","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,11,28]]},"reference":[{"key":"1571_CR1","unstructured":"Asanovic K et al (2006) The landscape of parallel computing research: a view from Berkeley, vol 2. Technical Report UCB\/EECS-2006-183, EECS Department, University of California, Berkeley"},{"key":"1571_CR2","unstructured":"Page L, Brin S, Motwani R, Winograd T (1998) The PageRank citation ranking: bringing order to the web. Technical report, Stanford Digital Library Technologies Project"},{"key":"1571_CR3","doi-asserted-by":"crossref","unstructured":"Saad Y (2003) Iterative methods for sparse linear systems, 2nd edn. SIAM, Philadelphia","DOI":"10.1137\/1.9780898718003"},{"key":"1571_CR4","doi-asserted-by":"crossref","unstructured":"Bell N, Garland M (2009) Implementing sparse matrix\u2013vector multiplication on throughput-oriented processors. In: Proceedings of the conference on high performance computing networking, storage and analysis. ACM","DOI":"10.1145\/1654059.1654078"},{"key":"1571_CR5","unstructured":"Baskaran MM, Bordawekar R (2008) Optimizing sparse matrix\u2013vector multiplication on GPUs using compile-time and run-time strategies. IBM Research Report, RC24704 (W0812-047)"},{"key":"1571_CR6","unstructured":"Baskaran MM, Bordawekar R (2009) Sparse matrix\u2013vector multiplication toolkit for graphics processing units. http:\/\/www.alphaworks.ibm.com\/tech\/spmv4gpu"},{"key":"1571_CR7","doi-asserted-by":"crossref","unstructured":"Monakov A, Lokhmotov A, Avetisyan A (2010) Automatically tuning sparse matrix\u2013vector multiplication for GPU architectures. In: High performance embedded architectures and compilers. Springer, Berlin","DOI":"10.1007\/978-3-642-11515-8_10"},{"key":"1571_CR8","unstructured":"Monakov A (May 2012) Specialized sparse matrix formats and SpMV kernel tuning for GPUs. In: Proceedings of the GPU technology conference (GTC)"},{"key":"1571_CR9","doi-asserted-by":"crossref","unstructured":"Choi JW, Singh A, Vuduc RW (2010) Model-driven autotuning of sparse matrix\u2013vector multiply on GPUs. In: ACM sigplan notices, vol 45, no 5. ACM","DOI":"10.1145\/1837853.1693471"},{"key":"1571_CR10","doi-asserted-by":"crossref","unstructured":"Grewe D, Lokhmotov A (2011) Automatically generating and tuning GPU code for sparse matrix\u2013vector multiplication from a high-level representation. In: Proceedings of the fourth workshop on general purpose processing on graphics processing units. ACM","DOI":"10.1145\/1964179.1964196"},{"key":"1571_CR11","doi-asserted-by":"crossref","unstructured":"Reguly I, Giles M (2012) Efficient sparse matrix\u2013vector multiplication on cache-based GPUs. In: Innovative parallel computing (InPar), 2012. IEEE","DOI":"10.1109\/InPar.2012.6339602"},{"issue":"8","key":"1571_CR12","doi-asserted-by":"crossref","first-page":"815","DOI":"10.1002\/cpe.1658","volume":"23","author":"F V\u00e1zquez","year":"2011","unstructured":"V\u00e1zquez F, Fern\u00e1ndez JJ, Garz\u00f3n EM (2011) A new approach for sparse matrix vector product on NVIDIA GPUs. Concurr. Comput. Pract. Exp. 23(8):815\u2013826","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"1571_CR13","doi-asserted-by":"crossref","unstructured":"Yan S et al (2014) yaspmv: Yet another SpMV framework on GPUs. In: ACM SIGPLAN notices, vol 49, no 8. ACM","DOI":"10.1145\/2692916.2555255"},{"key":"1571_CR14","doi-asserted-by":"crossref","unstructured":"Ashari A et al (2014) An efficient two-dimensional blocking strategy for sparse matrix\u2013vector multiplication on GPUs. In: Proceedings of the 28th ACM international conference on supercomputing. ACM","DOI":"10.1145\/2597652.2597678"},{"issue":"7","key":"1571_CR15","doi-asserted-by":"crossref","first-page":"2639","DOI":"10.1016\/j.jpdc.2014.03.002","volume":"74","author":"C Zheng","year":"2014","unstructured":"Zheng C et al (2014) BiELL: a bisection ELLPACK-based storage format for optimizing SpMV on GPUs. J Parallel Distrib Comput 74(7):2639\u20132647","journal-title":"J Parallel Distrib Comput"},{"issue":"3","key":"1571_CR16","doi-asserted-by":"crossref","first-page":"431","DOI":"10.1007\/s11704-014-4127-1","volume":"9","author":"CC Yan","year":"2015","unstructured":"Yan CC et al (2015) Memory bandwidth optimization of SpMV on GPGPUs. Front Comput Sci 9(3):431\u2013441","journal-title":"Front Comput Sci"},{"issue":"13","key":"1571_CR17","doi-asserted-by":"publisher","first-page":"3281","DOI":"10.1002\/cpe.3217","volume":"27","author":"P Guo","year":"2014","unstructured":"Guo P, Wang L (2014) Accurate cross-architecture performance modeling for sparse matrix-vector multiplication (SpMV) on GPUs. Concurr Comput Pract Exp 27(13):3281\u20133294. doi: 10.1002\/cpe.3217","journal-title":"Concurr Comput Pract Exp"},{"issue":"1","key":"1571_CR18","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1109\/TPDS.2014.2308221","volume":"26","author":"K Li","year":"2015","unstructured":"Li K, Yang W, Li K (2015) Performance analysis and optimization for SpMV on GPU using probabilistic modeling. IEEE Trans Parallel Distrib Syst 26(1):196\u2013205","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"1571_CR19","doi-asserted-by":"crossref","unstructured":"Neelima B, Ram Mohana Reddy G, Raghavendra Prakash S (2014) Predicting an Optimal Sparse Matrix Format for SpMV Computation on GPU. In: Parallel and distributed processing symposium workshops (IPDPSW), 2014 IEEE international. IEEE","DOI":"10.1109\/IPDPSW.2014.160"},{"key":"1571_CR20","doi-asserted-by":"crossref","unstructured":"Sedaghati N, Mu T, Pouchet L-N, Parthasarathy S, Sadayappan P (2015) Automatic selection of sparse matrix representation on GPUs. In: Proceedings of the 29th ACM international conference on supercomputing. ACM","DOI":"10.1145\/2751205.2751244"},{"key":"1571_CR21","unstructured":"Vuduc RW (2003) Automatic performance tuning of sparse matrix kernels, Diss. University of California, Berkeley"},{"issue":"3","key":"1571_CR22","doi-asserted-by":"crossref","first-page":"178","DOI":"10.1016\/j.parco.2008.12.006","volume":"35","author":"S Williams","year":"2009","unstructured":"Williams S et al (2009) Optimization of sparse matrix\u2013vector multiplication on emerging multicore platforms. Parallel Comput 35(3):178\u2013194","journal-title":"Parallel Comput"},{"key":"1571_CR23","unstructured":"Williams S (2008) Webb. Auto-tuning performance on multicore computers, ProQuest"},{"key":"1571_CR24","doi-asserted-by":"crossref","unstructured":"Vuduc RW, Demmel JW, Yelick KA (2005) OSKI: a library of automatically tuned sparse matrix kernels. In: Journal of Physics: conference series, vol 16, no 1. IOP Publishing","DOI":"10.1088\/1742-6596\/16\/1\/071"},{"key":"1571_CR25","doi-asserted-by":"crossref","unstructured":"Bilmes J et al (1997) Optimizing matrix multiply using PHiPAC: a portable, high-performance, ANSI C coding methodology. In: Proceedings of the 11th international conference on supercomputing. ACM","DOI":"10.1145\/263580.263662"},{"key":"1571_CR26","doi-asserted-by":"crossref","unstructured":"Whaley RC, Dongarra JJ (1998) Automatically tuned linear algebra software. In: Proceedings of the 1998 ACM\/IEEE conference on supercomputing. IEEE Computer Society","DOI":"10.1109\/SC.1998.10004"},{"issue":"1","key":"1571_CR27","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1177\/1094342004041296","volume":"18","author":"EJ Im","year":"2004","unstructured":"Im EJ, Yelick KA, Vuduc RW (2004) Sparsity: optimization framework for sparse matrix kernels. Int J High Perform Comput Appl 18(1):135\u2013158","journal-title":"Int J High Perform Comput Appl"},{"key":"1571_CR28","doi-asserted-by":"crossref","unstructured":"Li J et al (2013) SMAT: an input adaptive auto-tuner for sparse matrix\u2013vector multiplication. In: ACM SIGPLAN notices, vol 48, no 6. ACM","DOI":"10.1145\/2499370.2462181"},{"key":"1571_CR29","doi-asserted-by":"crossref","unstructured":"Vuduc RW, Moon HJ (2005) Fast sparse matrix\u2013vector multiplication by exploiting variable block structure. In: High performance computing and communications. Springer, Berlin","DOI":"10.1007\/11557654_91"},{"issue":"3","key":"1571_CR30","doi-asserted-by":"crossref","first-page":"519","DOI":"10.1137\/0914033","volume":"14","author":"AT Ogielski","year":"1993","unstructured":"Ogielski AT, Aiello W (1993) Sparse matrix computations on parallel processor arrays. SIAM J Sci Comput 14(3):519\u2013530","journal-title":"SIAM J Sci Comput"},{"key":"1571_CR31","doi-asserted-by":"crossref","unstructured":"Lee BC et al (2004) Performance models for evaluation and automatic tuning of symmetric sparse matrix\u2013vector multiply. In: International conference on parallel processing, 2004. ICPP 2004. IEEE","DOI":"10.1109\/ICPP.2004.1327917"},{"key":"1571_CR32","volume-title":"Optimizing the performance of sparse matrix\u2013vector multiplication","author":"EJ Im","year":"2000","unstructured":"Im EJ, Yelick KA (2000) Optimizing the performance of sparse matrix\u2013vector multiplication. University of California, Berkeley"},{"key":"1571_CR33","doi-asserted-by":"crossref","unstructured":"Kourtis K et al (2011) CSX: an extended compression format for spmv on shared memory systems. In: ACM SIGPLAN notices, vol 46, no 8. ACM","DOI":"10.1145\/2038037.1941587"},{"key":"1571_CR34","doi-asserted-by":"crossref","unstructured":"Liu W, Vinter B (2015) Csr5: an efficient storage format for cross-platform sparse matrix\u2013vector multiplication. In: Proceedings of the 29th ACM international conference on supercomputing. ACM","DOI":"10.1145\/2751205.2751209"},{"key":"1571_CR35","unstructured":"NVIDA (2014) Whitepaper NVIDIAs next generation CUDA compute architecture: Kepler GK110\/210"},{"key":"1571_CR36","unstructured":"NVIDIA Corporation (2014) Tuning CUDA applications for Kepler. Technical report, August 2014. http:\/\/docs.nvidia.com\/cuda\/pdf\/Kepler_Tuning_Guide.pdf"},{"key":"1571_CR37","unstructured":"NVIDIA CUDA (2010) NVIDIA CUDA C programming guide, Version 3.1. http:\/\/developer.download.nvidia.com\/compute\/cuda\/3_1\/toolkit\/docs\/NVIDIA_CUDA_C_ProgrammingGuide_3.1.pdf . Accessed 4 May 2011"},{"issue":"1","key":"1571_CR38","first-page":"1","volume":"38","author":"TA Davis","year":"2011","unstructured":"Davis TA, Hu Y (2011) The University of Florida sparse matrix collection. ACM Trans Math Softw (TOMS) 38(1):1","journal-title":"ACM Trans Math Softw (TOMS)"},{"key":"1571_CR39","unstructured":"NVIDIA (2013) Compute visual profiler user guide. http:\/\/developer.download.nvidia.com\/compute\/DevZone\/docs\/html\/C\/doc\/Compute-Visual-Profiler-User-Guide.Pdf"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1571-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-015-1571-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1571-0","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T21:32:04Z","timestamp":1567373524000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-015-1571-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,11,28]]},"references-count":39,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2016,9]]}},"alternative-id":["1571"],"URL":"https:\/\/doi.org\/10.1007\/s11227-015-1571-0","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2015,11,28]]}}}