{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T08:00:32Z","timestamp":1776931232029,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T00:00:00Z","timestamp":1763164800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2009007"],"award-info":[{"award-number":["2009007"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3731599.3767441","type":"proceedings-article","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T16:13:44Z","timestamp":1762532024000},"page":"823-827","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["How effective is matrix reordering for improving performance of sparse matrix-vector multiplication?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5737-1780","authenticated-orcid":false,"given":"Omid","family":"Asudeh","sequence":"first","affiliation":[{"name":"University of Utah, Salt Lake City, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4285-1439","authenticated-orcid":false,"given":"Sina","family":"Mahdipour Saravani","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6589-9956","authenticated-orcid":false,"given":"Fabrice","family":"Rastello","sequence":"additional","affiliation":[{"name":"Inria, Grenoble, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8672-4071","authenticated-orcid":false,"given":"Gerald","family":"Sabin","sequence":"additional","affiliation":[{"name":"RNET Technologies, Dayton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4737-2034","authenticated-orcid":false,"given":"Ponnuswamy","family":"Sadayappan","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake city, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Kadir Akbudak Enver Kayaaslan and Cevdet Aykanat. 2012. Technical Report on Hypergraph-Partitioning-Based Models and Methods for Exploiting Cache Locality in Sparse-Matrix Vector Multiplication. ArXiv abs\/1202.3856 (2012). https:\/\/api.semanticscholar.org\/CorpusID:207981924"},{"key":"e_1_3_3_1_3_2","unstructured":"Omid Asudeh Sina\u00a0Mahdipour Saravani Gerald Sabin Fabrice Rastello and P Sadayappan. 2025. Is Sparse Matrix Reordering Effective for Sparse Matrix-Vector Multiplication? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2506.10356 (2025)."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Guillaume\u00a0Pallez Aupy Jeonghyung Park and Padma Raghavan. 2016. Locality-Aware Laplacian Mesh Smoothing. 2016 45th International Conference on Parallel Processing (ICPP) (2016) 588\u2013597. https:\/\/api.semanticscholar.org\/CorpusID:9487643","DOI":"10.1109\/ICPP.2016.74"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Ariful Azad Mathias Jacquelin Ayd\u0131n Bulu\u00e7 and Esmond\u00a0G. Ng. 2016. The Reverse Cuthill-McKee Algorithm in Distributed-Memory. 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS) (2016) 22\u201331. https:\/\/api.semanticscholar.org\/CorpusID:2736222","DOI":"10.1109\/IPDPS.2017.85"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Vincent\u00a0D. Blondel Jean-Loup Guillaume Renaud Lambiotte and Etienne Lefebvre. 2008. Fast unfolding of communities in large networks. Journal of Statistical Mechanics: Theory and Experiment 2008 (2008) P10008. https:\/\/api.semanticscholar.org\/CorpusID:334423","DOI":"10.1088\/1742-5468\/2008\/10\/P10008"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"e_1_3_3_1_8_2","volume-title":"The SuiteSparse Matrix Collection","author":"Davis Timothy","unstructured":"Timothy Davis. [n. d.]. The SuiteSparse Matrix Collection. http:\/\/sparse.tamu.edu\/"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Timothy\u00a0A Davis and Yifan Hu. 2011. The University of Florida sparse matrix collection. ACM Transactions on Mathematical Software (TOMS) 38 1 (2011) 1\u201325.","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Elizabeth\u00a0D Dolan and Jorge\u00a0J Mor\u00e9. 2002. Benchmarking optimization software with performance profiles. Mathematical programming 91 (2002) 201\u2013213.","DOI":"10.1007\/s101070100263"},{"key":"e_1_3_3_1_11_2","unstructured":"Matthew Drescher Muhammad\u00a0A. Awad Serban\u00a0D. Porumbescu and John\u00a0Douglas Owens. 2023. BOBA: A Parallel Lightweight Graph Reordering Algorithm with Heavyweight Implications. ArXiv abs\/2306.10410 (2023). https:\/\/api.semanticscholar.org\/CorpusID:259202987"},{"key":"e_1_3_3_1_12_2","unstructured":"Jianhua Gao Bingjie Liu Weixing Ji and Hua Huang. 2024. A Systematic Literature Survey of Sparse Matrix-Vector Multiplication. ArXiv abs\/2404.06047 (2024). https:\/\/api.semanticscholar.org\/CorpusID:269009492"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Norman\u00a0E. Gibbs William\u00a0G. Poole and Paul\u00a0K. Stockmeyer. 1976. An algorithm for reducing the bandwidth and profile of a sparse matrix. SIAM J. Numer. Anal. 13 (1976) 236\u2013250. https:\/\/api.semanticscholar.org\/CorpusID:122611444","DOI":"10.1137\/0713023"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.56021\/9781421407944"},{"key":"e_1_3_3_1_15_2","unstructured":"JiaJun Hou HongJie Liu and ShengXin Zhu. 2024. RCM++:Reverse Cuthill-McKee ordering with Bi-Criteria Node Finder. ArXiv abs\/2409.04171 (2024). https:\/\/api.semanticscholar.org\/CorpusID:272463987"},{"key":"e_1_3_3_1_16_2","volume-title":"METIS: A Software Package for Partitioning Unstructured Graphs, Partitioning Meshes, and Computing Fill-Reducing Orderings of Sparse Matrices","author":"Karypis George","year":"1997","unstructured":"George Karypis and Vipin Kumar. 1997. METIS: A Software Package for Partitioning Unstructured Graphs, Partitioning Meshes, and Computing Fill-Reducing Orderings of Sparse Matrices. Technical Report 97-061. University of Minnesota, Department of Computer Science and Engineering. https:\/\/hdl.handle.net\/11299\/215346"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"George Karypis and Vipin Kumar. 1998. A fast and high quality multilevel scheme for partitioning irregular graphs. SIAM Journal on scientific Computing 20 1 (1998) 359\u2013392.","DOI":"10.1137\/S1064827595287997"},{"key":"e_1_3_3_1_18_2","unstructured":"Ang Li Radu Serban and Dan Negrut. 2015. Analysis of a Splitting Approach for the Parallel Solution of Linear Systems on GPU Cards. SIAM J. Sci. Comput. 39 (2015). https:\/\/api.semanticscholar.org\/CorpusID:14242678"},{"key":"e_1_3_3_1_19_2","unstructured":"Yu-Fang Liang and Hou-Biao Li. 2024. Orthogonal Block Kaczmarz Algorithm Based on Preprocessing Technology. https:\/\/api.semanticscholar.org\/CorpusID:266693670"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Wai-Hung Liu and Andrew\u00a0H. Sherman. 1976. Comparative Analysis of the Cuthill\u2013McKee and the Reverse Cuthill\u2013McKee Ordering Algorithms for Sparse Matrices. SIAM J. Numer. Anal. 13 (1976) 198\u2013213. https:\/\/api.semanticscholar.org\/CorpusID:120924396","DOI":"10.1137\/0713020"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Todd\u00a0S. Munson and Paul\u00a0D. Hovland. 2005. The FeasNewt benchmark. IEEE International. 2005 Proceedings of the IEEE Workload Characterization Symposium 2005. (2005) 150\u2013154. https:\/\/api.semanticscholar.org\/CorpusID:17405473","DOI":"10.1109\/IISWC.2005.1526011"},{"key":"e_1_3_3_1_22_2","volume-title":"The PageRank citation ranking: Bringing order to the web.","author":"Page Lawrence","year":"1999","unstructured":"Lawrence Page, Sergey Brin, Rajeev Motwani, and Terry Winograd. 1999. The PageRank citation ranking: Bringing order to the web. Technical Report. Stanford infolab."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Thomas\u00a0B. Rolinger and Christopher\u00a0D. Krieger. 2018. Impact of Traditional Sparse Optimizations on a Migratory Thread Architecture. 2018 IEEE\/ACM 8th Workshop on Irregular Applications: Architectures and Algorithms (IA3) (2018) 45\u201352. https:\/\/api.semanticscholar.org\/CorpusID:55702233","DOI":"10.1109\/IA3.2018.00013"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.5555\/829576"},{"key":"e_1_3_3_1_25_2","unstructured":"George\u00a0M. Slota Sivasankaran Rajamanickam and Kamesh Madduri. 2017. Distributed Graph Layout for Scalable Small-world Network Analysis. ArXiv abs\/1701.00503 (2017). https:\/\/api.semanticscholar.org\/CorpusID:18972195"},{"key":"e_1_3_3_1_26_2","unstructured":"Jiawen Sun Hans Vandierendonck and Dimitrios\u00a0S. Nikolopoulos. 2018. VEBO: a vertex- and edge-balanced ordering heuristic to load balance parallel graph processing. Proceedings of the 24th Symposium on Principles and Practice of Parallel Programming (2018). https:\/\/api.semanticscholar.org\/CorpusID:49299764"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607046"},{"key":"e_1_3_3_1_28_2","unstructured":"Mich\u00e8le Weiland Lawrence Mitchell Gerard Gorman Stephan\u00a0C. Kramer Mark\u00a0I. Parsons and James\u00a0A. Southern. 2012. Mixed-mode implementation of PETSc for scalable linear algebra on multi-core processors. ArXiv abs\/1205.2005 (2012). https:\/\/api.semanticscholar.org\/CorpusID:6568515"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"\u00dcmit\u00a0V. \u00c7ataly\u00fcrek and Cevdet Aykanat. 1999. Hypergraph-Partitioning-Based Decomposition for Parallel Sparse-Matrix Vector Multiplication. IEEE Trans. Parallel Distributed Syst. 10 (1999) 673\u2013693. https:\/\/api.semanticscholar.org\/CorpusID:2954155","DOI":"10.1109\/71.780863"}],"event":{"name":"SC Workshops '25: Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St Louis MO USA","acronym":"SC Workshops '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731599.3767441","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731599.3767441","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T19:30:13Z","timestamp":1767987013000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731599.3767441"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":28,"alternative-id":["10.1145\/3731599.3767441","10.1145\/3731599"],"URL":"https:\/\/doi.org\/10.1145\/3731599.3767441","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}