{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:34:06Z","timestamp":1772724846279,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T00:00:00Z","timestamp":1691366400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,7]]},"DOI":"10.1145\/3605573.3605611","type":"proceedings-article","created":{"date-parts":[[2023,9,13]],"date-time":"2023-09-13T16:21:16Z","timestamp":1694622076000},"page":"807-817","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["HASpGEMM: Heterogeneity-Aware Sparse General Matrix-Matrix Multiplication on Modern Asymmetric Multicore Processors"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9379-4005","authenticated-orcid":false,"given":"Helin","family":"Cheng","sequence":"first","affiliation":[{"name":"China University of Petroleum-Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7270-1904","authenticated-orcid":false,"given":"Wenxuan","family":"Li","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6387-8116","authenticated-orcid":false,"given":"Yuechen","family":"Lu","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2150-5759","authenticated-orcid":false,"given":"Weifeng","family":"Liu","sequence":"additional","affiliation":[{"name":"China University of Petroleum-Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,9,13]]},"reference":[{"key":"e_1_3_2_2_1_1","first-page":"8","volume":"28","author":"Akbudak Kadir","year":"2017","unstructured":"Kadir Akbudak and Cevdet Aykanat. Exploiting Locality in Sparse Matrix-Matrix Multiplication on Many-Core Architectures. TPDS 28, 8 (2017).","journal-title":"Many-Core Architectures. TPDS"},{"key":"e_1_3_2_2_2_1","first-page":"3","volume":"4","author":"Akbudak Kadir","year":"2018","unstructured":"Kadir Akbudak, Oguz Selvitopi, and Cevdet Aykanat. Partitioning Models for Scaling Parallel Sparse Matrix-Matrix Multiplication. TOPC 4, 3 (2018).","journal-title":"Scaling Parallel Sparse Matrix-Matrix Multiplication. TOPC"},{"key":"e_1_3_2_2_3_1","unstructured":"Pham Nguyen\u00a0Quang Anh Rui Fan and Yonggang Wen. Balanced Hashing and Efficient GPU Sparse General Matrix-Matrix Multiplication. In ICS \u201916."},{"key":"e_1_3_2_2_4_1","first-page":"6","volume":"38","author":"Azad Ariful","year":"2016","unstructured":"Ariful Azad, Grey Ballard, Ayd\u0131n Bulu\u00e7, James Demmel, Laura Grigori, Oded Schwartz, Sivan Toledo, and Samuel Williams. Exploiting Multiple Levels of Parallelism in Sparse Matrix-Matrix Multiplication. SISC 38, 6 (2016).","journal-title":"Sparse Matrix-Matrix Multiplication. SISC"},{"key":"e_1_3_2_2_5_1","first-page":"6","volume":"46","author":"Azad Ariful","year":"2018","unstructured":"Ariful Azad, Georgios\u00a0A. Pavlopoulos, Christos\u00a0A. Ouzounis, Nikos\u00a0C. Kyrpides, and Ayd\u0131n Bulu\u00e7. HipMCL: A High-Performance Parallel Implementation of the Markov Clustering Algorithm for Large-Scale Networks. NAR 46, 6 (2018).","journal-title":"Large-Scale Networks. NAR"},{"key":"e_1_3_2_2_6_1","first-page":"4","volume":"33","author":"Azad Ariful","year":"2022","unstructured":"Ariful Azad, Oguz Selvitopi, Md Taufique Hussain, John R. Gilbert, and Ayd\u0131n Bulu\u00e7. Combinatorial BLAS 2.0: Scaling Combinatorial Algorithms on Distributed-Memory Systems. TPDS 33, 4 (2022).","journal-title":"Distributed-Memory Systems. TPDS"},{"key":"e_1_3_2_2_7_1","unstructured":"Allison\u00a0H. Baker Todd Gamblin Martin Schulz and Ulrike Meier Yang. Challenges of Scaling Algebraic Multigrid Across Modern Multicore Architectures. In IPDPS \u201911."},{"key":"e_1_3_2_2_8_1","unstructured":"Sai Balakrishnan Ravi Rajwar Mike Upton and Konrad Lai. The Impact of Performance Asymmetry in Emerging Multicore Architectures. In ISCA \u201905."},{"key":"e_1_3_2_2_9_1","unstructured":"Grey Ballard Ayd\u0131n Bulu\u00e7 James Demmel Laura Grigori Benjamin Lipshitz Oded Schwartz and Sivan Toledo. Communication Optimal Parallel Multiplication of Sparse Random Matrices. In SPAA \u201913."},{"key":"e_1_3_2_2_10_1","first-page":"3","volume":"3","author":"Ballard Grey","year":"2016","unstructured":"Grey Ballard, Alex Druinsky, Nicholas Knight, and Oded Schwartz. Hypergraph Partitioning for Sparse Matrix-Matrix Multiplication. TOPC 3, 3 (2016).","journal-title":"Sparse Matrix-Matrix Multiplication. TOPC"},{"key":"e_1_3_2_2_11_1","first-page":"3","volume":"38","author":"Ballard Grey","year":"2016","unstructured":"Grey Ballard, Christopher Siefert, and Jonathan Hu. Reducing Communication Costs for Sparse Matrix Multiplication within Algebraic Multigrid. SISC 38, 3 (2016).","journal-title":"Algebraic Multigrid. SISC"},{"key":"e_1_3_2_2_12_1","unstructured":"Michela Becchi and Patrick Crowley. Dynamic Thread Assignment on Heterogeneous Multiprocessor Architectures. In CF \u201906."},{"key":"e_1_3_2_2_13_1","first-page":"4","volume":"34","author":"Bell Nathan","year":"2012","unstructured":"Nathan Bell, Steven Dalton, and Luke\u00a0N. Olson. Exposing Fine-Grained Parallelism in Algebraic Multigrid Methods. SISC 34, 4 (2012).","journal-title":"Algebraic Multigrid Methods. SISC"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"Ayd\u0131n Bulu\u00e7 and John\u00a0R. Gilbert. 2008. Challenges and Advances in Parallel Sparse Matrix-Matrix Multiplication. In ICPP \u201908.","DOI":"10.1109\/ICPP.2008.45"},{"key":"e_1_3_2_2_15_1","unstructured":"Ayd\u0131n Bulu\u00e7 and John\u00a0R. Gilbert. On the Representation and Multiplication of Hypersparse Matrices. In IPDPS \u201908."},{"key":"e_1_3_2_2_16_1","first-page":"4","volume":"25","author":"Bulu\u00e7 Ayd\u0131n","year":"2011","unstructured":"Ayd\u0131n Bulu\u00e7 and John\u00a0R Gilbert. The Combinatorial BLAS: Design, Implementation, and Applications. IJHPCA 25, 4 (2011).","journal-title":"Applications. IJHPCA"},{"key":"e_1_3_2_2_17_1","first-page":"4","volume":"34","author":"Bulu\u00e7 Ayd\u0131n","year":"2012","unstructured":"Ayd\u0131n Bulu\u00e7 and John\u00a0R. Gilbert. Parallel Sparse Matrix-Matrix Multiplication and Indexing: Implementation and Experiments. SISC 34, 4 (2012).","journal-title":"Experiments. SISC"},{"key":"e_1_3_2_2_18_1","unstructured":"Ayd\u0131n Bulu\u00e7 Timothy Mattson Scott McMillan Jos\u00e9\u00a0E. Moreira and Carl Yang. Design of the GraphBLAS API for C. In GABB \u201917."},{"key":"e_1_3_2_2_19_1","first-page":"2","volume":"19","author":"Chen Jing","year":"2022","unstructured":"Jing Chen, Madhavan Manivannan, Mustafa Abduljabbar, and Miquel Peric\u00e0s. ERASE: Energy Efficient Task Mapping and Resource Management for Work Stealing Runtimes. TACO 19, 2 (2022).","journal-title":"Work Stealing Runtimes. TACO"},{"key":"e_1_3_2_2_20_1","first-page":"4","volume":"30","author":"Chen Yuedan","year":"2019","unstructured":"Yuedan Chen, Kenli Li, Wangdong Yang, Guoqing Xiao, Xianghui Xie, and Tao Li. Performance-Aware Model for Sparse Matrix-Matrix Multiplication on the Sunway TaihuLight Supercomputer. TPDS 30, 4 (2019).","journal-title":"Sunway TaihuLight Supercomputer. TPDS"},{"key":"e_1_3_2_2_21_1","unstructured":"Steven Dalton Sean Baxter Duane Merrill Luke Olson and Michael Garland.Optimizing Sparse Matrix Operations on GPUs Using Merge Path. In IPDPS \u201915."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2699470"},{"key":"e_1_3_2_2_23_1","volume-title":"Algorithm 1000: SuiteSparse: GraphBLAS: Graph Algorithms in the Language of Sparse Linear Algebra. TOMS 45, 4","author":"Davis A.","year":"2019","unstructured":"Timothy\u00a0A. Davis. Algorithm 1000: SuiteSparse: GraphBLAS: Graph Algorithms in the Language of Sparse Linear Algebra. TOMS 45, 4 (2019)."},{"key":"e_1_3_2_2_24_1","first-page":"1","volume":"38","author":"Timothy\u00a0A. Davis and Yifan Hu. The University of","year":"2011","unstructured":"Timothy\u00a0A. Davis and Yifan Hu. The University of Florida Sparse Matrix Collection. TOMS 38, 1 (2011).","journal-title":"Florida Sparse Matrix Collection. TOMS"},{"key":"e_1_3_2_2_25_1","first-page":"12","volume":"31","author":"Demirci Gunduz Vehbi","year":"2020","unstructured":"Gunduz Vehbi Demirci and Cevdet Aykanat. Cartesian Partitioning Models for 2D and 3D Parallel SpGEMM Algorithms. TPDS 31, 12 (2020).","journal-title":"Parallel SpGEMM Algorithms. TPDS"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.2172\/1417260"},{"key":"e_1_3_2_2_27_1","first-page":"2","volume":"28","author":"Duff S.","year":"2002","unstructured":"Iain\u00a0S. Duff, Michael\u00a0A. Heroux, and Roldan Pozo. An Overview of the Sparse Basic Linear Algebra Subprograms: The New Standard from the BLAS Technical Forum. TOMS 28, 2 (2002).","journal-title":"BLAS Technical Forum. TOMS"},{"key":"e_1_3_2_2_28_1","first-page":"3","volume":"23","author":"Duff S.","year":"1997","unstructured":"Iain\u00a0S. Duff, Michele Marrone, Giuseppe Radicati, and Carlo Vittoli. Level 3 Basic Linear Algebra Subprograms for Sparse Matrices: A User-Level Interface. TOMS 23, 3 (1997).","journal-title":"User-Level Interface. TOMS"},{"key":"e_1_3_2_2_29_1","unstructured":"Mingjia Fan Xiaotian Tian Yintao He Junxian Li Yiru Duan Xiaozhe Hu Ying Wang Zhou Jin and Weifeng Liu. AmgR: Algebraic Multigrid Accelerated on ReRAM. In DAC \u201923."},{"key":"e_1_3_2_2_30_1","unstructured":"Valentin\u00a0Le F\u00e8vre and Marc Casas. Efficient Execution of SpGEMM on Long Vector Architectures. In HPDC \u201923."},{"key":"e_1_3_2_2_31_1","unstructured":"Xu Fu Bingbin Zhang Tengcheng Wang Wenhao Li Yuechen Lu Enxin Yi Jianqi Zhao Xiaohan Geng Fangying Li Jingwen Zhang Zhou Jin and Weifeng Liu. PanguLU: A Scalable Regular Two-Dimensional Block-Cyclic Sparse Direct Solver on Distributed Heterogeneous Systems. In SC \u201923."},{"key":"e_1_3_2_2_32_1","unstructured":"Hormozd Gahvari William Gropp Kirk\u00a0E. Jordan Martin Schulz and Ulrike Meier Yang. Modeling the Performance of an Algebraic Multigrid Cycle Using Hybrid MPI\/OpenMP. In ICPP \u201912."},{"key":"e_1_3_2_2_33_1","first-page":"12","volume":"55","author":"Gao Jianhua","year":"2023","unstructured":"Jianhua Gao, Weixing Ji, Fangli Chang, Shiyu Han, Bingxin Wei, Zeming Liu, and Yizhuo Wang. A Systematic Survey of General Sparse Matrix-Matrix Multiplication. CSUR 55, 12 (2023).","journal-title":"General Sparse Matrix-Matrix Multiplication. CSUR"},{"key":"e_1_3_2_2_34_1","first-page":"1","volume":"13","author":"Gilbert R.","year":"1992","unstructured":"John\u00a0R. Gilbert, Cleve Moler, and Robert Schreiber. Sparse Matrices in MATLAB: Design and Implementation. SIMAX 13, 1 (1992).","journal-title":"Implementation. SIMAX"},{"key":"e_1_3_2_2_35_1","first-page":"4","volume":"40","author":"Gremse Felix","year":"2018","unstructured":"Felix Gremse, Kerstin K\u00fcpper, and Uwe Naumann. Memory-Efficient Sparse Matrix-Matrix Multiplication by Row Merging on Many-Core Architectures. SISC 40, 4 (2018).","journal-title":"Many-Core Architectures. SISC"},{"key":"e_1_3_2_2_36_1","unstructured":"Zhixiang Gu Jose Moreira David Edelsohn and Ariful Azad. Bandwidth Optimized Parallel Algorithms for Sparse Matrix-Matrix Multiplication Using Propagation Blocking. In SPAA \u201920."},{"key":"e_1_3_2_2_37_1","first-page":"3","volume":"4","author":"Gustavson G.","year":"1978","unstructured":"Fred\u00a0G. Gustavson. Two Fast Algorithms for Sparse Matrices: Multiplication and Permuted Transposition. TOMS 4, 3 (1978).","journal-title":"Permuted Transposition. TOMS"},{"key":"e_1_3_2_2_38_1","unstructured":"Kaixi Hou Weifeng Liu Hao Wang and Wu-chun Feng. Fast Segmented Sort on GPUs. In ICS \u201917."},{"key":"e_1_3_2_2_39_1","unstructured":"Md Taufique Hussain Oguz Selvitopi Ayd\u0131n Bulu\u00e7 and Ariful Azad. Communication-Avoiding and Memory-Constrained Sparse Matrix-Matrix Multiplication at Extreme Scale. In IPDPS \u201921."},{"key":"e_1_3_2_2_40_1","first-page":"5","volume":"49","author":"Ji Haonan","year":"2021","unstructured":"Haonan Ji, Shibo Lu, Kaixi Hou, Hao Wang, Zhou Jin, Weifeng Liu, and Brian Vinter. Segmented Merge: A New Primitive for Parallel Sparse Matrix Computations. IJPP 49, 5 (2021).","journal-title":"Parallel Sparse Matrix Computations. IJPP"},{"key":"e_1_3_2_2_41_1","unstructured":"Rakesh Kumar Keith\u00a0I. Farkas Norman\u00a0P. Jouppi Parthasarathy Ranganathan and Dean Tullsen. Single-ISA Heterogeneous Multi-Core Architectures: the Potential for Processor Power Reduction. In MICRO \u201903."},{"key":"e_1_3_2_2_42_1","unstructured":"Rakesh Kumar Dean Tullsen Parthasarathy Ranganathan Norman\u00a0P. Jouppi and Keith\u00a0I. Farkas. Single-ISA Heterogeneous Multi-Core Architectures for Multithreaded Workload Performance. In ISCA \u201904."},{"key":"e_1_3_2_2_43_1","unstructured":"Rakshith Kunchum Ankur Chaudhry Aravind Sukumaran-Rajam Qingpeng Niu Israt Nisa and P. Sadayappan. On Improving Performance of Sparse Matrix-Matrix Multiplication on GPUs. In ICS \u201917."},{"key":"e_1_3_2_2_44_1","unstructured":"Jeongmyung Lee Seokwon Kang Yongseung Yu Yong-Yeon Jo Sang-Wook Kim and Yongjun Park. Optimization of GPU-Based Sparse Matrix Multiplication for Large Sparse Networks. In ICDE \u201920."},{"key":"e_1_3_2_2_45_1","unstructured":"Wenxuan Li Helin Cheng Zhengyang Lu Yuechen Lu and Weifeng Liu. HASpMV: Heterogeneity-Aware Sparse Matrix-Vector Multiplication on Modern Asymmetric Multicore Processors. In Cluster \u201923."},{"key":"e_1_3_2_2_46_1","unstructured":"Junhong Liu Xin He Weifeng Liu and Guangming Tan. Register-Based Implementation of the Sparse General Matrix-Matrix Multiplication on GPUs. In PPoPP \u201918."},{"key":"e_1_3_2_2_47_1","first-page":"3","volume":"47","author":"Liu Junhong","year":"2019","unstructured":"Junhong Liu, Xin He, Weifeng Liu, and Guangming Tan. Register-Aware Optimizations for Parallel Sparse Matrix-Matrix Multiplication. IJPP 47, 3 (2019).","journal-title":"Parallel Sparse Matrix-Matrix Multiplication. IJPP"},{"key":"e_1_3_2_2_48_1","unstructured":"Nian Liu Jinyu Gu Dahai Tang Kenli Li Binyu Zang and Haibo Chen. Asymmetry-Aware Scalable Locking. In PPoPP \u201922."},{"key":"e_1_3_2_2_49_1","unstructured":"Weifeng Liu and Brian Vinter. An Efficient GPU General Sparse Matrix-Matrix Multiplication for Irregular Data. In IPDPS \u201914."},{"key":"e_1_3_2_2_50_1","volume-title":"C","author":"Liu Weifeng","year":"2015","unstructured":"Weifeng Liu and Brian Vinter. A Framework for General Sparse Matrix-Matrix Multiplication on GPUs and Heterogeneous Processors. JPDC 85, C (2015)."},{"key":"e_1_3_2_2_51_1","volume-title":"C","author":"Liu Weifeng","year":"2015","unstructured":"Weifeng Liu and Brian Vinter. Speculative Segmented Sum for Sparse Matrix-Vector Multiplication on Heterogeneous Processors. PARCO 49, C (2015)."},{"key":"e_1_3_2_2_52_1","unstructured":"Timothy\u00a0G Mattson Carl Yang Scott McMillan Aydin Bulu\u00e7 and Jos\u00e9\u00a0E Moreira. GraphBLAS C API: Ideas for Future Versions of the Specification. In HPEC \u201917."},{"key":"e_1_3_2_2_53_1","volume-title":"C","author":"Nagasaka Yusuke","year":"2019","unstructured":"Yusuke Nagasaka, Satoshi Matsuoka, Ariful Azad, and Ayd\u0131n Bulu\u00e7. Performance Optimization, Modeling and Analysis of Sparse Matrix-Matrix Products on Multi-Core and Many-Core Processors. PARCO 90, C (2019)."},{"key":"e_1_3_2_2_54_1","unstructured":"Yuyao Niu Zhengyang Lu Haonan Ji Shuhui Song Zhou Jin and Weifeng Liu. TileSpGEMM: A Tiled Algorithm for Parallel Sparse General Matrix-Matrix Multiplication on GPUs. In PPoPP \u201922."},{"key":"e_1_3_2_2_55_1","unstructured":"Subhankar Pal Jonathan Beaumont Dong-hyeon Park Aporva Amarnath Siying Feng Chaitali Chakrabarti Hun-Seok Kim David Blaauw Trevor Mudge and Ronald Dreslinski. OuterSPACE: An Outer Product Based Sparse Matrix Multiplication Accelerator. In HPCA \u201918."},{"key":"e_1_3_2_2_56_1","unstructured":"Mathias Parger Martin Winter Daniel Mlakar and Markus Steinberger. SpECK: Accelerating GPU Sparse Matrix-Matrix Multiplication through Lightweight Analysis. In PPoPP \u201920."},{"key":"e_1_3_2_2_57_1","unstructured":"Jason Power Arkaprava Basu Junli Gu Sooraj Puthoor Bradford\u00a0M. Beckmann Mark\u00a0D. Hill Steven\u00a0K. Reinhardt and David\u00a0A. Wood. Heterogeneous System Coherence for Integrated CPU-GPU Systems. In MICRO \u201913."},{"key":"e_1_3_2_2_58_1","unstructured":"Juan\u00a0Carlos Saez Fernando Castro and Manuel Prieto-Matias. Enabling Performance Portability of Data-Parallel OpenMP Applications on Asymmetric Multicore Processors. In ICPP \u201920."},{"key":"e_1_3_2_2_59_1","unstructured":"Oguz Selvitopi Md\u00a0Taufique Hussain Ariful Azad and Ayd\u0131n Bulu\u00e7. Optimizing High Performance Markov Clustering for Pre-Exascale Architectures. In IPDPS \u201920."},{"key":"e_1_3_2_2_60_1","unstructured":"Tengcheng Wang Wenhao Li Haojie Pei Yuying Sun Zhou Jin and Weifeng Liu. Accelerating Sparse LU Factorization with Density-Aware Adaptive Matrix Multiplication for Circuit Simulation. In DAC \u201923."},{"key":"e_1_3_2_2_61_1","unstructured":"Martin Winter Daniel Mlakar Rhaleb Zayer Hans-Peter Seidel and Markus Steinberger. Adaptive Sparse Matrix-Matrix Multiplication on the GPU. In PPoPP \u201919."},{"key":"e_1_3_2_2_62_1","unstructured":"Michael Wolf Mehmet Deveci Jonathan W. Berry Simon David Hammond and Sivasankaran Rajamanickam. Fast Linear Algebra-Based Triangle Counting with KokkosKernels. In HPEC \u201917."},{"key":"e_1_3_2_2_63_1","unstructured":"Yang Xia Peng Jiang Gagan Agrawal and Rajiv Ramnath. Scaling Sparse Matrix Multiplication on CPU-GPU Nodes. In IPDPS \u201921."},{"key":"e_1_3_2_2_64_1","unstructured":"Zhen Xie Guangming Tan Weifeng Liu and Ninghui Sun. IA-SpGEMM: An Input-Aware Auto-Tuning Framework for Parallel Sparse Matrix-Matrix Multiplication. In ICS \u201919."},{"key":"e_1_3_2_2_65_1","first-page":"1","volume":"33","author":"Xie Zhen","year":"2022","unstructured":"Zhen Xie, Guangming Tan, Weifeng Liu, and Ninghui Sun. A Pattern-Based SpGEMM Library for Multi-Core and Many-Core Architectures. TPDS 33, 1 (2022).","journal-title":"Many-Core Architectures. TPDS"},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466795"},{"key":"e_1_3_2_2_67_1","unstructured":"Carl Yang Ayd\u0131n Bulu\u00e7 and John\u00a0D Owens. Implementing Push-Pull Efficiently in GraphBLAS. In ICPP \u201918."},{"key":"e_1_3_2_2_68_1","first-page":"5","volume":"32","author":"Yu Teng","year":"2021","unstructured":"Teng Yu, Runxin Zhong, Vladimir Janjic, Pavlos Petoumenos, Jidong Zhai, Hugh Leather, and John Thomson. Collaborative Heterogeneity-Aware OS Scheduler for Asymmetric Multicore Processors. TPDS 32, 5 (2021).","journal-title":"Asymmetric Multicore Processors. TPDS"},{"key":"e_1_3_2_2_69_1","first-page":"1","volume":"1","author":"Yuster Raphael","year":"2005","unstructured":"Raphael Yuster and Uri Zwick. Fast Sparse Matrix Multiplication. TALG 1, 1 (2005).","journal-title":"Uri Zwick. Fast Sparse Matrix Multiplication. TALG"},{"key":"e_1_3_2_2_70_1","volume-title":"Performance Evaluation and Analysis of Sparse Matrix and Graph Kernels on Heterogeneous Processors. THPC","author":"Zhang Feng","year":"2019","unstructured":"Feng Zhang, Weifeng Liu, Ningxuan Feng, Jidong Zhai, and Xiaoyong Du. Performance Evaluation and Analysis of Sparse Matrix and Graph Kernels on Heterogeneous Processors. THPC (2019)."},{"key":"e_1_3_2_2_71_1","first-page":"3","volume":"28","author":"Zhang Feng","year":"2017","unstructured":"Feng Zhang, Jidong Zhai, Bingsheng He, Shuhao Zhang, and Wenguang Chen. Understanding Co-Running Behaviors on Integrated CPU\/GPU Architectures. TPDS 28, 3 (2017).","journal-title":"GPU Architectures. TPDS"},{"key":"e_1_3_2_2_72_1","unstructured":"Zhekai Zhang Hanrui Wang Song Han and William J. Dally. SpArch: Efficient Architecture for Sparse Matrix Multiplication. In HPCA \u201920."}],"event":{"name":"ICPP 2023: 52nd International Conference on Parallel Processing","location":"Salt Lake City UT USA","acronym":"ICPP 2023"},"container-title":["Proceedings of the 52nd International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605573.3605611","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3605573.3605611","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:04Z","timestamp":1750182544000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605573.3605611"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,7]]},"references-count":72,"alternative-id":["10.1145\/3605573.3605611","10.1145\/3605573"],"URL":"https:\/\/doi.org\/10.1145\/3605573.3605611","relation":{},"subject":[],"published":{"date-parts":[[2023,8,7]]},"assertion":[{"value":"2023-09-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}