{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T17:41:14Z","timestamp":1778694074965,"version":"3.51.4"},"reference-count":31,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2023YFB3001701"],"award-info":[{"award-number":["2023YFB3001701"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62372432"],"award-info":[{"award-number":["62372432"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1109\/tpds.2024.3432579","type":"journal-article","created":{"date-parts":[[2024,7,23]],"date-time":"2024-07-23T18:25:06Z","timestamp":1721759106000},"page":"1672-1689","source":"Crossref","is-referenced-by-count":5,"title":["IrGEMM: An Input-Aware Tuning Framework for Irregular GEMM on ARM and X86 CPUs"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8910-4951","authenticated-orcid":false,"given":"Cunyang","family":"Wei","sequence":"first","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9855-5367","authenticated-orcid":false,"given":"Haipeng","family":"Jia","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7520-9640","authenticated-orcid":false,"given":"Yunquan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianyu","family":"Yao","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2610-042X","authenticated-orcid":false,"given":"Chendi","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenxuan","family":"Cao","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ymben.2014.05.014"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3065870"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.05.302"},{"key":"ref4","article-title":"Poster: A batched cholesky solver for local RX anomaly detection on GPUs","author":"Molero","year":"2013"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.83"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/2764454"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS53394.2021.00118"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545032"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ISPA-BDCloud-SocialCom-SustainCom52081.2021.00034"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC-DSS-SmartCity-DependSys57074.2022.00042"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/1356052.1356053"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3210754"},{"key":"ref16","first-page":"1","article-title":"Designing vector-friendly compact blas and lapack kernels","volume-title":"Proc. Int. Conf. High Perform. Comput., Netw., Storage Anal.","author":"Kim"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476217"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS.2012.97"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1377603.1377607"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3431921"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/PDP2018.2018.00065"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00069"},{"key":"ref23","article-title":"Deep learning inference in Facebook data centers: Characterization, performance optimizations and hardware implications","author":"Park","year":"2018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.08.040"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511624216"},{"key":"ref26","first-page":"265","article-title":"{TensorFlow }: A system for { Large-Scale} machine learning","volume-title":"Proc. 12th USENIX Symp. Operating Syst. Des. Implementation","author":"Abadi","year":"2016"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2939785"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-017-2746-2"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2015.29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3404397.3404407"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2925987"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178496"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807673"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/71\/10601540\/10607886.pdf?arnumber=10607886","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,11]],"date-time":"2024-08-11T04:13:59Z","timestamp":1723349639000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10607886\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":31,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2024.3432579","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9]]}}}