{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T08:02:42Z","timestamp":1764403362052,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key Research & Development Program of China","award":["2022YFB4500103"],"award-info":[{"award-number":["2022YFB4500103"]}]},{"name":"NSFC","award":["62032008"],"award-info":[{"award-number":["62032008"]}]},{"name":"STCSM","award":["23511100100"],"award-info":[{"award-number":["23511100100"]}]},{"name":"Shanghai Science & Technology Development Funds","award":["22QB1404600"],"award-info":[{"award-number":["22QB1404600"]}]},{"name":"Shanghai Pujiang Program","award":["22PJ1422000"],"award-info":[{"award-number":["22PJ1422000"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620666.3651333","type":"proceedings-article","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T12:08:21Z","timestamp":1713960501000},"page":"149-163","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Boost Linear Algebra Computation Performance via Efficient VNNI Utilization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-3536-7623","authenticated-orcid":false,"given":"Hao","family":"Zhou","sequence":"first","affiliation":[{"name":"Enflame-Tech Inc., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7829-2963","authenticated-orcid":false,"given":"Qiukun","family":"Han","sequence":"additional","affiliation":[{"name":"Enflame-Tech Inc., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1779-9221","authenticated-orcid":false,"given":"Heng","family":"Shi","sequence":"additional","affiliation":[{"name":"Enflame-Tech Inc., Shanghai, China"},{"name":"SJTU, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0533-1111","authenticated-orcid":false,"given":"Yalin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Enflame-Tech Inc., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1142-4496","authenticated-orcid":false,"given":"Jianguo","family":"Yao","sequence":"additional","affiliation":[{"name":"Enflame-Tech Inc., Shanghai, China"},{"name":"SJTU, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"MLIR's affine dialect. https:\/\/mlir.llvm.org\/docs\/Dialects\/Affine\/ . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_2_1","unstructured":"BLIS Git repository. https:\/\/github.com\/flame\/blis . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_3_1","unstructured":"Clang. https:\/\/clang.llvm.org\/ . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_4_1","unstructured":"GNU and LLVM compiler flags. https:\/\/www.bu.edu\/tech\/support\/research\/software-and-programming\/programming\/compilers\/gcc-compiler-flags\/ . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_5_1","unstructured":"GCC. https:\/\/gcc.gnu.org\/ . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_6_1","unstructured":"Intel\u00ae oneAPI Toolkits. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/toolkits.html#base-kit . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_7_1","unstructured":"Intel compiler flags. https:\/\/www.bu.edu\/tech\/support\/research\/software-and-programming\/programming\/compilers\/intel-compiler-flags\/ . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_8_1","unstructured":"Porting guide for ICC users to DPCPP or ICX. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/guide\/porting-guide-for-icc-users-to-dpcpp-or-icx.html . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_9_1","unstructured":"LLVM. https:\/\/llvm.org\/ . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_10_1","unstructured":"Intel\u00ae optimized math library for numerical computing. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/onemkl.html . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_11_1","unstructured":"OpenBLAS - An optimized BLAS library. https:\/\/www.openblas.net\/ . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_12_1","unstructured":"mlir-clang from Polygeist's Git repository. https:\/\/github.com\/llvm\/Polygeist . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_13_1","unstructured":"MLIR's scf dialect. https:\/\/mlir.llvm.org\/docs\/Dialects\/SCFDialect\/ . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_14_1","unstructured":"llvm::targettransforminfo class reference. https:\/\/llvm.org\/doxygen\/classllvm_1_1TargetTransformInfo.html . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_15_1","unstructured":"Working with operators using tensor expression. https:\/\/tvm.apache.org\/docs\/tutorial\/tensor_expr_get_started.html . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_16_1","unstructured":"VeGen Git repository. https:\/\/github.com\/ychen306\/vegen . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_17_1","unstructured":"Tuning guide for deep learning with Intel\u00ae AVX512 and Intel\u00ae deep learning boost on 3rd generation Intel\u00ae Xeon\u00ae scalable processors. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/guide\/deep-learning-with-avx512-and-dl-boost.html . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_18_1","unstructured":"VPDPBUSD - Multiply and Add Unsigned and Signed Bytes. https:\/\/www.felixcloutier.com\/x86\/vpdpbusd . Accessed: 2024-01-08."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/29873.29875"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2908080.2908111"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1168857.1168906"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2442516.2442529"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2003.00532"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3168821"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370332"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3445814.3446692"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/996841.996853"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3497776.3517770"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/349299.349320"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2004.1281665"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2254064.2254106"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3276480"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT52795.2021.00011"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/1133981.1133997"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872387"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2017.21"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.32"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2015.7054199"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3243176.3243189"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661192"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377555.3377890"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00117"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2764454"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2202.03293"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370330"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527440"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/2854038.2854054"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/2886101"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"La Jolla CA USA","acronym":"ASPLOS '24"},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651333","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:42Z","timestamp":1750291422000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651333"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":51,"alternative-id":["10.1145\/3620666.3651333","10.1145\/3620666"],"URL":"https:\/\/doi.org\/10.1145\/3620666.3651333","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}