{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:33:43Z","timestamp":1772724823125,"version":"3.50.1"},"reference-count":44,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,2,27]],"date-time":"2021-02-27T00:00:00Z","timestamp":1614384000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,2,27]],"date-time":"2021-02-27T00:00:00Z","timestamp":1614384000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,2,27]],"date-time":"2021-02-27T00:00:00Z","timestamp":1614384000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["CCF-1751400"],"award-info":[{"award-number":["CCF-1751400"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,2,27]]},"DOI":"10.1109\/cgo51591.2021.9370330","type":"proceedings-article","created":{"date-parts":[[2021,3,11]],"date-time":"2021-03-11T21:33:26Z","timestamp":1615498406000},"page":"77-89","source":"Crossref","is-referenced-by-count":23,"title":["UNIT: Unifying Tensorized Instruction Compilation"],"prefix":"10.1109","author":[{"given":"Jian","family":"Weng","sequence":"first","affiliation":[]},{"given":"Animesh","family":"Jain","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Leyuan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yida","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Tony","family":"Nowatzki","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Schedule trees","author":"verdoolaege","year":"2014","journal-title":"Second International Workshop on Polyhedral Compilation Techniques"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2400682.2400713"},{"key":"ref33","first-page":"131","author":"ira","year":"2007","journal-title":"Loop-aware SLP in GCC"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3211346.3211348"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080256"},{"key":"ref37","author":"nicolas","year":"2018","journal-title":"Tensor Comprehensions Framework-Agnostic High-Performance Machine Learning Abstractions"},{"key":"ref36","author":"tavarageri","year":"2020","journal-title":"PolyDL Polyhedral optimizations for creation of high performance DL primitives"},{"key":"ref35","author":"grover","year":"2020","journal-title":"Automatic kernel generation for Volta tensor cores"},{"key":"ref34","article-title":"Glow: Graph lowering compiler techniques for neural networks","author":"rotem","year":"2018","journal-title":"CoRR abs\/1805 00907"},{"key":"ref10","first-page":"578","article-title":"TVM: An automated end-to-end optimizing compiler for deep learning","author":"chen","year":"2018","journal-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322229"},{"key":"ref11","first-page":"3389","article-title":"Learning to optimize tensor programs","author":"chen","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3240765.3240838"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358276"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref15","article-title":"TC-CIM: Empowering tensor comprehensions for computing-in-memory","author":"drebes","year":"2020","journal-title":"IMPACT 2020&#x2013;10th International Workshop on Polyhedral Compilation Techniques"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1145\/996893.996853","article-title":"Vectorization for simd architectures with alignment constraints","volume":"39","author":"alexandre","year":"2004","journal-title":"ACM SIGPLAN Notices"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3404397.3404407"},{"key":"ref18","article-title":"Quantization and training of neural networks for efficient journal","volume":"abs 1712 5877","author":"benoit","year":"2017","journal-title":"CoRR"},{"key":"ref19","author":"jain","year":"2020","journal-title":"Efficient execution of quantized deep learning models A compiler approach"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1133981.1133997"},{"key":"ref4","year":"2020","journal-title":"Nvidia CUDA &#x00AE; deep neural network library (cuDNN)"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2006.25"},{"key":"ref3","year":"2020","journal-title":"Apache MXNet A Flexible and Efficient Library for Deep Learning"},{"key":"ref6","year":"2020","journal-title":"oneAPI deep neural network library (oneDNN)"},{"key":"ref29","first-page":"65","article-title":"Swizzle inventor: data movement synthesis for GPU kernels","author":"mangpo","year":"2019","journal-title":"Proceedings of the fourth international conference on Architectural support for programming languages and operating systems - AS"},{"key":"ref5","year":"2020","journal-title":"NVIDIA Tensor Cores"},{"key":"ref8","year":"2020","journal-title":"TensorFlow&#x2122;"},{"key":"ref7","year":"2020","journal-title":"PyTorch"},{"key":"ref2","year":"2019","journal-title":"Introduction to Intel deep learning boost on second generation Intel Xeon scalable processors"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3372266"},{"key":"ref1","year":"2017","journal-title":"Exploring the Arm dot product instructions"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462187"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/358438.349320"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3296957.3173176"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00063"},{"key":"ref24","article-title":"Mixed precision training","author":"paulius","year":"2017","journal-title":"CoRR absI1710 03740"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00032"},{"key":"ref23","first-page":"1025","article-title":"Optimizing CNN model inference on CPUs","author":"liu","year":"2019","journal-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00071"},{"key":"ref26","article-title":"Stream-dataflow acceleration","author":"tony","year":"2017","journal-title":"2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA) ISCA"},{"key":"ref43","year":"2017","journal-title":"XLA Team Xla - tensorflow compiled"},{"key":"ref25","year":"0","journal-title":"Multi-level IR compiler framework"}],"event":{"name":"2021 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)","location":"Seoul, Korea (South)","start":{"date-parts":[[2021,2,27]]},"end":{"date-parts":[[2021,3,3]]}},"container-title":["2021 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9370300\/9370301\/09370330.pdf?arnumber=9370330","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:42:50Z","timestamp":1652197370000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9370330\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,27]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/cgo51591.2021.9370330","relation":{},"subject":[],"published":{"date-parts":[[2021,2,27]]}}}