{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T01:27:07Z","timestamp":1755998827311,"version":"3.37.3"},"reference-count":15,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,11,2]],"date-time":"2022-11-02T00:00:00Z","timestamp":1667347200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,11,2]],"date-time":"2022-11-02T00:00:00Z","timestamp":1667347200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003052","name":"Ministry of Trade, Industry & Energy(MOTIE, Korea)","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003052","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010418","name":"Institute of Information & communications Technology Planning & Evaluation(IITP)","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,11,2]]},"DOI":"10.1109\/sips55645.2022.9919243","type":"proceedings-article","created":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T22:55:20Z","timestamp":1667516120000},"page":"1-6","source":"Crossref","is-referenced-by-count":1,"title":["Understanding and Optimizing INT4 Convolution for Accelerated DNN Inference on Tensor Cores"],"prefix":"10.1109","author":[{"given":"Junkyeong","family":"Choi","sequence":"first","affiliation":[{"name":"Nota Inc.,South Korea"}]},{"given":"Hyucksung","family":"Kwon","sequence":"additional","affiliation":[{"name":"Hanyang University,Dept. of Electronic Engineering,South Korea"}]},{"given":"Woongkyu","family":"Lee","sequence":"additional","affiliation":[{"name":"Hanyang University,Dept. of Electronic Engineering,South Korea"}]},{"given":"Jieun","family":"Lim","sequence":"additional","affiliation":[{"name":"Nota Inc.,South Korea"}]},{"given":"Jungwook","family":"Choi","sequence":"additional","affiliation":[{"name":"Hanyang University,Dept. of Electronic Engineering,South Korea"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref3","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv preprint"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/SCCC.2018.8705253"},{"key":"ref5","first-page":"3393","article-title":"Learning to optimize tensor programs","author":"Chen","year":"2018","journal-title":"ser. NIPS\u201918"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2018.00091"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00065"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358269"},{"key":"ref9","article-title":"Automatic kernel generation for volta tensor cores","author":"Bhaskaracharya","year":"2020","journal-title":"arXiv preprint"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00071"},{"key":"ref11","first-page":"11875","article-title":"Hawq-v3: Dyadic neural network quantization","volume-title":"International Conference on Machine Learning","author":"Yao","year":"2021"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476157"},{"key":"ref13","first-page":"863","article-title":"Ansor: Generating high-performance tensor programs for deep learning","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Zheng","year":"2020"},{"key":"ref14","first-page":"578","article-title":"TVM: An automated end-to-end optimizing compiler for deep learning","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen","year":"2018"},{"volume-title":"NVIDIA Nsight compute","key":"ref15"}],"event":{"name":"2022 IEEE Workshop on Signal Processing Systems (SiPS)","start":{"date-parts":[[2022,11,2]]},"location":"Rennes, France","end":{"date-parts":[[2022,11,4]]}},"container-title":["2022 IEEE Workshop on Signal Processing Systems (SiPS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9919234\/9919202\/09919243.pdf?arnumber=9919243","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T05:47:46Z","timestamp":1706075266000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9919243\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,2]]},"references-count":15,"URL":"https:\/\/doi.org\/10.1109\/sips55645.2022.9919243","relation":{},"subject":[],"published":{"date-parts":[[2022,11,2]]}}}