{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,29]],"date-time":"2025-06-29T04:04:22Z","timestamp":1751169862913,"version":"3.41.0"},"reference-count":16,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T00:00:00Z","timestamp":1748131200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T00:00:00Z","timestamp":1748131200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,25]]},"DOI":"10.1109\/iscas56072.2025.11044273","type":"proceedings-article","created":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T17:42:19Z","timestamp":1751046139000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["A 17.1 TOPS\/W FP-INT Transformer Inference Accelerator with Sparsity Boosting and Output Importance-Aware Processing"],"prefix":"10.1109","author":[{"given":"Jeonggyu","family":"So","sequence":"first","affiliation":[{"name":"KAIST,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Seongyon","family":"Hong","sequence":"additional","affiliation":[{"name":"KAIST,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiwon","family":"Choi","sequence":"additional","affiliation":[{"name":"KAIST,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wooyoung","family":"Jo","sequence":"additional","affiliation":[{"name":"KAIST,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sangjin","family":"Kim","sequence":"additional","affiliation":[{"name":"KAIST,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hoi-Jun","family":"Yoo","sequence":"additional","affiliation":[{"name":"KAIST,Daejeon,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Donghyeon","family":"Han","sequence":"additional","affiliation":[{"name":"Chung-ang University,Seoul,Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref2","first-page":"4171","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proceedings of NAACLHLT","author":"Jacob"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1145\/3714983.3714987","article-title":"AWQ: activation-aware weight quantization for on-device LLM compression and acceleration","volume-title":"Proceedings of the Seventh Annual Conference on Machine Learning and Systems, MLSys 2024","author":"Lin"},{"journal-title":"Gptq: Accurate post-training quantization for generative pretrained transformers","year":"2022","author":"Frantar","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2024.3397189"},{"issue":"2020","key":"ref7","first-page":"10271","article-title":"Pushing the limits of narrow precision inferencing at cloud scale with microsoft floating point","volume":"33","author":"Rouhani","journal-title":"Advances in neural information processing systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3222059"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00064"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2022.3209872"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42614.2022.9731659"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2023.3304752"},{"journal-title":"Roberta: A robustly optimized bert pretraining approach","year":"2019","author":"Liu","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ESSCIRC55480.2022.9911343"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42614.2022.9731686"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42615.2023.10067817"}],"event":{"name":"2025 IEEE International Symposium on Circuits and Systems (ISCAS)","start":{"date-parts":[[2025,5,25]]},"location":"London, United Kingdom","end":{"date-parts":[[2025,5,28]]}},"container-title":["2025 IEEE International Symposium on Circuits and Systems (ISCAS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11043142\/11042930\/11044273.pdf?arnumber=11044273","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T06:50:38Z","timestamp":1751093438000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11044273\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,25]]},"references-count":16,"URL":"https:\/\/doi.org\/10.1109\/iscas56072.2025.11044273","relation":{},"subject":[],"published":{"date-parts":[[2025,5,25]]}}}