{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T06:10:11Z","timestamp":1747894211824,"version":"3.41.0"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002551","name":"Seoul National University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002551","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003836","name":"IC Design Education Center","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003836","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.23919\/date64628.2025.10992868","type":"proceedings-article","created":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T17:36:35Z","timestamp":1747848995000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["Integer Unit-Based Outlier-Aware LLM Accelerator Preserving Numerical Accuracy of FP-FP GEMM"],"prefix":"10.23919","author":[{"given":"Jehun","family":"Lee","sequence":"first","affiliation":[{"name":"Seoul National University,Seoul,Korea"}]},{"given":"Jae-Joon","family":"Kim","sequence":"additional","affiliation":[{"name":"Seoul National University,Seoul,Korea"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Bert: Pretraining of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv preprint"},{"issue":"8","key":"ref2","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref3","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv preprint"},{"key":"ref4","first-page":"10271","article-title":"Pushing the limits of narrow precision inferencing at cloud scale with microsoft floating point","volume-title":"Advances in neural information processing systems","volume":"33","author":"Rouhani","year":"2020"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589351"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"ref7","article-title":"Awq: Activation-aware weight quantization for llm compression and acceleration","author":"Lin","year":"2023","journal-title":"arXiv preprint"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00064"},{"key":"ref9","article-title":"Gptq: Accurate post-training quantization for generative pretrained transformers","author":"Frantar","year":"2022","journal-title":"arXiv preprint"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00010"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2024.3423692"},{"key":"ref12","article-title":"Intel\u00ae Core\u2122 Ultra Processor Datasheet, Volume 1 of 2","volume-title":"Tech. Rep.","author":"Corporation","year":"2024"},{"key":"ref13","first-page":"17402","article-title":"Outlier suppression: Pushing the limit of low-bit transformer language models","volume":"35","author":"Wei","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref14","article-title":"Outlier suppression+: Accurate quantization of large language models by equivalent and optimal shifting and scaling","author":"Wei","year":"2023","journal-title":"arXiv preprint"},{"key":"ref15","first-page":"27 168","article-title":"Zeroquant: Efficient and affordable post-training quantization for large-scale transformers","volume":"35","author":"Yao","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref16","first-page":"7750","article-title":"The case for 4-bit precision: k-bit inference scaling laws","volume-title":"International Conference on Machine Learning","author":"Dettmers","year":"2023"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00063"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3657323"},{"key":"ref19","article-title":"Llm. int8 (): 8-bit matrix multiplication for transformers at scale","author":"Dettmers","year":"2022","journal-title":"arXiv preprint"},{"key":"ref20","first-page":"38087","article-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","author":"Xiao","year":"2023","journal-title":"International Conference on Machine Learning"},{"key":"ref21","article-title":"Training dnns with hybrid block floating point","volume":"31","author":"Drumond","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref22","article-title":"Microscaling data formats for deep learning","author":"Rouhani","year":"2023","journal-title":"arXiv preprint"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3316781.3317783","article-title":"Biscaled-dnn: Quantizing long-tailed datastructures with two scale factors for deep neural networks","volume-title":"Proceedings of the 56th Annual Design Automation Conference 2019","author":"Jain","year":"2019"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589038"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00095"},{"key":"ref26","article-title":"Brecq: Pushing the limit of post-training quantization by block reconstruction","author":"Li","year":"2021","journal-title":"arXiv preprint"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00071"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/92.845894"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2013.6567600"},{"key":"ref30","article-title":"Scale-sim: Systolic cnn accelerator simulator","author":"Samajdar","year":"2018","journal-title":"arXiv preprint"},{"journal-title":"Pointer sentinel mixture models","year":"2016","author":"Merity","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3474381"},{"key":"ref33","article-title":"Hel-laswag: Can a machine really finish your sentence?","author":"Zellers","year":"2019","journal-title":"arXiv preprint"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6239"},{"key":"ref35","article-title":"Measuring massive multitask language understanding","author":"Hendrycks","year":"2020","journal-title":"arXiv preprint"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-2124"},{"key":"ref38","first-page":"521","article-title":"Orca: A distributed serving system for {Transformer-Based} generative models","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Yu","year":"2022"}],"event":{"name":"2025 Design, Automation &amp; Test in Europe Conference (DATE)","start":{"date-parts":[[2025,3,31]]},"location":"Lyon, France","end":{"date-parts":[[2025,4,2]]}},"container-title":["2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10992638\/10992588\/10992868.pdf?arnumber=10992868","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T05:32:28Z","timestamp":1747891948000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10992868\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":38,"URL":"https:\/\/doi.org\/10.23919\/date64628.2025.10992868","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]}}}