{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T17:01:09Z","timestamp":1772643669212,"version":"3.50.1"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,22]],"date-time":"2025-06-22T00:00:00Z","timestamp":1750550400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1109\/dac63849.2025.11132989","type":"proceedings-article","created":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T17:35:41Z","timestamp":1757957741000},"page":"1-7","source":"Crossref","is-referenced-by-count":1,"title":["Finding the Pareto Frontier of Low-Precision Data Formats and MAC Architecture for LLM Inference"],"prefix":"10.1109","author":[{"given":"Brian","family":"Crafton","sequence":"first","affiliation":[{"name":"TSMC Corporate Research,San Jose,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaochen","family":"Peng","sequence":"additional","affiliation":[{"name":"TSMC Corporate Research,San Jose,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoyu","family":"Sun","sequence":"additional","affiliation":[{"name":"TSMC Corporate Research,San Jose,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ashwin","family":"Lele","sequence":"additional","affiliation":[{"name":"TSMC Corporate Research,San Jose,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Zhang","sequence":"additional","affiliation":[{"name":"TSMC Corporate Research,San 
Jose,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Win-San","family":"Khwa","sequence":"additional","affiliation":[{"name":"TSMC Corporate Research,Hsinchu,Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kerem","family":"Akarvardar","sequence":"additional","affiliation":[{"name":"TSMC Corporate Research,San Jose,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Nvidia tesla v100 gpu architecture","year":"2017"},{"key":"ref2","first-page":"1","article-title":"Ieee standard for floating-point arithmetic","year":"2019","journal-title":"IEEE Std 754-2019 (Revision of IEEE 754-2008)"},{"key":"ref3","article-title":"Nvidia h100 tensor core gpu architecture","year":"2022"},{"key":"ref4","first-page":"223","article-title":"Rethinking floating point overheads for mixed precision dnn accelerators","volume-title":"Proceedings of Machine Learning and Systems","volume":"3","author":"Abdelaziz"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2017.29"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/s17-2001"},{"key":"ref7","first-page":"873","article-title":"Vs-quant: Per-vector scaled quantization for accurate low-precision neural network inference","volume-title":"Proceedings of Machine Learning and Systems","volume":"3","author":"Dai"},{"key":"ref8","first-page":"10271","article-title":"Pushing the limits of narrow precision inferencing at cloud scale with microsoft floating point","volume":"33","author":"Darvish Rouhani","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589351"},{"key":"ref10","first-page":"30318","article-title":"Gpt3.int8(): 8-bit matrix multiplication for transformers at scale","volume":"35","author":"Dettmers","year":"2022","journal-title":"Advances in Neural Information Processing 
Systems"},{"key":"ref11","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"ref12","article-title":"Training dnns with hybrid block floating point","volume":"31","author":"Drumond","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref13","article-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers","author":"Frantar","year":"2023"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2010.121"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.14529\/jsfi170206"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2019.00031"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH48897.2020.00029"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655907"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH48897.2020.00013"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2019.00021"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2023.3234893"},{"key":"ref23","article-title":"Flexpoint: An adaptive numerical format for efficient training of deep neural networks","volume":"30","author":"K\u00f6ster","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS46773.2023.10182007"},{"key":"ref25","article-title":"Roberta: A robustly optimized bert pretraining approach","author":"Liu","year":"2019"},{"key":"ref26","article-title":"Llm-qat: Data-free quantization aware training for large language models","author":"Liu","year":"2023"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH61463.2024.00016"},{"key":"ref28","article-title":"Openelm: An efficient language model family with open training and 
inference framework","author":"Mehta","year":"2024"},{"key":"ref29","article-title":"Convolutional neural networks using logarithmic data representation. arxiv 2016","author":"Miyashita","journal-title":"arXiv preprint arXiv:1603.01025"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD53106.2021.00032"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3205713"},{"key":"ref32","article-title":"Ocp microscaling (mx) specification","author":"Rouhani","year":"2023"},{"key":"ref33","article-title":"Microscaling data formats for deep learning","author":"Rouhani","year":"2023","journal-title":"arXiv preprint arXiv:2310.10537"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2016.2525042"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/T-C.1975.224172"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218516"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/hcs61935.2024.10665247"},{"key":"ref38","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref39","first-page":"38087","article-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","volume-title":"International Conference on Machine Learning","author":"Xiao"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1472"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2022.3202747"}],"event":{"name":"2025 62nd ACM\/IEEE Design Automation Conference (DAC)","location":"San Francisco, CA, USA","start":{"date-parts":[[2025,6,22]]},"end":{"date-parts":[[2025,6,25]]}},"container-title":["2025 62nd ACM\/IEEE Design Automation Conference 
(DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11132383\/11132091\/11132989.pdf?arnumber=11132989","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:32:06Z","timestamp":1758000726000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11132989\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/dac63849.2025.11132989","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]}}}