{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T21:33:29Z","timestamp":1773264809650,"version":"3.50.1"},"reference-count":30,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"publisher","award":["92464302"],"award-info":[{"award-number":["92464302"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Emerg. Sel. Topics Circuits Syst."],
"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/jetcas.2025.3562937","type":"journal-article","created":{"date-parts":[[2025,4,21]],"date-time":"2025-04-21T17:39:23Z","timestamp":1745257163000},"page":"272-284","source":"Crossref","is-referenced-by-count":1,"title":["Adaptive Two-Range Quantization and Hardware Co-Design for Large Language Model Acceleration"],"prefix":"10.1109","volume":"15","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1478-2202","authenticated-orcid":false,"given":"Siqi","family":"Cai","sequence":"first","affiliation":[{"name":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6944-2958","authenticated-orcid":false,"given":"Gang","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1244-7657","authenticated-orcid":false,"given":"Wenjie","family":"Li","sequence":"additional","affiliation":[{"name":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6826-2670","authenticated-orcid":false,"given":"Dongxu","family":"Lyu","sequence":"additional","affiliation":[{"name":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0486-6421","authenticated-orcid":false,"given":"Guanghui","family":"He","sequence":"additional","affiliation":[{"name":"School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2024.3383347"},{"key":"ref2","article-title":"OPT: Open pre-trained transformer language models",
"author":"Zhang","year":"2022","journal-title":"arXiv:2205.01068"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2024.3350661"},{"key":"ref4","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. NIPS","author":"Brown"},{"key":"ref5","first-page":"38087","article-title":"SmoothQuant: Accurate and efficient post-training quantization for large language models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xiao"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2021.3127129"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2021.3131609"},{"key":"ref8","first-page":"30318","article-title":"LLM.int8(): 8-bit matrix multiplication for transformers at scale","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Dettmers"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00071"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00063"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589038"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.195"},{"key":"ref14","article-title":"RPTQ: Reorder-based post-training quantization for large language models","author":"Yuan","year":"2023","journal-title":"arXiv:2304.01089"},{"key":"ref15","article-title":"Quantization and training of neural networks for efficient integer-arithmetic-only inference","author":"Jacob","year":"2017","journal-title":"arXiv:1712.05877"},{"key":"ref16","article-title":"MQBench: Towards reproducible and deployable model quantization benchmark","author":"Li","year":"2021","journal-title":"arXiv:2111.03759"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2018.8464801"},{"key":"ref18","article-title":"Pointer sentinel mixture models",
"author":"Merity","year":"2016","journal-title":"arXiv:1609.07843"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.3115\/1075812.1075835"},{"key":"ref20","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref21","article-title":"GPTQ: Accurate post-training quantization for generative pre-trained transformers","author":"Frantar","year":"2022","journal-title":"arXiv:2210.17323"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218516"},{"key":"ref23","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv:2302.13971"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00095"},{"key":"ref25","first-page":"41","article-title":"Fine-grained DRAM: Energy-efficient DRAM for extreme bandwidth systems","volume-title":"Proc. 50th Annu. IEEE\/ACM Int. Symp. Microarchitecture (MICRO)","author":"O\u2019Connor"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/SSDM.2003.1214975"},{"key":"ref27","article-title":"Think you have solved question answering? Try ARC, the AI2 reasoning challenge",
"author":"Clark","year":"2018","journal-title":"arXiv:1803.05457"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1260"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2021.3072217"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655986"}],"container-title":["IEEE Journal on Emerging and Selected Topics in Circuits and Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5503868\/11050007\/10971983.pdf?arnumber=10971983","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,26]],"date-time":"2025-06-26T04:29:25Z","timestamp":1750912165000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10971983\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":30,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/jetcas.2025.3562937","relation":{},"ISSN":["2156-3357","2156-3365"],"issn-type":[{"value":"2156-3357","type":"print"},{"value":"2156-3365","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}