{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T00:57:45Z","timestamp":1773277065182,"version":"3.50.1"},"reference-count":26,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100004358","name":"Samsung Electronics Company Ltd.","doi-asserted-by":"publisher","award":["IO201207-07799-01"],"award-info":[{"award-number":["IO201207-07799-01"]}],"id":[{"id":"10.13039\/100004358","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Institute of Information and Communications Technology Planning and Evaluation"},{"name":"Korean Government Ministry of Science Information and Communication Technology (MSIT), Processing-in-Memory (PIM) Semiconductor Design Research Center","award":["2022-0-01170"],"award-info":[{"award-number":["2022-0-01170"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Emerg. Sel. Topics Circuits Syst."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/jetcas.2025.3558300","type":"journal-article","created":{"date-parts":[[2025,4,8]],"date-time":"2025-04-08T17:51:19Z","timestamp":1744134679000},"page":"231-243","source":"Crossref","is-referenced-by-count":4,"title":["LightRot: A Light-Weighted Rotation Scheme and Architecture for Accurate Low-Bit Large Language Model Inference"],"prefix":"10.1109","volume":"15","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6665-9973","authenticated-orcid":false,"given":"Sangjin","family":"Kim","sequence":"first","affiliation":[{"name":"PIM Semiconductor Design Research Center (AI-PIM), Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0698-8116","authenticated-orcid":false,"given":"Yuseon","family":"Choi","sequence":"additional","affiliation":[{"name":"Graduate School of AI Semiconductor, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3998-7385","authenticated-orcid":false,"given":"Jungjun","family":"Oh","sequence":"additional","affiliation":[{"name":"Graduate School of AI Semiconductor, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8354-2170","authenticated-orcid":false,"given":"Byeongcheol","family":"Kim","sequence":"additional","affiliation":[{"name":"Graduate School of AI Semiconductor, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6661-4879","authenticated-orcid":false,"given":"Hoi-Jun","family":"Yoo","sequence":"additional","affiliation":[{"name":"PIM Semiconductor Design Research Center (AI-PIM), Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. NIPS","author":"Brown"},{"key":"ref2","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv:2307.09288"},{"key":"ref3","article-title":"The llama 3 herd of models","author":"Grattafiori","year":"2024","journal-title":"arXiv:2407.21783"},{"key":"ref4","article-title":"Mixtral of experts","author":"Jiang","year":"2024","journal-title":"arXiv:2401.04088"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3714983.3714987"},{"key":"ref6","first-page":"23901","article-title":"SqueezeLLM: Dense-and-sparse quantization","volume-title":"Proc. ICML Poster Session","author":"Kim"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i12.29237"},{"key":"ref8","first-page":"38087","article-title":"SmoothQuant: Accurate and efficient post-training quantization for large language models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xiao"},{"key":"ref9","first-page":"196","article-title":"Atom: Low-bit quantization for efficient and accurate LLM serving","volume-title":"Proc. Mach. Learn. Syst. (MLSys)","volume":"6","author":"Zhao"},{"key":"ref10","article-title":"QServe: W4A8KV4 quantization and system co-design for efficient LLM serving","author":"Lin","year":"2024","journal-title":"arXiv:2405.04532"},{"key":"ref11","article-title":"QuaRot: Outlier-free 4-bit inference in rotated LLMs","author":"Ashkboos","year":"2024","journal-title":"arXiv:2404.00456"},{"key":"ref12","article-title":"SpinQuant: LLM quantization with learned rotations","author":"Liu","year":"2024","journal-title":"arXiv:2405.16406"},{"key":"ref13","article-title":"FlatQuant: Flatness matters for LLM quantization","author":"Sun","year":"2024","journal-title":"arXiv:2410.09426"},{"key":"ref14","first-page":"48630","article-title":"QuIP: Even better LLM quantization with Hadamard incoherence and lattice codebooks","volume-title":"Proc. Mach. Learn. Res.","author":"Tseng"},{"key":"ref15","article-title":"GLU variants improve transformer","author":"Shazeer","year":"2020","journal-title":"arXiv:2002.05202"},{"key":"ref16","article-title":"Pointer Sentinel mixture models","volume-title":"arXiv:1609.07843","author":"Merity","year":"2016"},{"key":"ref17","first-page":"46595","article-title":"Judging LLM-as-a-judge with MT-bench and chatbot arena","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS) Datasets Benchmarks Track","volume":"36","author":"Zheng"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42614.2022.9731686"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/isscc42615.2023.10067817"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2024.3397189"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/isscc49657.2024.10454330"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC49661.2025.10904594"},{"key":"ref23","article-title":"Microscaling data formats for deep learning","author":"Darvish Rouhani","year":"2023","journal-title":"arXiv:2310.10537"},{"key":"ref24","article-title":"AMXFP4: Taming activation outliers with asymmetric microscaling floating-point for 4-bit LLM inference","author":"Lee","year":"2024","journal-title":"arXiv:2411.09909"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/jssc.2023.3234893"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/jssc.2023.3312615"}],"container-title":["IEEE Journal on Emerging and Selected Topics in Circuits and Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5503868\/11050007\/10950449.pdf?arnumber=10950449","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,26]],"date-time":"2025-06-26T04:28:42Z","timestamp":1750912122000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10950449\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":26,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/jetcas.2025.3558300","relation":{},"ISSN":["2156-3357","2156-3365"],"issn-type":[{"value":"2156-3357","type":"print"},{"value":"2156-3365","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}