{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T22:19:54Z","timestamp":1771625994138,"version":"3.50.1"},"reference-count":79,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003977","name":"Israel Science Foundation","doi-asserted-by":"crossref","award":["1641\/21"],"award-info":[{"award-number":["1641\/21"]}],"id":[{"id":"10.13039\/501100003977","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003977","name":"Israel Science Foundation","doi-asserted-by":"crossref","award":["2878\/25"],"award-info":[{"award-number":["2878\/25"]}],"id":[{"id":"10.13039\/501100003977","id-type":"DOI","asserted-by":"crossref"}]},{"name":"MIT-IBM Watson AI Laboratory"},{"name":"National Science Foundation","award":["CCF-2131115"],"award-info":[{"award-number":["CCF-2131115"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Inform. Theory"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1109\/tit.2025.3649596","type":"journal-article","created":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T18:37:13Z","timestamp":1767119833000},"page":"1943-1972","source":"Crossref","is-referenced-by-count":0,"title":["Optimal Quantization for Matrix Multiplication"],"prefix":"10.1109","volume":"72","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5791-7923","authenticated-orcid":false,"given":"Or","family":"Ordentlich","sequence":"first","affiliation":[{"name":"Hebrew University of Jerusalem, Jerusalem, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2109-0979","authenticated-orcid":false,"given":"Yury","family":"Polyanskiy","sequence":"additional","affiliation":[{"name":"MIT, Cambridge, MA, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/BF02165411"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-022-05172-4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/FOCS57990.2023.00130"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611977912.134"},{"key":"ref5","article-title":"NestQuant: Nested lattice quantization for matrix products and LLMs","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Savkin"},{"key":"ref6","article-title":"QuIP#: Even better LLM quantization with Hadamard incoherence and lattice codebooks","author":"Tseng","year":"2024","journal-title":"arXiv:2402.04396"},{"key":"ref7","article-title":"QuaRot: Outlier-free 4-bit inference in rotated LLMs","author":"Ashkboos","year":"2024","journal-title":"arXiv:2404.00456"},{"key":"ref8","first-page":"4396","article-title":"QuIP: 2-bit quantization of large language models with guarantees","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Chee"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1983.1056761"},{"key":"ref10","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1017\/9781108966351"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2004.834787"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139045520"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2016.2571719"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1090\/jams\/984"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/237814.237823"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1561\/2200000035"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1017\/S0962492920000021"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1137\/S0097539704442684"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/509907.509965"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/997817.997857"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/2493252.2493254"},{"key":"ref24","article-title":"Dynamic network surgery for efficient DNNs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Guo"},{"issue":"187","key":"ref25","first-page":"1","article-title":"Quantized neural networks: Training neural networks with low precision weights and activations","volume":"18","author":"Hubara","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref26","article-title":"Compressing deep convolutional networks using vector quantization","author":"Gong","year":"2014","journal-title":"arXiv:1412.6115"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"ref28","first-page":"30318","article-title":"GPT3. int8 (): 8-bit matrix multiplication for transformers at scale","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Dettmers"},{"key":"ref29","first-page":"27168","article-title":"ZeroQuant: Efficient and affordable post-training quantization for large-scale transformers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yao"},{"key":"ref30","first-page":"38087","article-title":"SmoothQuant: Accurate and efficient post-training quantization for large language models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xiao"},{"key":"ref31","article-title":"The era of 1-bit LLMs: All large language models are in 1.58 bits","author":"Ma","year":"2024","journal-title":"arXiv:2402.17764"},{"key":"ref32","article-title":"OPTQ: Accurate quantization for generative pre-trained transformers","volume-title":"Proc. 11th Int. Conf. Learn. Represent","author":"Frantar"},{"key":"ref33","article-title":"QTIP: Quantization with trellises and incoherence processing","author":"Tseng","year":"2024","journal-title":"arXiv:2406.11235"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.57"},{"key":"ref35","first-page":"992","article-title":"Multiplying matrices without multiplying","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Blalock"},{"key":"ref36","article-title":"And the bit goes down: Revisiting the quantization of neural networks","author":"Stock","year":"2019","journal-title":"arXiv:1907.05686"},{"key":"ref37","first-page":"482","article-title":"Quantization based fast inner product search","volume-title":"Proc. Artif. Intell. Statist.","author":"Guo"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT63088.2025.11195282"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT57864.2024.10619604"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2009.2032853"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2010.2090225"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2015.2402972"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/DCC.2014.37"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2019.2963864"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/JSAIT.2023.3234502"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3626-0"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2021.3083271"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2005.855591"},{"key":"ref49","article-title":"A proof of the existence of good nested lattices","author":"Krithivasan","year":"2007"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1134\/S0032946007010073"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2014.2332343"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2014.2343226"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2016.2593633"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ITW.2016.7606876"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2017.2721421"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2017.2778158"},{"key":"ref57","article-title":"Bounds on the density of smooth lattice coverings","author":"Ordentlich","year":"2023","journal-title":"arXiv:2311.04644"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/OJCOMS.2024.3452040"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ITW61385.2024.10806974"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/18.508838"},{"key":"ref61","volume-title":"Introducing NVFP4 for Efficient and Accurate Low-Precision Inference","author":"Alvarez","year":"2025"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/18.720542"},{"key":"ref63","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-2016-7","volume-title":"Sphere Packings, Lattices and Groups","author":"Conway","year":"1988"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2023.3291313"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.4007\/annals.2017.185.3.7"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1982.1056484"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.4007\/annals.2009.170.1003"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.4007\/annals.2017.185.3.8"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/18.243466"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/18.412695"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/18.771234"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1982.1056483"},{"key":"ref73","article-title":"The Voronoi spherical CDF for lattices and linear codes: New bounds for quantization and coding","author":"Ordentlich","year":"2025","journal-title":"arXiv:2506.19791"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1137\/060673096"},{"key":"ref75","volume-title":"Accelerated Dense Random Projections","author":"Liberty","year":"2009"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1142\/S1793536911000787"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2004.834787"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-28650-9_9"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.2307\/3213932"}],"container-title":["IEEE Transactions on Information Theory"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/18\/11400648\/11318974-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/18\/11400648\/11318974.pdf?arnumber=11318974","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T21:16:40Z","timestamp":1771622200000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11318974\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":79,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tit.2025.3649596","relation":{},"ISSN":["0018-9448","1557-9654"],"issn-type":[{"value":"0018-9448","type":"print"},{"value":"1557-9654","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3]]}}}