{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T07:00:35Z","timestamp":1775199635912,"version":"3.50.1"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11434611","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["OOQ: Outlier-Oriented Quantization for Efficient Large Language Models"],"prefix":"10.1109","author":[{"given":"Haoyu","family":"Wang","sequence":"first","affiliation":[{"name":"AI Institute School of Computer Science Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence,Shanghai,China"}]},{"given":"Bei","family":"Liu","sequence":"additional","affiliation":[{"name":"AI Institute School of Computer Science Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence,Shanghai,China"}]},{"given":"Hang","family":"Shao","sequence":"additional","affiliation":[{"name":"AI Institute School of Computer Science Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence,Shanghai,China"}]},{"given":"Bo","family":"Xiao","sequence":"additional","affiliation":[{"name":"Meituan,Beijing,China"}]},{"given":"Ke","family":"Zeng","sequence":"additional","affiliation":[{"name":"Meituan,Beijing,China"}]},{"given":"Guanglu","family":"Wan","sequence":"additional","affiliation":[{"name":"Meituan,Beijing,China"}]},{"given":"Yanmin","family":"Qian","sequence":"additional","affiliation":[{"name":"AI Institute School of Computer Science Shanghai Jiao Tong University,Auditory Cognition and Computational Acoustics Lab MoE Key Lab of Artificial Intelligence,Shanghai,China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref2","article-title":"Opt: Open pre-trained transformer language models","author":"Zhang","year":"2022","journal-title":"arXiv preprint arXiv:2205.01068"},{"key":"ref3","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv preprint arXiv:2302.13971"},{"key":"ref4","article-title":"Scaling laws for autoregressive generative modeling","author":"Henighan","year":"2020","journal-title":"arXiv preprint arXiv:2010.14701"},{"key":"ref5","first-page":"38087","article-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","volume-title":"International Conference on Machine Learning","author":"Xiao"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.52202\/068431-2198"},{"key":"ref7","article-title":"Quip: 2-bit quantization of large 
language models with guarantees","volume":"36","author":"Chee","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","article-title":"Awq: Activation-aware weight quantization for llm compression and acceleration","author":"Lin","year":"2023","journal-title":"arXiv preprint arXiv:2306.00978"},{"key":"ref9","article-title":"Squeezellm: Dense-and-sparse quantization","author":"Kim","year":"2023","journal-title":"arXiv preprint arXiv:2306.07629"},{"key":"ref10","first-page":"9847","article-title":"Towards accurate post-training network quantization via bit-split and stitching","volume-title":"International Conference on Machine Learning","author":"Wang"},{"key":"ref11","first-page":"4466","article-title":"Accurate post training quantization with small calibration sets","volume-title":"International Conference on Machine Learning","author":"Hubara"},{"key":"ref12","article-title":"The case for 4-bit precision: k-bit inference scaling laws","volume-title":"International Conference on Machine Learning","author":"Dettmers"},{"key":"ref13","article-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers","author":"Frantar","year":"2022","journal-title":"arXiv preprint arXiv:2210.17323"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0323"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i12.29237"},{"key":"ref16","article-title":"Spqr: A sparse-quantized representation for near-lossless llm weight compression","author":"Dettmers","year":"2023","journal-title":"arXiv preprint arXiv:2306.03078"},{"key":"ref17","article-title":"Outlier weighed layerwise sparsity (owl): A missing secret sauce for pruning llms to high sparsity","author":"Yin","year":"2023","journal-title":"arXiv preprint arXiv:2310.05175"},{"key":"ref18","article-title":"Loftq: Lora-fine-tuning-aware quantization for large language models","author":"Li","year":"2023","journal-title":"arXiv preprint arXiv:2310.08659"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0441"},{"key":"ref20","article-title":"Qa-lora: Quantization-aware low-rank adaptation of large language models","author":"Xu","year":"2023","journal-title":"arXiv preprint arXiv:2309.14717"},{"key":"ref21","article-title":"Lora: Low-rank adaptation of large language models","author":"Hu","year":"2021","journal-title":"arXiv preprint arXiv:2106.09685"},{"key":"ref22","article-title":"Onebit: Towards extremely low-bit large language models","author":"Xu","year":"2024","journal-title":"arXiv preprint arXiv:2402.11295"},{"key":"ref23","article-title":"Omniquant: Omnidirectionally calibrated quantization for large language models","author":"Shao","year":"2023","journal-title":"arXiv preprint arXiv:2308.13137"},{"key":"ref24","article-title":"Pointer sentinel mixture models","author":"Merity","year":"2016","journal-title":"arXiv preprint arXiv:1609.07843"},{"issue":"140","key":"ref25","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"Journal of Machine Learning Research"},{"key":"ref26","article-title":"Quip#: Even better llm quantization with hadamard incoherence and lattice codebooks","volume-title":"Forty-first International Conference on Machine Learning","author":"Tseng"},{"key":"ref27","article-title":"decoupleq: Towards 2-bit post-training uniform quantization via decoupling parameters into integer and 
floating points","author":"Guo","year":"2024","journal-title":"arXiv preprint arXiv:2404.12759"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.26"},{"key":"ref29","article-title":"Llm-mq: Mixed-precision quantization for efficient llm deployment","author":"Li"},{"key":"ref30","article-title":"Enabling fast 2-bit llm on gpus: Memory alignment, sparse outlier, and asynchronous dequantization","author":"Li","year":"2023","journal-title":"arXiv preprint arXiv:2311.16442"},{"key":"ref31","article-title":"Extreme compression of large language models via additive quantization","volume-title":"Forty-first International Conference on Machine Learning","author":"Egiazarian"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3474381"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6239"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1472"},{"key":"ref35","article-title":"Think you have solved question answering? try arc, the ai2 reasoning challenge","author":"Clark","year":"2018","journal-title":"arXiv preprint arXiv:1803.05457"},{"key":"ref36","article-title":"Boolq: Exploring the surprising difficulty of natural yes\/no questions","author":"Clark","year":"2019","journal-title":"arXiv preprint arXiv:1905.10044"},{"key":"ref37","first-page":"10323","article-title":"Sparsegpt: Massive language models can be accurately pruned in one-shot","volume-title":"International Conference on Machine Learning","author":"Frantar"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11434611.pdf?arnumber=11434611","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:57:15Z","timestamp":1775192235000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434611\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11434611","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}