{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T10:46:31Z","timestamp":1768992391998,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Liaoning Binhai Laboratory Project","award":["LBLF-2023-01"],"award-info":[{"award-number":["LBLF-2023-01"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62106035, 62206038"],"award-info":[{"award-number":["62106035, 62206038"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100017676","name":"Chunhui Project Foundation of the Education Department of China","doi-asserted-by":"publisher","award":["HZKY20220419"],"award-info":[{"award-number":["HZKY20220419"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100017676","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709287","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:42:22Z","timestamp":1743792142000},"page":"812-823","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["SEPTQ: A Simple and Effective Post-Training Quantization Paradigm for Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6921-2050","authenticated-orcid":false,"given":"Han","family":"Liu","sequence":"first","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5333-2769","authenticated-orcid":false,"given":"Haotian","family":"Gao","sequence":"additional","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5013-8476","authenticated-orcid":false,"given":"Xiaotong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2790-9527","authenticated-orcid":false,"given":"Changya","family":"Li","sequence":"additional","affiliation":[{"name":"Dalian University of Technology, Dalian, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8373-9366","authenticated-orcid":false,"given":"Feng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1717-5785","authenticated-orcid":false,"given":"Wei","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen MSU-BIT University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4999-0303","authenticated-orcid":false,"given":"Fenglong","family":"Ma","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4807-1812","authenticated-orcid":false,"given":"Hong","family":"Yu","sequence":"additional","affiliation":[{"name":"Dalian University of 
Technology, Dalian, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS). 7948--7956","author":"Banner Ron","year":"2019","unstructured":"Ron Banner, Yury Nahshan, and Daniel Soudry. 2019. Post training 4-bit quantization of convolutional networks for rapid-deployment. In Conference on Neural Information Processing Systems (NeurIPS). 7948--7956."},{"key":"e_1_3_2_2_2_1","volume-title":"Kyle Richardson, Ashish Sabharwal, Carissa Schoenick, Oyvind Tafjord, and Peter Clark.","author":"Bhakthavatsalam Sumithra","year":"2021","unstructured":"Sumithra Bhakthavatsalam, Daniel Khashabi, Tushar Khot, Bhavana Dalvi Mishra, Kyle Richardson, Ashish Sabharwal, Carissa Schoenick, Oyvind Tafjord, and Peter Clark. 2021. Think you have Solved Direct-Answer Question Answering? Try ARC-DA, the Direct-Answer AI2 Reasoning Challenge. CoRR, Vol. abs\/2102.03315 (2021)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6239"},{"key":"e_1_3_2_2_4_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS). 1877--1901","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. 2020. Language models are few-shot learners. In Conference on Neural Information Processing Systems (NeurIPS). 1877--1901."},{"key":"e_1_3_2_2_5_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS).","author":"Chee Jerry","year":"2023","unstructured":"Jerry Chee, Yaohui Cai, Volodymyr Kuleshov, and Christopher De Sa. 2023. QuIP: 2-Bit Quantization of Large Language Models With Guarantees. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_2_6_1","volume-title":"8-bit Matrix Multiplication for Transformers at Scale. CoRR","author":"Dettmers Tim","year":"2022","unstructured":"Tim Dettmers, Mike Lewis, Younes Belkada, and Luke Zettlemoyer. 2022. LLM.int8(): 8-bit Matrix Multiplication for Transformers at Scale. CoRR, Vol. abs\/2208.07339 (2022)."},{"key":"e_1_3_2_2_7_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In North American","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In North American Chapter of the Association for Computational Linguistics (NAACL). 4171--4186."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10809"},{"key":"e_1_3_2_2_9_1","volume-title":"CBQ: Cross-Block Quantization for Large Language Models. CoRR","author":"Ding Xin","year":"2023","unstructured":"Xin Ding, Xiaoyu Liu, Yun Zhang, Zhijun Tu, Wei Li, Jie Hu, Hanting Chen, Yehui Tang, Zhiwei Xiong, Baoqun Yin, and Yunhe Wang. 2023. CBQ: Cross-Block Quantization for Large Language Models. CoRR, Vol. abs\/2312.07950 (2023)."},{"key":"e_1_3_2_2_10_1","volume-title":"International Conference on Machine Learning (ICML).","author":"Egiazarian Vage","year":"2024","unstructured":"Vage Egiazarian, Andrei Panferov, Denis Kuznedelev, Elias Frantar, Artem Babenko, and Dan Alistarh. 2024. Extreme Compression of Large Language Models via Additive Quantization. 
In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_2_11_1","volume-title":"Post-training Piecewise Linear Quantization for Deep Neural Networks. In European Conference on Computer Vision (ECCV). 69--86","author":"Fang Jun","year":"2020","unstructured":"Jun Fang, Ali Shafiee, Hamzah Abdel-Aziz, David Thorsley, Georgios Georgiadis, and Joseph Hassoun. 2020. Post-training Piecewise Linear Quantization for Deep Neural Networks. In European Conference on Computer Vision (ECCV). 69--86."},{"key":"e_1_3_2_2_12_1","volume-title":"Optimal Brain Compression: A Framework for Accurate Post-Training Quantization and Pruning. In Conference on Neural Information Processing Systems (NeurIPS).","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar and Dan Alistarh. 2022. Optimal Brain Compression: A Framework for Accurate Post-Training Quantization and Pruning. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_2_13_1","volume-title":"Conference on Machine Learning and Systems (MLSys).","author":"Frantar Elias","year":"2024","unstructured":"Elias Frantar and Dan Alistarh. 2024. QMoE: Sub-1-Bit Compression of Trillion Parameter Models. In Conference on Machine Learning and Systems (MLSys)."},{"key":"e_1_3_2_2_14_1","volume-title":"GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers. CoRR","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. 2022. GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers. CoRR, Vol. abs\/2210.17323 (2022)."},{"key":"e_1_3_2_2_15_1","volume-title":"Second Order Derivatives for Network Pruning: Optimal Brain Surgeon. In Conference on Neural Information Processing Systems (NeurIPS). 164--171","author":"Hassibi Babak","unstructured":"Babak Hassibi and David G. Stork. 1992. Second Order Derivatives for Network Pruning: Optimal Brain Surgeon. In Conference on Neural Information Processing Systems (NeurIPS). 164--171."},{"key":"e_1_3_2_2_16_1","volume-title":"BiLLM: Pushing the Limit of Post-Training Quantization for LLMs. In International Conference on Machine Learning (ICML).","author":"Huang Wei","year":"2024","unstructured":"Wei Huang, Yangdong Liu, Haotong Qin, Ying Li, Shiming Zhang, Xianglong Liu, Michele Magno, and Xiaojuan Qi. 2024. BiLLM: Pushing the Limit of Post-Training Quantization for LLMs. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_2_17_1","volume-title":"Accurate Post Training Quantization With Small Calibration Sets. In International Conference on Machine Learning (ICML). 4466--4475","author":"Hubara Itay","year":"2021","unstructured":"Itay Hubara, Yury Nahshan, Yair Hanani, Ron Banner, and Daniel Soudry. 2021. Accurate Post Training Quantization With Small Calibration Sets. In International Conference on Machine Learning (ICML). 4466--4475."},{"key":"e_1_3_2_2_18_1","volume-title":"Understanding and Improving Knowledge Distillation for Quantization Aware Training of Large Transformer Encoders. In Conference on Empirical Methods in Natural Language Processing (EMNLP). 6713--6725","author":"Kim Minsoo","year":"2022","unstructured":"Minsoo Kim, Sihwa Lee, Sukjin Hong, Du-Seong Chang, and Jungwook Choi. 2022. Understanding and Improving Knowledge Distillation for Quantization Aware Training of Large Transformer Encoders. In Conference on Empirical Methods in Natural Language Processing (EMNLP). 
6713--6725."},{"key":"e_1_3_2_2_19_1","volume-title":"The Optimal BERT Surgeon: Scalable and Accurate Second-Order Pruning for Large Language Models. In Conference on Empirical Methods in Natural Language Processing (EMNLP). 4163--4181","author":"Kurtic Eldar","year":"2022","unstructured":"Eldar Kurtic, Daniel Campos, Tuan Nguyen, Elias Frantar, Mark Kurtz, Benjamin Fineran, Michael Goin, and Dan Alistarh. 2022. The Optimal BERT Surgeon: Scalable and Accurate Second-Order Pruning for Large Language Models. In Conference on Empirical Methods in Natural Language Processing (EMNLP). 4163--4181."},{"key":"e_1_3_2_2_20_1","volume-title":"Fran\u00e7ois Yvon, Matthias Gall\u00e9, et al.","author":"Scao Teven Le","year":"2022","unstructured":"Teven Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, Daniel Hesslow, Roman Castagn\u00e9, Alexandra Sasha Luccioni, Fran\u00e7ois Yvon, Matthias Gall\u00e9, et al. 2022. BLOOM: A 176B-Parameter Open-Access Multilingual Language Model. CoRR, Vol. abs\/2211.05100 (2022)."},{"key":"e_1_3_2_2_21_1","volume-title":"SPQR: Controlling Q-ensemble Independence with Spiked Random Model for Reinforcement Learning. In Conference on Neural Information Processing Systems (NeurIPS).","author":"Lee Dohyeok","year":"2023","unstructured":"Dohyeok Lee, Seungyub Han, Taehyun Cho, and Jungwoo Lee. 2023a. SPQR: Controlling Q-ensemble Independence with Spiked Random Model for Reinforcement Learning. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_2_22_1","volume-title":"International Conference on Machine Learning (ICML). 18913--18939","author":"Lee Jung Hyun","year":"2023","unstructured":"Jung Hyun Lee, Jeonghoon Kim, Se Jung Kwon, and Dongsoo Lee. 2023b. FlexRound: Learnable Rounding based on Element-wise Division for Post-Training Quantization. In International Conference on Machine Learning (ICML). 18913--18939."},{"key":"e_1_3_2_2_23_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Li Yuhang","year":"2021","unstructured":"Yuhang Li, Ruihao Gong, Xu Tan, Yang Yang, Peng Hu, Qi Zhang, Fengwei Yu, Wei Wang, and Shi Gu. 2021. BRECQ: Pushing the Limit of Post-Training Quantization by Block Reconstruction. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_24_1","volume-title":"AWQ: Activation-aware Weight Quantization for On-Device LLM Compression and Acceleration. In Conference on Machine Learning and Systems (MLSys).","author":"Lin Ji","year":"2024","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Wei-Ming Chen, Wei-Chen Wang, Guangxuan Xiao, Xingyu Dang, Chuang Gan, and Song Han. 2024. AWQ: Activation-aware Weight Quantization for On-Device LLM Compression and Acceleration. In Conference on Machine Learning and Systems (MLSys)."},{"key":"e_1_3_2_2_25_1","volume-title":"Oscillation-free Quantization for Low-bit Vision Transformers. In International Conference on Machine Learning (ICML). 21813--21824","author":"Liu Shih-Yang","year":"2023","unstructured":"Shih-Yang Liu, Zechun Liu, and Kwang-Ting Cheng. 2023. Oscillation-free Quantization for Low-bit Vision Transformers. In International Conference on Machine Learning (ICML). 21813--21824."},{"key":"e_1_3_2_2_26_1","volume-title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 
2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR, Vol. abs\/1907.11692 (2019)."},{"key":"e_1_3_2_2_27_1","volume-title":"LLM-QAT: Data-Free Quantization Aware Training for Large Language Models. In Annual Meeting of the Association for Computational Linguistics (ACL). 467--484","author":"Liu Zechun","year":"2024","unstructured":"Zechun Liu, Barlas Oguz, Changsheng Zhao, Ernie Chang, Pierre Stock, Yashar Mehdad, Yangyang Shi, Raghuraman Krishnamoorthi, and Vikas Chandra. 2024. LLM-QAT: Data-Free Quantization Aware Training for Large Language Models. In Annual Meeting of the Association for Computational Linguistics (ACL). 467--484."},{"key":"e_1_3_2_2_28_1","volume-title":"Post-Training Quantization for Vision Transformer. In Conference on Neural Information Processing Systems (NeurIPS). 28092--28103","author":"Liu Zhenhua","year":"2021","unstructured":"Zhenhua Liu, Yunhe Wang, Kai Han, Wei Zhang, Siwei Ma, and Wen Gao. 2021. Post-Training Quantization for Vision Transformer. In Conference on Neural Information Processing Systems (NeurIPS). 28092--28103."},{"key":"e_1_3_2_2_29_1","volume-title":"Robert MacIntyre, Ann Bies, Mark Ferguson, Karen Katz, and Britta Schasberger.","author":"Marcus Mitchell P.","year":"1994","unstructured":"Mitchell P. Marcus, Grace Kim, Mary Ann Marcinkiewicz, Robert MacIntyre, Ann Bies, Mark Ferguson, Karen Katz, and Britta Schasberger. 1994. The Penn Treebank: Annotating Predicate Argument Structure. In Human Language Technology."},{"key":"e_1_3_2_2_30_1","volume-title":"Pointer Sentinel Mixture Models. In International Conference on Learning Representations (ICLR).","author":"Merity Stephen","year":"2017","unstructured":"Stephen Merity, Caiming Xiong, James Bradbury, and Richard Socher. 2017. Pointer Sentinel Mixture Models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_31_1","volume-title":"Adaptive Rounding for Post-Training Quantization. In International Conference on Machine Learning (ICML). 7197--7206","author":"Nagel Markus","year":"2020","unstructured":"Markus Nagel, Rana Ali Amjad, Mart van Baalen, Christos Louizos, and Tijmen Blankevoort. 2020. Up or Down? Adaptive Rounding for Post-Training Quantization. In International Conference on Machine Learning (ICML). 7197--7206."},{"key":"e_1_3_2_2_32_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog Vol. 1 (2019)."},{"key":"e_1_3_2_2_33_1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research (JMLR), Vol. 21 (2020), 140:1--140:67.","journal-title":"Journal of Machine Learning Research (JMLR)"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474381"},{"key":"e_1_3_2_2_35_1","volume-title":"OmniQuant: Omnidirectionally Calibrated Quantization for Large Language Models. In International Conference on Learning Representations (ICLR).","author":"Shao Wenqi","year":"2024","unstructured":"Wenqi Shao, Mengzhao Chen, Zhaoyang Zhang, Peng Xu, Lirui Zhao, Zhiqian Li, Kaipeng Zhang, Peng Gao, Yu Qiao, and Ping Luo. 2024. 
OmniQuant: Omnidirectionally Calibrated Quantization for Large Language Models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_36_1","volume-title":"Degree-Quant: Quantization-Aware Training for Graph Neural Networks. In International Conference on Learning Representations (ICLR).","author":"Tailor Shyam Anil","year":"2021","unstructured":"Shyam Anil Tailor, Javier Fern\u00e1ndez-Marqu\u00e9s, and Nicholas Donald Lane. 2021. Degree-Quant: Quantization-Aware Training for Graph Neural Networks. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_37_1","volume-title":"LLaMA: Open and Efficient Foundation Language Models. CoRR","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, Aur\u00e9lien Rodriguez, Armand Joulin, Edouard Grave, and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. CoRR, Vol. abs\/2302.13971 (2023)."},{"key":"e_1_3_2_2_38_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS). 5998--6008","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Conference on Neural Information Processing Systems (NeurIPS). 5998--6008."},{"key":"e_1_3_2_2_39_1","volume-title":"BitNet: Scaling 1-bit Transformers for Large Language Models. CoRR","author":"Wang Hongyu","year":"2023","unstructured":"Hongyu Wang, Shuming Ma, Li Dong, Shaohan Huang, Huaijie Wang, Lingxiao Ma, Fan Yang, Ruiping Wang, Yi Wu, and Furu Wei. 2023. BitNet: Scaling 1-bit Transformers for Large Language Models. CoRR, Vol. abs\/2310.11453 (2023)."},{"key":"e_1_3_2_2_40_1","volume-title":"SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models. In International Conference on Machine Learning (ICML). 38087--38099","author":"Xiao Guangxuan","year":"2023","unstructured":"Guangxuan Xiao, Ji Lin, Micka\u00ebl Seznec, Hao Wu, Julien Demouth, and Song Han. 2023. SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models. In International Conference on Machine Learning (ICML). 38087--38099."},{"key":"e_1_3_2_2_41_1","volume-title":"OneBit: Towards Extremely Low-bit Large Language Models. CoRR","author":"Xu Yuzhuang","year":"2024","unstructured":"Yuzhuang Xu, Xu Han, Zonghan Yang, Shuo Wang, Qingfu Zhu, Zhiyuan Liu, Weidong Liu, and Wanxiang Che. 2024. OneBit: Towards Extremely Low-bit Large Language Models. CoRR, Vol. abs\/2402.11295 (2024)."},{"key":"e_1_3_2_2_42_1","volume-title":"ZeroQuant: Efficient and Affordable Post-Training Quantization for Large-Scale Transformers. In Conference on Neural Information Processing Systems (NeurIPS).","author":"Yao Zhewei","year":"2022","unstructured":"Zhewei Yao, Reza Yazdani Aminabadi, Minjia Zhang, Xiaoxia Wu, Conglong Li, and Yuxiong He. 2022. ZeroQuant: Efficient and Affordable Post-Training Quantization for Large-Scale Transformers. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_2_43_1","volume-title":"RPTQ: Reorder-based Post-training Quantization for Large Language Models. 
CoRR","author":"Yuan Zhihang","year":"2023","unstructured":"Zhihang Yuan, Lin Niu, Jiawei Liu, Wenyu Liu, Xinggang Wang, Yuzhang Shang, Guangyu Sun, Qiang Wu, Jiaxiang Wu, and Bingzhe Wu. 2023. RPTQ: Reorder-based Post-training Quantization for Large Language Models. CoRR, Vol. abs\/2304.01089 (2023)."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1472"},{"key":"e_1_3_2_2_45_1","volume-title":"Todor Mihaylov, Myle Ott, Sam Shleifer, Kurt Shuster, Daniel Simig, Punit Singh Koura, Anjali Sridhar, Tianlu Wang, and Luke Zettlemoyer.","author":"Zhang Susan","year":"2022","unstructured":"Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen, Christopher Dewan, Mona T. Diab, Xian Li, Xi Victoria Lin, Todor Mihaylov, Myle Ott, Sam Shleifer, Kurt Shuster, Daniel Simig, Punit Singh Koura, Anjali Sridhar, Tianlu Wang, and Luke Zettlemoyer. 2022. OPT: Open Pre-trained Transformer Language Models. CoRR, Vol. abs\/2205.01068 (2022)."},{"key":"e_1_3_2_2_46_1","volume-title":"A Survey on Model Compression for Large Language Models. CoRR","author":"Zhu Xunyu","year":"2023","unstructured":"Xunyu Zhu, Jian Li, Yong Liu, Can Ma, and Weiping Wang. 2023. A Survey on Model Compression for Large Language Models. CoRR, Vol. abs\/2308.07633 (2023)."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709287","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709287","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:38:27Z","timestamp":1755358707000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709287"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":46,"alternative-id":["10.1145\/3690624.3709287","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709287","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}