{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T18:33:05Z","timestamp":1770748385772,"version":"3.50.0"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,24]],"date-time":"2023-11-24T00:00:00Z","timestamp":1700784000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,24]]},"DOI":"10.1145\/3653081.3653102","type":"proceedings-article","created":{"date-parts":[[2024,5,4]],"date-time":"2024-05-04T00:13:02Z","timestamp":1714781582000},"page":"120-124","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Retrieval-Augmented Generation with Quantized Large Language Models: A Comparative Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9957-5689","authenticated-orcid":false,"given":"Shanglin","family":"Yang","sequence":"first","affiliation":[{"name":"Intelligent Game and Decision Laboratory, China Academy of Launch Vehicle Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1155-3559","authenticated-orcid":false,"given":"Jialin","family":"Zhu","sequence":"additional","affiliation":[{"name":"Intelligent Game and Decision Laboratory, China Academy of Launch Vehicle Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2686-3755","authenticated-orcid":false,"given":"Jialin","family":"Wang","sequence":"additional","affiliation":[{"name":"Intelligent Game and Decision Laboratory, China Academy of Launch Vehicle Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8479-3532","authenticated-orcid":false,"given":"Xiaohan","family":"Xu","sequence":"additional","affiliation":[{"name":"Intelligent Game and Decision Laboratory, China Academy of Launch Vehicle Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4262-8575","authenticated-orcid":false,"given":"Zihang","family":"Shao","sequence":"additional","affiliation":[{"name":"Intelligent Game and Decision Laboratory, China Academy of Launch Vehicle Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7236-798X","authenticated-orcid":false,"given":"Liwei","family":"Yao","sequence":"additional","affiliation":[{"name":"Intelligent Game and Decision Laboratory, China Academy of Launch Vehicle Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2806-8747","authenticated-orcid":false,"given":"Benchang","family":"Zheng","sequence":"additional","affiliation":[{"name":"Intelligent Game and Decision Laboratory, China Academy of Launch Vehicle Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3798-2518","authenticated-orcid":false,"given":"Hu","family":"Huang","sequence":"additional","affiliation":[{"name":"Intelligent Game and Decision Laboratory, China Academy of Launch Vehicle Technology, China"}]}],"member":"320","published-online":{"date-parts":[[2024,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","unstructured":"Wayne Xin Zhao Kun Zhou Junyi Li Tianyi Tang Xiaolei Wang Yupeng Hou Yingqian Min Beichen Zhang Junjie Zhang Zican Dong Yifan Du Chen Yang Yushuo Chen Zhipeng Chen Jinhao Jiang Ruiyang Ren Yifan Li Xinyu Tang Zikang Liu Peiyu Liu Jian-Yun Nie and Ji-Rong Wen. 2023. A Survey of Large Language Models. https:\/\/doi.org\/10.48550\/arXiv.2303.18223","DOI":"10.48550\/arXiv.2303.18223"},{"key":"e_1_3_2_1_2_1","volume-title":"Retrieved","author":"Bubeck S\u00e9bastien","year":"2023","unstructured":"S\u00e9bastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, and Yi Zhang. 2023. Sparks of Artificial General Intelligence: Early experiments with GPT-4. Retrieved November 13, 2023 from http:\/\/arxiv.org\/abs\/2303.12712"},{"key":"e_1_3_2_1_3_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems 33, (2020)","author":"Brown Tom","year":"1877","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D. Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, and Amanda Askell. 2020. Language models are few-shot learners. Advances in neural information processing systems 33, (2020), 1877\u20131901."},{"key":"e_1_3_2_1_4_1","volume-title":"ChatGPT: A comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope. Internet of Things and Cyber-Physical Systems","author":"Ray Partha Pratim","year":"2023","unstructured":"Partha Pratim Ray. 2023. ChatGPT: A comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope. Internet of Things and Cyber-Physical Systems (2023). Retrieved December 6, 2023 from https:\/\/www.sciencedirect.com\/science\/article\/pii\/S266734522300024X"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10439-023-03172-7"},{"key":"e_1_3_2_1_6_1","volume-title":"Finance Research Letters 53","author":"Dowling Michael","year":"2023","unstructured":"Michael Dowling and Brian Lucey. 2023. ChatGPT for (finance) research: The Bananarama conjecture. Finance Research Letters 53, (2023), 103662."},{"key":"e_1_3_2_1_7_1","unstructured":"Jiaxi Cui Zongjian Li Yang Yan Bohua Chen and Li Yuan. 2023. ChatLaw: Open-Source Legal Large Language Model with Integrated External Knowledge Bases."},{"key":"e_1_3_2_1_8_1","volume-title":"Advances in Neural Information Processing Systems 33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, and Tim Rockt\u00e4schel. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems 33, (2020), 9459\u20139474."},{"key":"e_1_3_2_1_9_1","volume-title":"8-bit Matrix Multiplication for Transformers at Scale. Retrieved","author":"Dettmers Tim","year":"2023","unstructured":"Tim Dettmers, Mike Lewis, Younes Belkada, and Luke Zettlemoyer. 2022. LLM.int8(): 8-bit Matrix Multiplication for Transformers at Scale. Retrieved December 6, 2023 from http:\/\/arxiv.org\/abs\/2208.07339"},{"key":"e_1_3_2_1_10_1","volume-title":"Efficient LLM Inference on CPUs. Retrieved","author":"Shen Haihao","year":"2023","unstructured":"Haihao Shen, Hanwen Chang, Bo Dong, Yu Luo, and Hengyu Meng. 2023. Efficient LLM Inference on CPUs. Retrieved December 6, 2023 from http:\/\/arxiv.org\/abs\/2311.00502"},{"key":"e_1_3_2_1_11_1","volume-title":"Advances in Neural Information Processing Systems 35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, and Alex Ray. 2022. Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems 35, (2022), 27730\u201327744."},{"key":"e_1_3_2_1_12_1","volume-title":"Brian Lester, Nan Du, Andrew M. Dai, and Quoc V. Le.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Maarten Bosma, Vincent Y. Zhao, Kelvin Guu, Adams Wei Yu, Brian Lester, Nan Du, Andrew M. Dai, and Quoc V. Le. 2022. Finetuned Language Models Are Zero-Shot Learners. Retrieved December 6, 2023 from http:\/\/arxiv.org\/abs\/2109.01652"},{"key":"e_1_3_2_1_13_1","unstructured":"Amelia Glaese Nat McAleese Maja Tr\u0119bacz John Aslanides Vlad Firoiu Timo Ewalds Maribeth Rauh Laura Weidinger Martin Chadwick Phoebe Thacker Lucy Campbell-Gillingham Jonathan Uesato Po-Sen Huang Ramona Comanescu Fan Yang Abigail See Sumanth Dathathri Rory Greig Charlie Chen Doug Fritz Jaume Sanchez Elias Richard Green So\u0148a Mokr\u00e1 Nicholas Fernando Boxi Wu Rachel Foley Susannah Young Iason Gabriel William Isaac John Mellor Demis Hassabis Koray Kavukcuoglu Lisa Anne Hendricks and Geoffrey Irving. 2022. Improving alignment of dialogue agents via targeted human judgements. Retrieved December 6 2023 from http:\/\/arxiv.org\/abs\/2209.14375"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.26"},{"key":"e_1_3_2_1_15_1","volume-title":"Glm-130b: An open bilingual pre-trained model. arXiv preprint arXiv:2210.02414","author":"Zeng Aohan","year":"2022","unstructured":"Aohan Zeng, Xiao Liu, Zhengxiao Du, Zihan Wang, Hanyu Lai, Ming Ding, Zhuoyi Yang, Yifan Xu, Wendi Zheng, Xiao Xia, and others. 2022. Glm-130b: An open bilingual pre-trained model. arXiv preprint arXiv:2210.02414 (2022)."},{"key":"e_1_3_2_1_16_1","volume-title":"Baichuan 2: Open Large-scale Language Models. arXiv preprint arXiv:2309.10305","year":"2023","unstructured":"Baichuan. 2023. Baichuan 2: Open Large-scale Language Models. arXiv preprint arXiv:2309.10305 (2023). Retrieved from https:\/\/arxiv.org\/abs\/2309.10305"},{"key":"e_1_3_2_1_17_1","unstructured":"InternLM Team. 2023. InternLM: A Multilingual Language Model with Progressively Enhanced Capabilities. Retrieved from https:\/\/github.com\/InternLM\/InternLM"},{"key":"e_1_3_2_1_18_1","volume-title":"Advances in Neural Information Processing Systems","author":"Huang Yuzhen","year":"2023","unstructured":"Yuzhen Huang, Yuzhuo Bai, Zhihao Zhu, Junlei Zhang, Jinghan Zhang, Tangjun Su, Junteng Liu, Chuancheng Lv, Yikai Zhang, Jiayi Lei, Yao Fu, Maosong Sun, and Junxian He. 2023. C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models. In Advances in Neural Information Processing Systems, 2023."},{"key":"e_1_3_2_1_19_1","volume-title":"Distilling the Knowledge in a Neural Network. Retrieved","author":"Hinton Geoffrey","year":"2023","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the Knowledge in a Neural Network. Retrieved December 6, 2023 from http:\/\/arxiv.org\/abs\/1503.02531"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2309.00267"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics, 2002","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics, 2002. 311\u2013318. . Retrieved December 6, 2023 from https:\/\/aclanthology.org\/P02-1040.pdf"},{"key":"e_1_3_2_1_22_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out, 2004. 74\u201381. . Retrieved December 6, 2023 from https:\/\/aclanthology.org\/W04-1013.pdf"},{"key":"e_1_3_2_1_23_1","unstructured":"pypdfium2-team. 2023. pypdfium2. Retrieved from https:\/\/github.com\/pypdfium2-team\/pypdfium2"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_25_1","unstructured":"Shitao Xiao Zheng Liu Peitian Zhang and Niklas Muennighoff. 2023. C-Pack: Packaged Resources To Advance General Chinese Embedding."},{"key":"e_1_3_2_1_26_1","unstructured":"li-plus. 2023. ChatGLM.cpp. Retrieved from https:\/\/github.com\/li-plus\/chatglm.cpp"}],"event":{"name":"IoTAAI 2023: 2023 5th International Conference on Internet of Things, Automation and Artificial Intelligence","location":"Nanchang China","acronym":"IoTAAI 2023"},"container-title":["Proceedings of the 2023 5th International Conference on Internet of Things, Automation and Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3653081.3653102","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3653081.3653102","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T10:55:29Z","timestamp":1755860129000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3653081.3653102"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,24]]},"references-count":26,"alternative-id":["10.1145\/3653081.3653102","10.1145\/3653081"],"URL":"https:\/\/doi.org\/10.1145\/3653081.3653102","relation":{},"subject":[],"published":{"date-parts":[[2023,11,24]]},"assertion":[{"value":"2024-05-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}