{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:06:06Z","timestamp":1750309566699,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T00:00:00Z","timestamp":1734307200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,16]]},"DOI":"10.1145\/3677389.3702522","type":"proceedings-article","created":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T16:55:46Z","timestamp":1741884946000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Exploring Efficient Optimization Techniques in Online Retrieval-Augmented Generation Application"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-2107-5855","authenticated-orcid":false,"given":"Yining","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2653-2992","authenticated-orcid":false,"given":"Yinan","family":"Peng","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0604-8816","authenticated-orcid":false,"given":"Chengying","family":"Tu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0150-715X","authenticated-orcid":false,"given":"Zherui","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5914-8585","authenticated-orcid":false,"given":"Hongfei","family":"Yan","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimedia Information Processing, School of Computer Science, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9704-1575","authenticated-orcid":false,"given":"Chong","family":"Chen","sequence":"additional","affiliation":[{"name":"Bejing Normal University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8075-9584","authenticated-orcid":false,"given":"Hao","family":"Ma","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1216-2871","authenticated-orcid":false,"given":"Jia","family":"Yang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8103-7978","authenticated-orcid":false,"given":"Yan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5403-2874","authenticated-orcid":false,"given":"Rikun","family":"Liao","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,3,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Meta AI. 2024. Introducing Meta Llama 3: The most capable openly available LLM to date. https:\/\/ai.meta.com\/blog\/meta-llama-3\/. Accessed: 2024-06-16."},{"key":"e_1_3_2_1_2_1","volume-title":"Self-rag: Learning to retrieve, generate, and critique through self-reflection. arXiv preprint arXiv:2310.11511","author":"Asai Akari","year":"2023","unstructured":"Akari Asai, Zeqiu Wu, Yizhong Wang, Avirup Sil, and Hannaneh Hajishirzi. 2023. Self-rag: Learning to retrieve, generate, and critique through self-reflection. arXiv preprint arXiv:2310.11511 (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"Lin (Eds.)","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel Ziegler, Jeffrey Wu, Clemens Winter, Chris Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 1877--1901. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf"},{"key":"e_1_3_2_1_4_1","volume-title":"Bge m3-embedding: Multi-lingual, multi-functionality, multi-granularity text embeddings through self-knowledge distillation. arXiv preprint arXiv:2402.03216","author":"Chen Jianlv","year":"2024","unstructured":"Jianlv Chen, Shitao Xiao, Peitian Zhang, Kun Luo, Defu Lian, and Zheng Liu. 2024. Bge m3-embedding: Multi-lingual, multi-functionality, multi-granularity text embeddings through self-knowledge distillation. arXiv preprint arXiv:2402.03216 (2024)."},{"key":"e_1_3_2_1_5_1","volume-title":"Dense x retrieval: What retrieval granularity should we use? arXiv preprint arXiv:2312.06648","author":"Chen Tong","year":"2023","unstructured":"Tong Chen, Hongwei Wang, Sihao Chen, Wenhao Yu, Kaixin Ma, Xinran Zhao, Dong Yu, and Hongming Zhang. 2023. Dense x retrieval: What retrieval granularity should we use? arXiv preprint arXiv:2312.06648 (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"Qlora: Efficient finetuning of quantized llms. Advances in Neural Information Processing Systems 36","author":"Dettmers Tim","year":"2024","unstructured":"Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, and Luke Zettlemoyer. 2024. Qlora: Efficient finetuning of quantized llms. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_7_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"Ragas: Automated evaluation of retrieval augmented generation. arXiv preprint arXiv:2309.15217","author":"Es Shahul","year":"2023","unstructured":"Shahul Es, Jithin James, Luis Espinosa-Anke, and Steven Schockaert. 2023. Ragas: Automated evaluation of retrieval augmented generation. arXiv preprint arXiv:2309.15217 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"Precise zero-shot dense retrieval without relevance labels. arXiv preprint arXiv:2212.10496","author":"Gao Luyu","year":"2022","unstructured":"Luyu Gao, Xueguang Ma, Jimmy Lin, and Jamie Callan. 2022. Precise zero-shot dense retrieval without relevance labels. arXiv preprint arXiv:2212.10496 (2022)."},{"key":"e_1_3_2_1_10_1","volume-title":"Bruno Silva, Daniel Holstein, Dawei Li, Jennifer Marsman, Leonardo O Nunes, Mahsa Rouzbahman, Morris Sharp, et al.","author":"Gupta Aman","year":"2024","unstructured":"Aman Gupta, Anup Shirgaonkar, Angels de Luis Balaguer, Bruno Silva, Daniel Holstein, Dawei Li, Jennifer Marsman, Leonardo O Nunes, Mahsa Rouzbahman, Morris Sharp, et al. 2024. RAG vs Fine-tuning: Pipelines, Tradeoffs, and a Case Study on Agriculture. arXiv preprint arXiv:2401.08406 (2024)."},{"key":"e_1_3_2_1_11_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_12_1","unstructured":"Lei Huang Weijiang Yu Weitao Ma Weihong Zhong Zhangyin Feng Haotian Wang Qianglong Chen Weihua Peng Xiaocheng Feng Bing Qin et al. 2023. A survey on hallucination in large language models: Principles taxonomy challenges and open questions. ACM Transactions on Information Systems (2023)."},{"key":"e_1_3_2_1_13_1","unstructured":"Colin Jarvis John Allard. 2023. A Survey of Techniques for Maximizing LLM Performance. https:\/\/www.youtube.com\/watch?v=ahnGLM-RC1Y."},{"key":"e_1_3_2_1_14_1","volume-title":"The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691","author":"Lester Brian","year":"2021","unstructured":"Brian Lester, Rami Al-Rfou, and Noah Constant. 2021. The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)."},{"key":"e_1_3_2_1_15_1","first-page":"9459","article-title":"2020. Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, et al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems 33 (2020), 9459--9474.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_16_1","volume-title":"Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190","author":"Li Xiang Lisa","year":"2021","unstructured":"Xiang Lisa Li and Percy Liang. 2021. Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)."},{"key":"e_1_3_2_1_17_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"key":"e_1_3_2_1_18_1","volume-title":"Zhengxiao Du, Zhilin Yang, and Jie Tang.","author":"Liu Xiao","year":"2021","unstructured":"Xiao Liu, Kaixuan Ji, Yicheng Fu, Weng Lam Tam, Zhengxiao Du, Zhilin Yang, and Jie Tang. 2021. P-tuning v2: Prompt tuning can be comparable to fine-tuning universally across scales and tasks. arXiv preprint arXiv:2110.07602 (2021)."},{"key":"e_1_3_2_1_19_1","volume-title":"Query rewriting for retrieval-augmented large language models. arXiv preprint arXiv:2305.14283","author":"Ma Xinbei","year":"2023","unstructured":"Xinbei Ma, Yeyun Gong, Pengcheng He, Hai Zhao, and Nan Duan. 2023. Query rewriting for retrieval-augmented large language models. arXiv preprint arXiv:2305.14283 (2023)."},{"key":"e_1_3_2_1_20_1","volume-title":"Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs","author":"Malkov Yu A","year":"2018","unstructured":"Yu A Malkov and Dmitry A Yashunin. 2018. Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE transactions on pattern analysis and machine intelligence 42, 4 (2018), 824--836."},{"key":"e_1_3_2_1_21_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_22_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_23_1","volume-title":"Attention is all you need. Advances in Neural Information Processing Systems","author":"Vaswani A","year":"2017","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_2_1_24_1","volume-title":"Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022), 24824--24837."},{"key":"e_1_3_2_1_25_1","unstructured":"Aiyuan Yang Bin Xiao Bingning Wang Borong Zhang Ce Bian Chao Yin Chenxu Lv Da Pan Dian Wang Dong Yan et al. 2023. Baichuan 2: Open large-scale language models. arXiv preprint arXiv:2309.10305 (2023)."},{"key":"e_1_3_2_1_27_1","unstructured":"Aohan Zeng Xiao Liu Zhengxiao Du Zihan Wang Hanyu Lai Ming Ding Zhuoyi Yang Yifan Xu Wendi Zheng Xiao Xia et al. 2022. Glm-130b: An open bilingual pre-trained model. arXiv preprint arXiv:2210.02414 (2022)."},{"key":"e_1_3_2_1_28_1","volume-title":"Llamafactory: Unified efficient fine-tuning of 100+ language models. arXiv preprint arXiv:2403.13372","author":"Zheng Yaowei","year":"2024","unstructured":"Yaowei Zheng, Richong Zhang, Junhao Zhang, Yanhan Ye, and Zheyan Luo. 2024. Llamafactory: Unified efficient fine-tuning of 100+ language models. arXiv preprint arXiv:2403.13372 (2024)."}],"event":{"name":"JCDL '24: 24th ACM\/IEEE Joint Conference on Digital Libraries","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","IEEE TCDL"],"location":"Hong Kong China","acronym":"JCDL '24"},"container-title":["Proceedings of the 24th ACM\/IEEE Joint Conference on Digital Libraries"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677389.3702522","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3677389.3702522","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:19:07Z","timestamp":1750295947000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677389.3702522"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,16]]},"references-count":27,"alternative-id":["10.1145\/3677389.3702522","10.1145\/3677389"],"URL":"https:\/\/doi.org\/10.1145\/3677389.3702522","relation":{},"subject":[],"published":{"date-parts":[[2024,12,16]]},"assertion":[{"value":"2025-03-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}