{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T02:32:16Z","timestamp":1765506736168,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFE0204500"],"award-info":[{"award-number":["2024YFE0204500"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["92267104"],"award-info":[{"award-number":["92267104"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3760990","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T00:52:37Z","timestamp":1762563157000},"page":"1748-1757","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Cost-Aware Approach for Collaborating Large Language Models and Small Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7840-8303","authenticated-orcid":false,"given":"Zheng","family":"Li","sequence":"first","affiliation":[{"name":"State Key Laboratory of Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7353-4159","authenticated-orcid":false,"given":"Xuyun","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computing, Macquarie University, Sydney, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0750-9403","authenticated-orcid":false,"given":"Sheng","family":"Lu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4192-8894","authenticated-orcid":false,"given":"Hua","family":"Deng","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5335-5898","authenticated-orcid":false,"given":"Hao","family":"Tian","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4833-2023","authenticated-orcid":false,"given":"Wanchun","family":"Dou","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Novel Software Technology, Nanjing University, Nanjing, Jiangsu, China"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","article-title":"Shannon entropy in artificial intelligence and its applications based on information theory","volume":"13","author":"Ali Aqib","year":"2023","unstructured":"Aqib Ali, Sania Anam, and Muhammad Munawar Ahmed. 2023. Shannon entropy in artificial intelligence and its applications based on information theory. Journal of Applied and Emerging Sciences, Vol. 13, 1 (2023), 09-17.","journal-title":"Journal of Applied and Emerging Sciences"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679783"},{"key":"e_1_3_2_1_3_1","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang et al. 2023. Qwen technical report. arXiv preprint arXiv:2309.16609 (2023)."},{"key":"e_1_3_2_1_4_1","volume-title":"International Conference on Machine Learning","author":"Bergner Benjamin","year":"2024","unstructured":"Benjamin Bergner, Andrii Skliar, Amelie Royer, Tijmen Blankevoort, Yuki Asano, and Babak Ehteshami Bejnordi. 2024. Think big, generate quick: Llm-to-slm for fast autoregressive decoding. International Conference on Machine Learning (2024)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Yupeng Chang Xu Wang Jindong Wang Yuan Wu Linyi Yang Kaijie Zhu Hao Chen Xiaoyuan Yi Cunxiang Wang Yidong Wang et al. 2024. A survey on evaluation of large language models. ACM transactions on intelligent systems and technology Vol. 15 3 (2024) 1-45.","DOI":"10.1145\/3641289"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i10.29003"},{"key":"e_1_3_2_1_7_1","first-page":"70115","article-title":"Large language models are visual reasoning coordinators","volume":"36","author":"Chen Liangyu","year":"2023","unstructured":"Liangyu Chen, Bo Li, Sheng Shen, Jingkang Yang, Chunyuan Li, Kurt Keutzer, Trevor Darrell, and Ziwei Liu. 2023. Large language models are visual reasoning coordinators. Advances in Neural Information Processing Systems, Vol. 36 (2023), 70115-70140.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Machine Learning. PMLR, 10421-10430","author":"Fu Yao","year":"2023","unstructured":"Yao Fu, Hao Peng, Litu Ou, Ashish Sabharwal, and Tushar Khot. 2023. Specializing smaller language models towards multi-step reasoning. In International Conference on Machine Learning. PMLR, 10421-10430."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3662006.3662067"},{"key":"e_1_3_2_1_10_1","first-page":"2162","article-title":"Hierarchical and dynamic prompt compression for efficient zero-shot API usage","volume":"2024","author":"Jiang Yichen","year":"2024","unstructured":"Yichen Jiang, Marco Vecchio, Mohit Bansal, and Anders Johannsen. 2024. Hierarchical and dynamic prompt compression for efficient zero-shot API usage. In Findings of the Association for Computational Linguistics: EACL 2024. 2162-2174.","journal-title":"Findings of the Association for Computational Linguistics: EACL"},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Machine Learning. PMLR, 11985-12003","author":"Lang Hunter","year":"2022","unstructured":"Hunter Lang, Monica N Agrawal, Yoon Kim, and David Sontag. 2022. Co-training improves prompt-based learning for large language models. In International Conference on Machine Learning. PMLR, 11985-12003."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3649449"},{"key":"e_1_3_2_1_13_1","volume-title":"Symbolic chain-of-thought distillation: Small models can also'' think'' step-by-step. Findings of the Association for Computational Linguistics","author":"Li Liunian Harold","year":"2023","unstructured":"Liunian Harold Li, Jack Hessel, Youngjae Yu, Xiang Ren, Kai-Wei Chang, and Yejin Choi. 2023. Symbolic chain-of-thought distillation: Small models can also'' think'' step-by-step. Findings of the Association for Computational Linguistics (2023)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.840"},{"key":"e_1_3_2_1_15_1","volume-title":"prompt, and predict: A systematic survey of prompting methods in natural language processing. ACM computing surveys","author":"Liu Pengfei","year":"2023","unstructured":"Pengfei Liu, Weizhe Yuan, Jinlan Fu, Zhengbao Jiang, Hiroaki Hayashi, and Graham Neubig. 2023. Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing. ACM computing surveys, Vol. 55, 9 (2023), 1-35."},{"key":"e_1_3_2_1_16_1","volume-title":"Large language model is not a good few-shot information extractor, but a good reranker for hard samples! Conference on Empirical Methods in Natural Language Processing","author":"Ma Yubo","year":"2023","unstructured":"Yubo Ma, Yixin Cao, YongChing Hong, and Aixin Sun. 2023. Large language model is not a good few-shot information extractor, but a good reranker for hard samples! Conference on Empirical Methods in Natural Language Processing (2023)."},{"key":"e_1_3_2_1_17_1","first-page":"6","article-title":"Introducing meta llama 3: The most capable openly available llm to date","volume":"2","author":"Meta AI","year":"2024","unstructured":"AI Meta. 2024. Introducing meta llama 3: The most capable openly available llm to date. Meta AI, Vol. 2, 5 (2024), 6.","journal-title":"Meta AI"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605943"},{"key":"e_1_3_2_1_19_1","first-page":"94934","article-title":"Fundamental limits of prompt compression: A rate-distortion framework for black-box language models","volume":"37","author":"Nagle Alliot","year":"2025","unstructured":"Alliot Nagle, Adway Girish, Marco Bondaschi, Michael Gastpar, Ashok Vardhan Makkuva, and Hyeji Kim. 2025. Fundamental limits of prompt compression: A rate-distortion framework for black-box language models. Advances in Neural Information Processing Systems, Vol. 37 (2025), 94934-94970.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1018"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.441"},{"key":"e_1_3_2_1_22_1","volume-title":"International Conference on Machine Learning. PMLR, 26724-26768","author":"Oymak Samet","year":"2023","unstructured":"Samet Oymak, Ankit Singh Rawat, Mahdi Soltanolkotabi, and Christos Thrampoulidis. 2023. On the role of attention in prompt-tuning. In International Conference on Machine Learning. PMLR, 26724-26768."},{"key":"e_1_3_2_1_23_1","first-page":"126544","article-title":"Gorilla: Large language model connected with massive apis","volume":"37","author":"Patil Shishir G","year":"2025","unstructured":"Shishir G Patil, Tianjun Zhang, Xin Wang, and Joseph E Gonzalez. 2025. Gorilla: Large language model connected with massive apis. Advances in Neural Information Processing Systems, Vol. 37 (2025), 126544-126565.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2022.3209479"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681010"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549113"},{"key":"e_1_3_2_1_27_1","volume-title":"International Conference on Learning Representations","author":"Wang Xuezhi","year":"2023","unstructured":"Xuezhi Wang, Jason Wei, Dale Schuurmans, Quoc Le, Ed Chi, Sharan Narang, Aakanksha Chowdhery, and Denny Zhou. 2023. Self-consistency improves chain of thought reasoning in language models. International Conference on Learning Representations (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al., 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems, Vol. 35 (2022), 24824-24837."},{"key":"e_1_3_2_1_29_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Yao Shunyu","year":"2024","unstructured":"Shunyu Yao, Dian Yu, Jeffrey Zhao, Izhak Shafran, Tom Griffiths, Yuan Cao, and Karthik Narasimhan. 2024. Tree of thoughts: Deliberate problem solving with large language models. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29920"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i24.34777"},{"key":"e_1_3_2_1_32_1","volume-title":"Cogenesis: A framework collaborating large and small language models for secure context-aware instruction following. Findings of the Association for Computational Linguistics","author":"Zhang Kaiyan","year":"2024","unstructured":"Kaiyan Zhang, Jianyu Wang, Ermo Hua, Biqing Qi, Ning Ding, and Bowen Zhou. 2024b. Cogenesis: A framework collaborating large and small language models for secure context-aware instruction following. Findings of the Association for Computational Linguistics (2024)."},{"key":"e_1_3_2_1_33_1","volume-title":"Chain of Preference Optimization: Improving Chain-of-Thought Reasoning in LLMs. arXiv preprint arXiv:2406.09136","author":"Zhang Xuan","year":"2024","unstructured":"Xuan Zhang, Chao Du, Tianyu Pang, Qian Liu, Wei Gao, and Min Lin. 2024a. Chain of Preference Optimization: Improving Chain-of-Thought Reasoning in LLMs. arXiv preprint arXiv:2406.09136 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"Character-level convolutional networks for text classification. Advances in neural information processing systems","author":"Zhang Xiang","year":"2015","unstructured":"Xiang Zhang, Junbo Zhao, and Yann LeCun. 2015. Character-level convolutional networks for text classification. Advances in neural information processing systems, Vol. 28 (2015)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01435"}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Seoul Republic of Korea","acronym":"CIKM '25"},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3760990","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T02:28:43Z","timestamp":1765506523000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3760990"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":35,"alternative-id":["10.1145\/3746252.3760990","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3760990","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}