{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T01:25:52Z","timestamp":1767835552698,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["6250076060"],"award-info":[{"award-number":["6250076060"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,21]]},"DOI":"10.1145\/3772429.3772445","type":"proceedings-article","created":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T13:59:08Z","timestamp":1766498348000},"page":"122-129","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Beyond GPT-5: Making LLMs Cheaper and Better via Performance-Efficiency Optimized Routing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-0423-5746","authenticated-orcid":false,"given":"Yiqun","family":"Zhang","sequence":"first","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, Shanghai, China"}],"role":[{"role":"author","vocab":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0090-4870","authenticated-orcid":false,"given":"Hao","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, Shanghai, China"}],"role":[{"role":"author","vocab":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4363-0549","authenticated-orcid":false,"given":"Jianhao","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, Shanghai, China"}],"role":[{"role":"author","vocab":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9249-7498","authenticated-orcid":false,"given":"Hangfan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, Shanghai, China"}],"role":[{"role":"author","vocab":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8486-7562","authenticated-orcid":false,"given":"Peng","family":"Ye","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, Shanghai, China"}],"role":[{"role":"author","vocab":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8968-3386","authenticated-orcid":false,"given":"Lei","family":"Bai","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, Shanghai, China"}],"role":[{"role":"author","vocab":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1908-1344","authenticated-orcid":false,"given":"Shuyue","family":"Hu","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, Shanghai, China"}],"role":[{"role":"author","vocab":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,23]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Mohammad\u00a0Ali Alomrani Yingxue Zhang Derek Li Qianyi Sun Soumyasundar Pal Zhanguang Zhang Yaochen Hu Rohan\u00a0Deepak Ajwani Antonios Valkanas Raika Karimi et\u00a0al. 2025. Reasoning on a Budget: A Survey of Adaptive and Controllable Test-Time Compute in LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.02076 (2025)."},{"key":"e_1_3_3_2_3_2","unstructured":"Rahul\u00a0K Arora Jason Wei Rebecca\u00a0Soskin Hicks Preston Bowman Joaquin Qui\u00f1onero-Candela Foivos Tsimpourlas Michael Sharman Meghan Shah Andrea Vallone Alex Beutel et\u00a0al. 2025. Healthbench: Evaluating large language models towards improved human health. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.08775 (2025)."},{"key":"e_1_3_3_2_4_2","unstructured":"Victor Barres Honghua Dong Soham Ray Xujie Si and Karthik Narasimhan. 2025. \u03c42-Bench: Evaluating Conversational Agents in a Dual-Control Environment. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2506.07982 (2025)."},{"key":"e_1_3_3_2_5_2","unstructured":"Lingjiao Chen Matei Zaharia and James Zou. 2023. FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance. Transactions on Machine Learning Research (2023)."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2120"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Shuhao Chen Weisen Jiang Baijiong Lin James Kwok and Yu Zhang. 2024. Routerdc: Query-based router by dual contrastive learning for assembling large language models. Advances in Neural Information Processing Systems 37 (2024) 66305\u201366328.","DOI":"10.52202\/079017-2120"},{"key":"e_1_3_3_2_8_2","unstructured":"Francois Chollet Mike Knoop Gregory Kamradt and Bryan Landers. 2024. Arc prize 2024: Technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.04604 (2024)."},{"key":"e_1_3_3_2_9_2","unstructured":"Claude. 2025. System Card Addendum: Claude Opus 4.1. www.anthropic.com\/news\/claude-opus-4-1 (2025)."},{"key":"e_1_3_3_2_10_2","unstructured":"Claude. 2025. System Card: Claude Opus 4 & Claude Sonnet 4. www.anthropic.com\/claude\/sonnet (2025)."},{"key":"e_1_3_3_2_11_2","unstructured":"Gheorghe Comanici Eric Bieber Mike Schaekermann Ice Pasupat Noveen Sachdeva Inderjit Dhillon Marcel Blistein Ori Ram Dan Zhang Evan Rosen et\u00a0al. 2025. Gemini 2.5: Pushing the frontier with advanced reasoning multimodality long context and next generation agentic capabilities. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.06261 (2025)."},{"key":"e_1_3_3_2_12_2","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Feng Tao","year":"2025","unstructured":"Tao Feng, Yanzhen Shen, and Jiaxuan You. 2025. GraphRouter: A Graph-based Router for LLM Selections. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=eU39PDsZtT"},{"key":"e_1_3_3_2_13_2","unstructured":"Taicheng Guo Xiuying Chen Yaqi Wang Ruidi Chang Shichao Pei Nitesh\u00a0V Chawla Olaf Wiest and Xiangliang Zhang. 2024. Large language model based multi-agents: A survey of progress and challenges. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.01680 (2024)."},{"key":"e_1_3_3_2_14_2","unstructured":"Tingxu Han Zhenting Wang Chunrong Fang Shiyu Zhao Shiqing Ma and Zhenyu Chen. 2025. Token-Budget-Aware LLM Reasoning. arxiv:https:\/\/arXiv.org\/abs\/2412.18547\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2412.18547"},{"key":"e_1_3_3_2_15_2","volume-title":"Agentic Markets Workshop at ICML 2024","author":"Hu Qitian\u00a0Jason","year":"2024","unstructured":"Qitian\u00a0Jason Hu, Jacob Bieker, Xiuyu Li, Nan Jiang, Benjamin Keigwin, Gaurav Ranganath, Kurt Keutzer, and Shriyash\u00a0Kaustubh Upadhyay. 2024. RouterBench: A Benchmark for Multi-LLM Routing System. In Agentic Markets Workshop at ICML 2024."},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Zhongzhan Huang Guoming Ling Vincent\u00a0S Liang Yupei Lin Yandong Chen Shanshan Zhong Hefeng Wu and Liang Lin. 2025. RouterEval: A Comprehensive Benchmark for Routing LLMs to Explore Model-level Scaling Up in LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.10657 (2025).","DOI":"10.18653\/v1\/2025.findings-emnlp.208"},{"key":"e_1_3_3_2_17_2","unstructured":"Naman Jain King Han Alex Gu Wen-Ding Li Fanjia Yan Tianjun Zhang Sida Wang Armando Solar-Lezama Koushik Sen and Ion Stoica. 2024. Livecodebench: Holistic and contamination free evaluation of large language models for code. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.07974 (2024)."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.792"},{"key":"e_1_3_3_2_19_2","unstructured":"Lingjie Jiang Xun Wu Shaohan Huang Qingxiu Dong Zewen Chi Li Dong Xingxing Zhang Tengchao Lv Lei Cui and Furu Wei. 2025. Think only when you need with large hybrid-reasoning models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.14631 (2025)."},{"key":"e_1_3_3_2_20_2","unstructured":"Wittawat Jitkrittum Harikrishna Narasimhan Ankit\u00a0Singh Rawat Jeevesh Juneja Zifeng Wang Chen-Yu Lee Pradeep Shenoy Rina Panigrahy Aditya\u00a0Krishna Menon and Sanjiv Kumar. 2025. Universal model routing for efficient llm inference. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.08773 (2025)."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i23.34608"},{"key":"e_1_3_3_2_22_2","unstructured":"Jinliang Lu Ziliang Pang Min Xiao Yaochen Zhu Rui Xia and Jiajun Zhang. 2024. Merge ensemble and cooperate! a survey on collaborative strategies in the era of large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.06089 (2024)."},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.109"},{"key":"e_1_3_3_2_24_2","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Ong Isaac","year":"2024","unstructured":"Isaac Ong, Amjad Almahairi, Vincent Wu, Wei-Lin Chiang, Tianhao Wu, Joseph\u00a0E Gonzalez, M\u00a0Waleed Kadous, and Ion Stoica. 2024. RouteLLM: Learning to Route LLMs from Preference Data. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_3_2_25_2","unstructured":"OpenAI. 2025. GPT-5 System Card. openai.com\/index\/gpt-5-system-card (2025)."},{"key":"e_1_3_3_2_26_2","unstructured":"Long Phan Alice Gatti Ziwen Han Nathaniel Li Josephina Hu Hugh Zhang Chen Bo\u00a0Calvin Zhang Mohamed Shaaban John Ling Sean Shi et\u00a0al. 2025. Humanity\u2019s last exam. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.14249 (2025)."},{"key":"e_1_3_3_2_27_2","volume-title":"First Conference on Language Modeling","author":"Rein David","year":"2024","unstructured":"David Rein, Betty\u00a0Li Hou, Asa\u00a0Cooper Stickland, Jackson Petty, Richard\u00a0Yuanzhe Pang, Julien Dirani, Julian Michael, and Samuel\u00a0R Bowman. 2024. Gpqa: A graduate-level google-proof q&a benchmark. In First Conference on Language Modeling."},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.701"},{"key":"e_1_3_3_2_29_2","volume-title":"First Conference on Language Modeling","author":"Shnitzer Tal","year":"2023","unstructured":"Tal Shnitzer, Anthony Ou, M\u00edrian Silva, Kate Soule, Yuekai Sun, Justin Solomon, Neil Thompson, and Mikhail Yurochkin. 2023. Large Language Model Routing with Benchmark Datasets. In First Conference on Language Modeling."},{"key":"e_1_3_3_2_30_2","unstructured":"Vighnesh Subramaniam Yilun Du Joshua\u00a0B Tenenbaum Antonio Torralba Shuang Li and Igor Mordatch. 2025. Multiagent finetuning: Self improvement with diverse reasoning chains. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.05707 (2025)."},{"key":"e_1_3_3_2_31_2","unstructured":"Ziyu Wan Yunxiang Li Yan Song Hanjing Wang Linyi Yang Mark Schmidt Jun Wang Weinan Zhang Shuyue Hu and Ying Wen. 2025. Rema: Learning to meta-think for llms with multi-agent reinforcement learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.09501 (2025)."},{"key":"e_1_3_3_2_32_2","unstructured":"Jason Wei Nguyen Karina Hyung\u00a0Won Chung Yunxin\u00a0Joy Jiao Spencer Papay Amelia Glaese John Schulman and William Fedus. 2024. Measuring short-form factuality in large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.04368 (2024)."},{"key":"e_1_3_3_2_33_2","unstructured":"An Yang Anfeng Li Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chang Gao Chengen Huang Chenxu Lv et\u00a0al. 2025. Qwen3 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.09388 (2025)."},{"key":"e_1_3_3_2_34_2","unstructured":"Hangfan Zhang Zhiyao Cui Xinrun Wang Qiaosheng Zhang Zhen Wang Dinghao Wu and Shuyue Hu. 2025. If Multi-Agent Debate is the Answer What is the Question? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.08788 (2025)."},{"key":"e_1_3_3_2_35_2","unstructured":"Yiqun Zhang Hao Li Chenxu Wang Linyao Chen Qiaosheng Zhang Peng Ye Shi Feng Daling Wang Zhen Wang Xinrun Wang et\u00a0al. 2025. The Avengers: A Simple Recipe for Uniting Smaller Language Models to Challenge Proprietary Giants. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.19797 (2025)."},{"key":"e_1_3_3_2_36_2","unstructured":"Yanzhao Zhang Mingxin Li Dingkun Long Xin Zhang Huan Lin Baosong Yang Pengjun Xie An Yang Dayiheng Liu Junyang Lin Fei Huang and Jingren Zhou. 2025. Qwen3 Embedding: Advancing Text Embedding and Reranking Through Foundation Models. arxiv:https:\/\/arXiv.org\/abs\/2506.05176\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2506.05176"},{"key":"e_1_3_3_2_37_2","unstructured":"Yi-Kai Zhang De-Chuan Zhan and Han-Jia Ye. 2025. Capability Instruction Tuning: A New Paradigm for Dynamic LLM Routing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.17282 (2025)."},{"key":"e_1_3_3_2_38_2","unstructured":"Shenghe Zheng Hongzhi Wang Chenyu Huang Xiaohui Wang Tao Chen Jiayuan Fan Shuyue Hu and Peng Ye. 2025. Decouple and Orthogonalize: A Data-Free Framework for LoRA Merging. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.15875 (2025)."},{"key":"e_1_3_3_2_39_2","unstructured":"Richard Zhuang Tianhao Wu Zhaojin Wen Andrew Li Jiantao Jiao and Kannan Ramchandran. 2024. EmbedLLM: Learning Compact Representations of Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.02223 (2024)."},{"key":"e_1_3_3_2_40_2","unstructured":"Richard Zhuang Tianhao Wu Zhaojin Wen Andrew Li Jiantao Jiao and Kannan Ramchandran. 2024. EmbedLLM: Learning Compact Representations of Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2410.02223\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2410.02223"}],"event":{"name":"DAI '25: The Seventh International Conference on Distributed Artificial Intelligence","location":"London United Kingdom","acronym":"DAI '25"},"container-title":["Proceedings of the 2025 The Seventh International Conference on Distributed Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772429.3772445","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T19:42:28Z","timestamp":1767814948000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772429.3772445"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,21]]},"references-count":39,"alternative-id":["10.1145\/3772429.3772445","10.1145\/3772429"],"URL":"https:\/\/doi.org\/10.1145\/3772429.3772445","relation":{},"subject":[],"published":{"date-parts":[[2025,11,21]]},"assertion":[{"value":"2025-12-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}