{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:02:08Z","timestamp":1775815328989,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["23IAA02114 and 62472241"],"award-info":[{"award-number":["23IAA02114 and 62472241"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"joint project of Infinigence AI & Tsinghua University"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,22]]},"DOI":"10.1145\/3696410.3714765","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:47:11Z","timestamp":1745362031000},"page":"1822-1833","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Division-of-Thoughts: Harnessing Hybrid Language Model Synergy for Efficient On-Device Agents"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-5673-9978","authenticated-orcid":false,"given":"Chenyang","family":"Shao","sequence":"first","affiliation":[{"name":"Department of Electronic Engineering BNRist, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6139-192X","authenticated-orcid":false,"given":"Xinyuan","family":"Hu","sequence":"additional","affiliation":[{"name":"Department of Quantitative Theory &amp; Methods, Emory University, Atlanta, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3505-4419","authenticated-orcid":false,"given":"Yutang","family":"Lin","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5720-4026","authenticated-orcid":false,"given":"Fengli","family":"Xu","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering BNRist, Tsinghua University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Ammar Ahmad Awan, Jyoti Aneja, Ahmed Awadallah, Hany Awadalla, Nguyen Bach, Amit Bahree, Arash Bakhtiari, Harkirat Behl, et al.","author":"Abdin Marah","year":"2024","unstructured":"Marah Abdin, Sam Ade Jacobs, Ammar Ahmad Awan, Jyoti Aneja, Ahmed Awadallah, Hany Awadalla, Nguyen Bach, Amit Bahree, Arash Bakhtiari, Harkirat Behl, et al. 2024. Phi-3 technical report: A highly capable language model locally on your phone. arXiv preprint arXiv:2404.14219 (2024)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29720"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i10.29003"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671965"},{"key":"e_1_3_2_1_5_1","volume-title":"Frugalgpt: How to use large language models while reducing cost and improving performance. arXiv preprint arXiv:2305.05176","author":"Chen Lingjiao","year":"2023","unstructured":"Lingjiao Chen, Matei Zaharia, and James Zou. 2023. Frugalgpt: How to use large language models while reducing cost and improving performance. arXiv preprint arXiv:2305.05176 (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"Octo-planner: On-device Language Model for Planner-Action Agents. arXiv preprint arXiv:2406.18082","author":"Chen Wei","year":"2024","unstructured":"Wei Chen, Zhiyuan Li, Zhen Guo, and Yikang Shen. 2024a. Octo-planner: On-device Language Model for Planner-Action Agents. arXiv preprint arXiv:2406.18082 (2024)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10796-008-9087-2"},{"key":"e_1_3_2_1_8_1","volume-title":"A survey on in-context learning. arXiv preprint arXiv:2301.00234","author":"Dong Qingxiu","year":"2022","unstructured":"Qingxiu Dong, Lei Li, Damai Dai, Ce Zheng, Zhiyong Wu, Baobao Chang, Xu Sun, Jingjing Xu, and Zhifang Sui. 2022. A survey on in-context learning. arXiv preprint arXiv:2301.00234 (2022)."},{"key":"e_1_3_2_1_9_1","volume-title":"DROP: A reading comprehension benchmark requiring discrete reasoning over paragraphs. arXiv preprint arXiv:1903.00161","author":"Dua Dheeru","year":"2019","unstructured":"Dheeru Dua, Yizhong Wang, Pradeep Dasigi, Gabriel Stanovsky, Sameer Singh, and Matt Gardner. 2019. DROP: A reading comprehension benchmark requiring discrete reasoning over paragraphs. arXiv preprint arXiv:1903.00161 (2019)."},{"key":"e_1_3_2_1_10_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671984"},{"key":"e_1_3_2_1_12_1","unstructured":"Tom Gunter Zirui Wang Chong Wang Ruoming Pang Andy Narayanan Aonan Zhang Bowen Zhang Chen Chen Chung-Cheng Chiu David Qiu et al. 2024. Apple intelligence foundation language models. arXiv preprint arXiv:2407.21075 (2024)."},{"key":"e_1_3_2_1_13_1","volume-title":"Aditya Krishna Menon, and Sanjiv Kumar.","author":"Gupta Neha","year":"2024","unstructured":"Neha Gupta, Harikrishna Narasimhan, Wittawat Jitkrittum, Ankit Singh Rawat, Aditya Krishna Menon, and Sanjiv Kumar. 2024. Language Model Cascades: Token-level uncertainty and beyond. arXiv preprint arXiv:2404.10136 (2024)."},{"key":"e_1_3_2_1_14_1","volume-title":"Measuring mathematical problem solving with the math dataset. arXiv preprint arXiv:2103.03874","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Saurav Kadavath, Akul Arora, Steven Basart, Eric Tang, Dawn Song, and Jacob Steinhardt. 2021. Measuring mathematical problem solving with the math dataset. arXiv preprint arXiv:2103.03874 (2021)."},{"key":"e_1_3_2_1_15_1","volume-title":"Scaling sentence embeddings with large language models. arXiv preprint arXiv:2307.16645","author":"Jiang Ting","year":"2023","unstructured":"Ting Jiang, Shaohan Huang, Zhongzhi Luan, Deqing Wang, and Fuzhen Zhuang. 2023. Scaling sentence embeddings with large language models. arXiv preprint arXiv:2307.16645 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Next-generation of virtual personal assistants (microsoft cortana, apple siri, amazon alexa and google home). In 2018 IEEE 8th annual computing and communication workshop and conference (CCWC)","author":"Kepuska Veton","unstructured":"Veton Kepuska and Gamal Bohouta. 2018. Next-generation of virtual personal assistants (microsoft cortana, apple siri, amazon alexa and google home). In 2018 IEEE 8th annual computing and communication workshop and conference (CCWC). IEEE, 99--103."},{"key":"e_1_3_2_1_17_1","volume-title":"Decomposed prompting: A modular approach for solving complex tasks. arXiv preprint arXiv:2210.02406","author":"Khot Tushar","year":"2022","unstructured":"Tushar Khot, Harsh Trivedi, Matthew Finlayson, Yao Fu, Kyle Richardson, Peter Clark, and Ashish Sabharwal. 2022. Decomposed prompting: A modular approach for solving complex tasks. arXiv preprint arXiv:2210.02406 (2022)."},{"key":"e_1_3_2_1_18_1","volume-title":"Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa.","author":"Kojima Takeshi","year":"2022","unstructured":"Takeshi Kojima, Shixiang Shane Gu, Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa. 2022. Large language models are zero-shot reasoners. Advances in neural information processing systems, Vol. 35 (2022), 22199--22213."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671620"},{"key":"e_1_3_2_1_20_1","volume-title":"International conference on machine learning. PMLR, 2873--2882","author":"Lake Brenden","year":"2018","unstructured":"Brenden Lake and Marco Baroni. 2018. Generalization without systematicity: On the compositional skills of sequence-to-sequence recurrent networks. In International conference on machine learning. PMLR, 2873--2882."},{"key":"e_1_3_2_1_21_1","volume-title":"Limp: Large language model enhanced intent-aware mobility prediction. arXiv preprint arXiv:2408.12832","author":"Li Songwei","year":"2024","unstructured":"Songwei Li, Jie Feng, Jiawei Chi, Xinyuan Hu, Xiaomeng Zhao, and Fengli Xu. 2024a. Limp: Large language model enhanced intent-aware mobility prediction. arXiv preprint arXiv:2408.12832 (2024)."},{"key":"e_1_3_2_1_22_1","unstructured":"Yuanchun Li Hao Wen Weijun Wang Xiangyu Li Yizhen Yuan Guohong Liu Jiacheng Liu Wenxing Xu Xiang Wang Yi Sun et al. 2024b. Personal llm agents: Insights and survey about the capability efficiency and security. arXiv preprint arXiv:2401.05459 (2024)."},{"key":"e_1_3_2_1_23_1","first-page":"87","article-title":"AWQ: Activation-aware Weight Quantization for On-Device LLM Compression and Acceleration","volume":"6","author":"Lin Ji","year":"2024","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Wei-Ming Chen, Wei-Chen Wang, Guangxuan Xiao, Xingyu Dang, Chuang Gan, and Song Han. 2024. AWQ: Activation-aware Weight Quantization for On-Device LLM Compression and Acceleration. Proceedings of Machine Learning and Systems, Vol. 6 (2024), 87--100.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_24_1","volume-title":"Small Language Models: Survey, Measurements, and Insights. arXiv preprint arXiv:2409.15790","author":"Lu Zhenyan","year":"2024","unstructured":"Zhenyan Lu, Xiang Li, Dongqi Cai, Rongjie Yi, Fangming Liu, Xiwen Zhang, Nicholas D Lane, and Mengwei Xu. 2024. Small Language Models: Survey, Measurements, and Insights. arXiv preprint arXiv:2409.15790 (2024)."},{"key":"e_1_3_2_1_25_1","volume-title":"CHAMP: A Competition-level Dataset for Fine-Grained Analyses of LLMs' Mathematical Reasoning Capabilities. arXiv preprint arXiv:2401.06961","author":"Mao Yujun","year":"2024","unstructured":"Yujun Mao, Yoon Kim, and Yilun Zhou. 2024. CHAMP: A Competition-level Dataset for Fine-Grained Analyses of LLMs' Mathematical Reasoning Capabilities. arXiv preprint arXiv:2401.06961 (2024)."},{"key":"e_1_3_2_1_26_1","volume-title":"Voice Commerce: Understanding shopping-related voice assistants and their effect on brands.","author":"Mari Alex","year":"2019","unstructured":"Alex Mari. 2019. Voice Commerce: Understanding shopping-related voice assistants and their effect on brands. (2019)."},{"key":"e_1_3_2_1_27_1","unstructured":"Meta. 2024. Introducing Meta Llama 3: The most capable openly available LLM to date. https:\/\/ai.meta.com\/blog\/meta-llama-3\/"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688061"},{"key":"e_1_3_2_1_29_1","unstructured":"OpenAI. 2024. Hello GPT-4o. https:\/\/openai.com\/index\/hello-gpt-4o\/"},{"key":"e_1_3_2_1_30_1","volume-title":"Programming puzzles. arXiv preprint arXiv:2106.05784","author":"Schuster Tal","year":"2021","unstructured":"Tal Schuster, Ashwin Kalyan, Oleksandr Polozov, and Adam Tauman Kalai. 2021. Programming puzzles. arXiv preprint arXiv:2106.05784 (2021)."},{"key":"e_1_3_2_1_31_1","volume-title":"DefInt: A Default-interventionist Framework for Efficient Reasoning with Hybrid Large Language Models. arXiv preprint arXiv:2402.02563","author":"Shang Yu","year":"2024","unstructured":"Yu Shang, Yu Li, Fengli Xu, and Yong Li. 2024a. DefInt: A Default-interventionist Framework for Efficient Reasoning with Hybrid Large Language Models. arXiv preprint arXiv:2402.02563 (2024)."},{"key":"e_1_3_2_1_32_1","volume-title":"Agentsquare: Automatic llm agent search in modular design space. arXiv preprint arXiv:2410.06153","author":"Shang Yu","year":"2024","unstructured":"Yu Shang, Yu Li, Keyu Zhao, Likai Ma, Jiahe Liu, Fengli Xu, and Yong Li. 2024b. Agentsquare: Automatic llm agent search in modular design space. arXiv preprint arXiv:2410.06153 (2024)."},{"key":"e_1_3_2_1_33_1","volume-title":"Beyond imitation: Generating human mobility from context-aware reasoning with large language models. arXiv preprint arXiv:2402.09836","author":"Shao Chenyang","year":"2024","unstructured":"Chenyang Shao, Fengli Xu, Bingbing Fan, Jingtao Ding, Yuan Yuan, Meng Wang, and Yong Li. 2024. Beyond imitation: Generating human mobility from context-aware reasoning with large language models. arXiv preprint arXiv:2402.09836 (2024)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642459"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00280"},{"key":"e_1_3_2_1_36_1","volume-title":"Commonsenseqa: A question answering challenge targeting commonsense knowledge. arXiv preprint arXiv:1811.00937","author":"Talmor Alon","year":"2018","unstructured":"Alon Talmor, Jonathan Herzig, Nicholas Lourie, and Jonathan Berant. 2018. Commonsenseqa: A question answering challenge targeting commonsense knowledge. arXiv preprint arXiv:1811.00937 (2018)."},{"key":"e_1_3_2_1_37_1","unstructured":"Jason Wei Yi Tay Rishi Bommasani Colin Raffel Barret Zoph Sebastian Borgeaud Dani Yogatama Maarten Bosma Denny Zhou Donald Metzler et al. 2022a. Emergent abilities of large language models. arXiv preprint arXiv:2206.07682 (2022)."},{"key":"e_1_3_2_1_38_1","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022b. Chain-of-thought prompting elicits reasoning in large language models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 24824--24837.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_39_1","unstructured":"Fengli Xu Qianyue Hao Zefang Zong Jingwei Wang Yunke Zhang Jingyi Wang Xiaochong Lan Jiahui Gong Tianjian Ouyang Fanjin Meng et al. 2025. Towards Large Reasoning Models: A Survey of Reinforced Reasoning with Large Language Models. arXiv preprint arXiv:2501.09686 (2025)."},{"key":"e_1_3_2_1_40_1","volume-title":"On-device language models: A comprehensive review. arXiv preprint arXiv:2409.00088","author":"Xu Jiajun","year":"2024","unstructured":"Jiajun Xu, Zhiyuan Li, Wei Chen, Qun Wang, Xin Gao, Qi Cai, and Ziyuan Ling. 2024a. On-device language models: A comprehensive review. arXiv preprint arXiv:2409.00088 (2024)."},{"key":"e_1_3_2_1_41_1","unstructured":"Mengwei Xu Wangsong Yin Dongqi Cai Rongjie Yi Daliang Xu Qipeng Wang Bingyang Wu Yihao Zhao Chen Yang Shihe Wang et al. 2024b. A survey of resource-efficient llm and multimodal foundation models. arXiv preprint arXiv:2401.08092 (2024)."},{"key":"e_1_3_2_1_42_1","first-page":"20744","article-title":"Webshop: Towards scalable real-world web interaction with grounded language agents","volume":"35","author":"Yao Shunyu","year":"2022","unstructured":"Shunyu Yao, Howard Chen, John Yang, and Karthik Narasimhan. 2022. Webshop: Towards scalable real-world web interaction with grounded language agents. Advances in Neural Information Processing Systems, Vol. 35 (2022), 20744--20757.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_43_1","volume-title":"Tree of thoughts: Deliberate problem solving with large language models. arXiv preprint arXiv:2305.10601","author":"Yao Shunyu","year":"2023","unstructured":"Shunyu Yao, Dian Yu, Jeffrey Zhao, Izhak Shafran, Thomas L Griffiths, Yuan Cao, and Karthik Narasimhan. 2023. Tree of thoughts: Deliberate problem solving with large language models. arXiv preprint arXiv:2305.10601 (2023)."},{"key":"e_1_3_2_1_44_1","unstructured":"Denny Zhou Nathanael Sch\u00e4rli Le Hou Jason Wei Nathan Scales Xuezhi Wang Dale Schuurmans Claire Cui Olivier Bousquet Quoc Le et al. 2022. Least-to-most prompting enables complex reasoning in large language models. arXiv preprint arXiv:2205.10625 (2022)."},{"key":"e_1_3_2_1_45_1","volume-title":"Synergizing llm agents and knowledge graph for socioeconomic prediction in lbsn. arXiv preprint arXiv:2411.00028","author":"Zhou Zhilun","year":"2024","unstructured":"Zhilun Zhou, Jingyang Fan, Yu Liu, Fengli Xu, Depeng Jin, and Yong Li. 2024. Synergizing llm agents and knowledge graph for socioeconomic prediction in lbsn. arXiv preprint arXiv:2411.00028 (2024)."}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714765","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714765","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:41Z","timestamp":1750295921000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714765"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":45,"alternative-id":["10.1145\/3696410.3714765","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714765","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}