{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:37:38Z","timestamp":1774539458094,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"the Tencent WeChat Rhino-Bird Focused Research Program","award":["JRWXG2021411"],"award-info":[{"award-number":["JRWXG2021411"]}]},{"name":"the Key Scientific and Technological Innovation Program of Shandong Province","award":["2019JZZY010129"],"award-info":[{"award-number":["2019JZZY010129"]}]},{"name":"Natural Science Foundation of China","award":["62272274, 61972234, 62072279, 62102234, 62202271;"],"award-info":[{"award-number":["62272274, 61972234, 62072279, 62102234, 62202271;"]}]},{"name":"Shandong University multidisciplinary research and innovation team of young scholars","award":["No. 2020QNQT017"],"award-info":[{"award-number":["No. 2020QNQT017"]}]},{"name":"the Fundamental Research Funds of Shandong University"},{"name":"the Natural Science Foundation of Shandong Province","award":["ZR2022QF004"],"award-info":[{"award-number":["ZR2022QF004"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,22]]},"DOI":"10.1145\/3696410.3714825","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:47:11Z","timestamp":1745362031000},"page":"2222-2237","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Tool Learning in the Wild: Empowering Language Models as Automatic Tool Agents"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9658-4906","authenticated-orcid":false,"given":"Zhengliang","family":"Shi","sequence":"first","affiliation":[{"name":"Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1301-3700","authenticated-orcid":false,"given":"Shen","family":"Gao","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6547-1984","authenticated-orcid":false,"given":"Lingyong","family":"Yan","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6665-6406","authenticated-orcid":false,"given":"Yue","family":"Feng","sequence":"additional","affiliation":[{"name":"University of Birmingham, Birmingham, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9351-4160","authenticated-orcid":false,"given":"Xiuyi","family":"Chen","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4592-4074","authenticated-orcid":false,"given":"Zhumin","family":"Chen","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0684-6205","authenticated-orcid":false,"given":"Dawei","family":"Yin","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9609-9505","authenticated-orcid":false,"given":"Suzan","family":"Verberne","sequence":"additional","affiliation":[{"name":"Leiden University, Leiden, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9076-6565","authenticated-orcid":false,"given":"Zhaochun","family":"Ren","sequence":"additional","affiliation":[{"name":"Leiden University, Leiden, Netherlands"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Saaket Agashe Yue Fan and Xin Eric Wang. 2023. Evaluating multi-agent coordination abilities in large language models. In arXiv preprint arXiv:2310.03903."},{"key":"e_1_3_2_1_2_1","volume-title":"Chem- Crow: Augmenting large-language models with chemistry tools. arXiv preprint arXiv:2304.05376","author":"Bran Andres M","year":"2023","unstructured":"Andres M Bran, Sam Cox, Andrew D White, and Philippe Schwaller. 2023. Chem- Crow: Augmenting large-language models with chemistry tools. arXiv preprint arXiv:2304.05376 (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks. arXiv preprint arXiv:2211.12588","author":"Chen Wenhu","year":"2022","unstructured":"Wenhu Chen, Xueguang Ma, Xinyi Wang, and William W Cohen. 2022. Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks. arXiv preprint arXiv:2211.12588 (2022)."},{"key":"e_1_3_2_1_4_1","volume-title":"Strong Model Collapse. arXiv preprint arXiv:2410.04840","author":"Dohmatob Elvis","year":"2024","unstructured":"Elvis Dohmatob, Yunzhen Feng, and Julia Kempe. 2024. Strong Model Collapse. arXiv preprint arXiv:2410.04840 (2024)."},{"key":"e_1_3_2_1_5_1","volume-title":"Reformatted Alignment","author":"Fan Run-Ze","unstructured":"Run-Ze Fan, Xuefeng Li, Haoyang Zou, Junlong Li, Shwai He, Ethan Chern, Jiewen Hu, and Pengfei Liu. 2024. Reformatted Alignment. In Association for Computational Linguistics: EMNLP."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of Machine Learning Research: PMLR.","author":"Gao Luyu","year":"2023","unstructured":"Luyu Gao, Aman Madaan, Shuyan Zhou, Uri Alon, Pengfei Liu, Yiming Yang, Jamie Callan, and Graham Neubig. 2023. PAL: Program-aided Language Models. In Proceedings of Machine Learning Research: PMLR."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29759"},{"key":"e_1_3_2_1_8_1","volume-title":"StableToolBench: Towards Stable Large- Scale Benchmarking on Tool Learning of Large Language Models. arXiv preprint arXiv:2403.07714","author":"Guo Zhicheng","year":"2024","unstructured":"Zhicheng Guo, Sijie Cheng, HaoWang, Shihao Liang, Yujia Qin, Peng Li, Zhiyuan Liu, Maosong Sun, and Yang Liu. 2024. StableToolBench: Towards Stable Large- Scale Benchmarking on Tool Learning of Large Language Models. arXiv preprint arXiv:2403.07714 (2024)."},{"key":"e_1_3_2_1_9_1","volume-title":"ToolkenGPT: Augmenting Frozen Language Models with Massive Tools via Tool Embeddings. arXiv","author":"Hao Shibo","year":"2023","unstructured":"Shibo Hao, Tianyang Liu, Zhen Wang, and Zhiting Hu. 2023. ToolkenGPT: Augmenting Frozen Language Models with Massive Tools via Tool Embeddings. arXiv (2023)."},{"key":"e_1_3_2_1_10_1","volume-title":"Unveiling theory of mind in large language models: A parallel to single neurons in the human brain. arXiv preprint arXiv:2309.01660","author":"Jamali Mohsen","year":"2023","unstructured":"Mohsen Jamali, Ziv M Williams, and Jing Cai. 2023. Unveiling theory of mind in large language models: A parallel to single neurons in the human brain. arXiv preprint arXiv:2309.01660 (2023)."},{"key":"e_1_3_2_1_11_1","volume-title":"Genegpt: Augmenting large language models with domain tools for improved access to biomedical information. Bioinformatics","author":"Jin Qiao","year":"2024","unstructured":"Qiao Jin, Yifan Yang, Qingyu Chen, and Zhiyong Lu. 2024. Genegpt: Augmenting large language models with domain tools for improved access to biomedical information. Bioinformatics (2024)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_13_1","volume-title":"Dspy: Compiling declarative language model calls into self-improving pipelines. arXiv preprint arXiv:2310.03714","author":"Khattab Omar","year":"2023","unstructured":"Omar Khattab, Arnav Singhvi, Paridhi Maheshwari, Zhiyuan Zhang, Keshav Santhanam, Sri Vardhamanan, Saiful Haq, Ashutosh Sharma, Thomas T Joshi, Hanna Moazam, et al. 2023. Dspy: Compiling declarative language model calls into self-improving pipelines. arXiv preprint arXiv:2310.03714 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"API-Bank: A Comprehensive Benchmark for Tool-Augmented LLMs","author":"Li Minghao","unstructured":"Minghao Li, Yingxiu Zhao, Bowen Yu, Feifan Song, Hangyu Li, Haiyang Yu, Zhoujun Li, Fei Huang, and Yongbin Li. 2023. API-Bank: A Comprehensive Benchmark for Tool-Augmented LLMs. In Association for Computational Linguistics: EMNLP."},{"key":"e_1_3_2_1_15_1","volume-title":"Best Practices and Lessons Learned on Synthetic Data. In First Conference on Language Modeling.","author":"Liu Ruibo","year":"2024","unstructured":"Ruibo Liu, Jerry Wei, Fangyu Liu, Chenglei Si, Yanzhe Zhang, Jinmeng Rao, Steven Zheng, Daiyi Peng, Diyi Yang, Denny Zhou, et al. 2024. Best Practices and Lessons Learned on Synthetic Data. In First Conference on Language Modeling."},{"key":"e_1_3_2_1_16_1","unstructured":"Weiwen Liu Xu Huang Xingshan Zeng Xinlong Hao Shuai Yu Dexun Li Shuai Wang Weinan Gan Zhengying Liu Yuanqing Yu et al. 2024. ToolACE: Winning the Points of LLM Function Calling. arXiv preprint arXiv:2409.00920 (2024)."},{"key":"e_1_3_2_1_17_1","volume-title":"Apigen: Automated pipeline for generating verifiable and diverse function-calling datasets. arXiv preprint arXiv:2406.18518","author":"Liu Zuxin","year":"2024","unstructured":"Zuxin Liu, Thai Hoang, Jianguo Zhang, Ming Zhu, Tian Lan, Shirley Kokane, Juntao Tan, Weiran Yao, Zhiwei Liu, Yihao Feng, et al. 2024. Apigen: Automated pipeline for generating verifiable and diverse function-calling datasets. arXiv preprint arXiv:2406.18518 (2024)."},{"key":"e_1_3_2_1_18_1","unstructured":"JianingWang Ming Gao Xiaoli Li Xiang Li Nuo Chen Qiushi Sun. 2023. Evaluating and Enhancing the Robustness of Code Pre-trained Models through Structure- Aware Adversarial Samples Generation. In EMNLP."},{"key":"e_1_3_2_1_19_1","unstructured":"Long Ouyang Jeffrey Wu Xu Jiang Diogo Almeida Carroll Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray et al. 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems (2022)."},{"key":"e_1_3_2_1_20_1","volume-title":"Gonzalez","author":"Patil Shishir G.","year":"2023","unstructured":"Shishir G. Patil, Tianjun Zhang, Xin Wang, and Joseph E. Gonzalez. 2023. Gorilla: Large Language Model Connected with Massive APIs. arXiv preprint arXiv:2305.15334 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"WebCPM: Interactive Web Search for Chinese Long-form Question Answering","author":"Qin Yujia","unstructured":"Yujia Qin, Zihan Cai, Dian Jin, Lan Yan, Shihao Liang, Kunlun Zhu, Yankai Lin, Xu Han, Ning Ding, Huadong Wang, Ruobing Xie, Fanchao Qi, Zhiyuan Liu, Maosong Sun, and Jie Zhou. 2023. WebCPM: Interactive Web Search for Chinese Long-form Question Answering. In Association for Computational Linguistics: ACL."},{"key":"e_1_3_2_1_22_1","unstructured":"Yujia Qin Shengding Hu Yankai Lin Weize Chen Ning Ding Ganqu Cui Zheni Zeng Yufei Huang Chaojun Xiao Chi Han et al. 2023. Tool learning with foundation models. arXiv preprint arXiv:2304.08354 (2023)."},{"key":"e_1_3_2_1_23_1","volume-title":"International Conference on Learning Representations: ICLR","author":"Qin Yujia","year":"2023","unstructured":"Yujia Qin, Shi Liang, Yining Ye, Kunlun Zhu, Lan Yan, Ya-Ting Lu, Yankai Lin, Xin Cong, Xiangru Tang, Bill Qian, Sihan Zhao, Runchu Tian, Ruobing Xie, Jie Zhou, Marc H. Gerstein, Dahai Li, Zhiyuan Liu, and Maosong Sun. 2023. ToolLLM: Facilitating Large Language Models to Master 16000 Real-world APIs. International Conference on Learning Representations: ICLR (2023)."},{"key":"e_1_3_2_1_24_1","volume-title":"COLT: Towards Completeness-Oriented Tool Retrieval for Large Language Models. arXiv preprint arXiv:2405.16089","author":"Qu Changle","year":"2024","unstructured":"Changle Qu, Sunhao Dai, XiaochiWei, Hengyi Cai, ShuaiqiangWang, Dawei Yin, Jun Xu, and Ji-Rong Wen. 2024. COLT: Towards Completeness-Oriented Tool Retrieval for Large Language Models. arXiv preprint arXiv:2405.16089 (2024)."},{"key":"e_1_3_2_1_25_1","volume-title":"Deep- Speed: System Optimizations Enable Training Deep Learning Models with Over 100 Billion Parameters. In SIGKDD.","author":"Rasley Jeff","year":"2020","unstructured":"Jeff Rasley, Samyam Rajbhandari, Olatunji Ruwase, and Yuxiong He. 2020. Deep- Speed: System Optimizations Enable Training Deep Learning Models with Over 100 Billion Parameters. In SIGKDD."},{"key":"e_1_3_2_1_26_1","volume-title":"Toolformer: Language Models Can Teach Themselves to Use Tools. Neural Information Processing Systems: NeurIPS","author":"Schick Timo","year":"2023","unstructured":"Timo Schick, Jane Dwivedi-Yu, Roberto Dess\u00ec, Roberta Raileanu, Maria Lomeli, Luke Zettlemoyer, Nicola Cancedda, and Thomas Scialom. 2023. Toolformer: Language Models Can Teach Themselves to Use Tools. Neural Information Processing Systems: NeurIPS (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"Evaluating Large Language Model Creativity from a Literary Perspective. arXiv preprint arXiv:2312.03746","author":"Shanahan Murray","year":"2023","unstructured":"Murray Shanahan and Catherine Clarke. 2023. Evaluating Large Language Model Creativity from a Literary Perspective. arXiv preprint arXiv:2312.03746 (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"Small llms are weak tool learners: A multi-llm agent. arXiv preprint arXiv:2401.07324","author":"Shen Weizhou","year":"2024","unstructured":"Weizhou Shen, Chenliang Li, Hongzhan Chen, Ming Yan, Xiaojun Quan, Hehong Chen, Ji Zhang, and Fei Huang. 2024. Small llms are weak tool learners: A multi-llm agent. arXiv preprint arXiv:2401.07324 (2024)."},{"key":"e_1_3_2_1_29_1","volume-title":"Hugginggpt: Solving ai tasks with chatgpt and its friends in hugging face. Advances in Neural Information Processing Systems","author":"Shen Yongliang","year":"2024","unstructured":"Yongliang Shen, Kaitao Song, Xu Tan, Dongsheng Li, Weiming Lu, and Yueting Zhuang. 2024. Hugginggpt: Solving ai tasks with chatgpt and its friends in hugging face. Advances in Neural Information Processing Systems (2024)."},{"key":"e_1_3_2_1_30_1","volume-title":"Learning to Use Tools via Cooperative and Interactive Agents. arXiv preprint arXiv:2403.03031","author":"Shi Zhengliang","year":"2024","unstructured":"Zhengliang Shi, Shen Gao, Xiuyi Chen, Lingyong Yan, Haibo Shi, Dawei Yin, Zhumin Chen, Pengjie Ren, Suzan Verberne, and Zhaochun Ren. 2024. Learning to Use Tools via Cooperative and Interactive Agents. arXiv preprint arXiv:2403.03031 (2024)."},{"key":"e_1_3_2_1_31_1","unstructured":"Jiafeng Guo Xueqi Cheng Shiyu Ni Keping Bi. 2024. When Do LLMs Need Retrieval Augmentation? Mitigating LLMs' Overconfidence Helps Retrieval Augmentation. In ACL."},{"key":"e_1_3_2_1_32_1","volume-title":"RestGPT: Connecting Large Language Models with Real-World Applications via RESTful APIs. arXiv","author":"Song Yifan","year":"2023","unstructured":"Yifan Song, Weimin Xiong, Dawei Zhu, Chengzu Li, Ke Wang, Ye Tian, and Sujian Li. 2023. RestGPT: Connecting Large Language Models with Real-World Applications via RESTful APIs. arXiv (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"MAIR: A Massive Benchmark for Evaluating Instructed Retrieval. In EMNLP.","author":"Sun Weiwei","year":"2024","unstructured":"Weiwei Sun, Zhengliang Shi, Jiulong Wu, Lingyong Yan, Xinyu Ma, Yiding Liu, Min Cao, Dawei Yin, and Zhaochun Ren. 2024. MAIR: A Massive Benchmark for Evaluating Instructed Retrieval. In EMNLP."},{"key":"e_1_3_2_1_34_1","volume-title":"Is ChatGPT good at search? investigating large language models as re-ranking agents. arXiv preprint arXiv:2304.09542","author":"Sun Weiwei","year":"2023","unstructured":"Weiwei Sun, Lingyong Yan, Xinyu Ma, Shuaiqiang Wang, Pengjie Ren, Zhumin Chen, Dawei Yin, and Zhaochun Ren. 2023. Is ChatGPT good at search? investigating large language models as re-ranking agents. arXiv preprint arXiv:2304.09542 (2023)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-demos.29"},{"key":"e_1_3_2_1_36_1","volume-title":"Toolalpaca: Generalized tool learning for language models with 3000 simulated cases. arXiv preprint arXiv:2306.05301","author":"Tang Qiaoyu","year":"2023","unstructured":"Qiaoyu Tang, Ziliang Deng, Hongyu Lin, Xianpei Han, Qiao Liang, and Le Sun. 2023. Toolalpaca: Generalized tool learning for language models with 3000 simulated cases. arXiv preprint arXiv:2306.05301 (2023)."},{"key":"e_1_3_2_1_37_1","volume-title":"Improving pretraining data using perplexity correlations. arXiv preprint arXiv:2409.05816","author":"Thrush Tristan","year":"2024","unstructured":"Tristan Thrush, Christopher Potts, and Tatsunori Hashimoto. 2024. Improving pretraining data using perplexity correlations. arXiv preprint arXiv:2409.05816 (2024)."},{"key":"e_1_3_2_1_38_1","volume-title":"Executable code actions elicit better llm agents. arXiv preprint arXiv:2402.01030","author":"Wang Xingyao","year":"2024","unstructured":"Xingyao Wang, Yangyi Chen, Lifan Yuan, Yizhe Zhang, Yunzhu Li, Hao Peng, and Heng Ji. 2024. Executable code actions elicit better llm agents. arXiv preprint arXiv:2402.01030 (2024)."},{"key":"e_1_3_2_1_39_1","volume-title":"Self-Instruct: Aligning Language Models with Self-Generated Instructions","author":"Kordi Yeganeh","unstructured":"YizhongWang, Yeganeh Kordi, Swaroop Mishra, Alisa Liu, Noah A. Smith, Daniel Khashabi, and Hannaneh Hajishirzi. 2023. Self-Instruct: Aligning Language Models with Self-Generated Instructions. In Association for Computational Linguistics: ACL."},{"key":"e_1_3_2_1_40_1","volume-title":"explain, plan and select: Interactive planning with large language models enables open-world multi-task agents. arXiv preprint arXiv:2302.01560","author":"Cai Shaofei","year":"2023","unstructured":"ZihaoWang, Shaofei Cai, Guanzhou Chen, Anji Liu, Xiaojian Ma, and Yitao Liang. 2023. Describe, explain, plan and select: Interactive planning with large language models enables open-world multi-task agents. arXiv preprint arXiv:2302.01560 (2023)."},{"key":"e_1_3_2_1_41_1","volume-title":"CodecLM: Aligning Language Models with Tailored Synthetic Data. arXiv preprint arXiv:2404.05875","author":"Wang Zifeng","year":"2024","unstructured":"Zifeng Wang, Chun-Liang Li, Vincent Perot, Long T Le, Jin Miao, Zizhao Zhang, Chen-Yu Lee, and Tomas Pfister. 2024. CodecLM: Aligning Language Models with Tailored Synthetic Data. arXiv preprint arXiv:2404.05875 (2024)."},{"key":"e_1_3_2_1_42_1","volume-title":"Bloomberggpt: A large language model for finance. arXiv preprint arXiv:2303.17564","author":"Wu Shijie","year":"2023","unstructured":"Shijie Wu, Ozan Irsoy, Steven Lu, Vadim Dabravolski, Mark Dredze, Sebastian Gehrmann, Prabhanjan Kambadur, David Rosenberg, and Gideon Mann. 2023. Bloomberggpt: A large language model for finance. arXiv preprint arXiv:2303.17564 (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"On the tool manipulation capability of open-source large language models. arXiv preprint arXiv:2305.16504","author":"Xu Qiantong","year":"2023","unstructured":"Qiantong Xu, Fenglu Hong, Bo Li, Changran Hu, Zhengyu Chen, and Jian Zhang. 2023. On the tool manipulation capability of open-source large language models. arXiv preprint arXiv:2305.16504 (2023)."},{"key":"e_1_3_2_1_44_1","volume-title":"A Survey on Knowledge Distillation of Large Language Models. arXiv","author":"Xu Xiaohan","year":"2024","unstructured":"Xiaohan Xu, Ming Li, Chongyang Tao, Tao Shen, Reynold Cheng, Jinyang Li, Can Xu, Dacheng Tao, and Tianyi Zhou. 2024. A Survey on Knowledge Distillation of Large Language Models. arXiv (2024)."},{"key":"e_1_3_2_1_45_1","volume-title":"Magpie: Alignment Data Synthesis from Scratch by Prompting Aligned LLMs with Nothing. arXiv preprint arXiv:2406.08464","author":"Xu Zhangchen","year":"2024","unstructured":"Zhangchen Xu, Fengqing Jiang, Luyao Niu, Yuntian Deng, Radha Poovendran, Yejin Choi, and Bill Yuchen Lin. 2024. Magpie: Alignment Data Synthesis from Scratch by Prompting Aligned LLMs with Nothing. arXiv preprint arXiv:2406.08464 (2024)."},{"key":"e_1_3_2_1_46_1","unstructured":"Ke Yang Jiateng Liu John Wu Chaoqi Yang Yi R Fung Sha Li Zixuan Huang Xu Cao Xingyao Wang Yiquan Wang et al. 2024. If llm is the wizard then code is the wand: A survey on how code empowers large language models to serve as intelligent agents. arXiv preprint arXiv:2401.00812 (2024)."},{"key":"e_1_3_2_1_47_1","volume-title":"GPT4Tools: Teaching Large Language Model to Use Tools via Self-instruction. Neural Information Processing Systems: NeurIPS","author":"Yang Rui","year":"2023","unstructured":"Rui Yang, Lin Song, Yanwei Li, Sijie Zhao, Yixiao Ge, Xiu Li, and Ying Shan. 2023. GPT4Tools: Teaching Large Language Model to Use Tools via Self-instruction. Neural Information Processing Systems: NeurIPS (2023)."},{"key":"e_1_3_2_1_48_1","volume-title":"ReAct: Synergizing Reasoning and Acting in Language Models. In International Conference on Learning Representations: ICLR.","author":"Yao Shunyu","year":"2023","unstructured":"Shunyu Yao, Jeffrey Zhao, Dian Yu, Nan Du, Izhak Shafran, Karthik R Narasimhan, and Yuan Cao. 2023. ReAct: Synergizing Reasoning and Acting in Language Models. In International Conference on Learning Representations: ICLR."},{"key":"e_1_3_2_1_49_1","volume-title":"Tooleyes: Fine-grained evaluation for tool learning capabilities of large language models in real-world scenarios. arXiv preprint arXiv:2401.00741","author":"Ye Junjie","year":"2024","unstructured":"Junjie Ye, Guanyu Li, Songyang Gao, Caishuang Huang, Yilong Wu, Sixian Li, Xiaoran Fan, Shihan Dou, Qi Zhang, Tao Gui, et al. 2024. Tooleyes: Fine-grained evaluation for tool learning capabilities of large language models in real-world scenarios. arXiv preprint arXiv:2401.00741 (2024)."},{"key":"e_1_3_2_1_50_1","volume-title":"Lumos: Learning agents with unified data, modular design, and open-source llms. arXiv preprint arXiv:2311.05657","author":"Yin Da","year":"2023","unstructured":"Da Yin, Faeze Brahman, Abhilasha Ravichander, Khyathi Chandu, Kai-Wei Chang, Yejin Choi, and Bill Yuchen Lin. 2023. Lumos: Learning agents with unified data, modular design, and open-source llms. arXiv preprint arXiv:2311.05657 (2023)."},{"key":"e_1_3_2_1_51_1","volume-title":"RankRAG: Unifying Context Ranking with Retrieval-Augmented Generation in LLMs. arXiv preprint arXiv:2407.02485","author":"Yu Yue","year":"2024","unstructured":"Yue Yu,Wei Ping, Zihan Liu, BoxinWang, Jiaxuan You, Chao Zhang, Mohammad Shoeybi, and Bryan Catanzaro. 2024. RankRAG: Unifying Context Ranking with Retrieval-Augmented Generation in LLMs. arXiv preprint arXiv:2407.02485 (2024)."},{"key":"e_1_3_2_1_52_1","volume-title":"International Conference on Learning Representations: ICLR","author":"Yuan Lifan","year":"2024","unstructured":"Lifan Yuan, Yangyi Chen, Xingyao Wang, Yi R Fung, Hao Peng, and Heng Ji. 2024. Craft: Customizing llms by creating and retrieving from specialized toolsets. International Conference on Learning Representations: ICLR (2024)."},{"key":"e_1_3_2_1_53_1","volume-title":"Agenttuning: Enabling generalized agent abilities for llms. arXiv","author":"Zeng Aohan","year":"2023","unstructured":"Aohan Zeng, Mingdao Liu, Rui Lu, Bowen Wang, Xiao Liu, Yuxiao Dong, and Jie Tang. 2023. Agenttuning: Enabling generalized agent abilities for llms. arXiv (2023)."},{"key":"e_1_3_2_1_54_1","volume-title":"Lima: Less is more for alignment. Advances in Neural Information Processing Systems","author":"Zhou Chunting","year":"2024","unstructured":"Chunting Zhou, Pengfei Liu, Puxin Xu, Srinivasan Iyer, Jiao Sun, Yuning Mao, Xuezhe Ma, Avia Efrat, Ping Yu, Lili Yu, et al. 2024. Lima: Less is more for alignment. Advances in Neural Information Processing Systems (2024)."},{"key":"e_1_3_2_1_55_1","volume-title":"Programming Every Example: Lifting Pre-training Data Quality like Experts at Scale. arXiv preprint arXiv:2409.17115","author":"Zhou Fan","year":"2024","unstructured":"Fan Zhou, Zengzhi Wang, Qian Liu, Junlong Li, and Pengfei Liu. 2024. Programming Every Example: Lifting Pre-training Data Quality like Experts at Scale. arXiv preprint arXiv:2409.17115 (2024)."},{"key":"e_1_3_2_1_56_1","volume-title":"Efficient Action Space Navigation in Large Language Models with A* Search. ArXiv","author":"Zhuang Yuchen","year":"2023","unstructured":"Yuchen Zhuang, Xiang Chen, Tong Yu, Saayan Mitra, Victor S. Bursztyn, Ryan A. Rossi, Somdeb Sarkhel, and Chao Zhang. 2023. ToolChain*: Efficient Action Space Navigation in Large Language Models with A* Search. ArXiv (2023)."},{"key":"e_1_3_2_1_57_1","volume-title":"ToolQA: A Dataset for LLM Question Answering with External Tools. arXiv","author":"Zhuang Yuchen","year":"2023","unstructured":"Yuchen Zhuang, Yue Yu, Kuan Wang, Haotian Sun, and Chao Zhang. 2023. ToolQA: A Dataset for LLM Question Answering with External Tools. arXiv (2023)."}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714825","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714825","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:42Z","timestamp":1750295922000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714825"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":57,"alternative-id":["10.1145\/3696410.3714825","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714825","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}