{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T09:41:19Z","timestamp":1775122879848,"version":"3.50.1"},"reference-count":199,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,1,13]],"date-time":"2025-01-13T00:00:00Z","timestamp":1736726400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,13]],"date-time":"2025-01-13T00:00:00Z","timestamp":1736726400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s11704-024-40678-2","type":"journal-article","created":{"date-parts":[[2025,1,13]],"date-time":"2025-01-13T02:38:15Z","timestamp":1736735895000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":45,"title":["Tool learning with large language models: a survey"],"prefix":"10.1007","volume":"19","author":[{"given":"Changle","family":"Qu","sequence":"first","affiliation":[]},{"given":"Sunhao","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Xiaochi","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Hengyi","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Shuaiqiang","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Dawei","family":"Yin","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Ji-rong","family":"Wen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,13]]},"reference":[{"issue":"3","key":"40678_CR1","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1038\/scientificamerican0960-62","volume":"203","author":"S L Washburn","year":"1960","unstructured":"Washburn S L. Tools and human evolution. Scientific American, 1960, 203(3): 62\u201375","journal-title":"Scientific American"},{"key":"40678_CR2","volume-title":"Tools, Language, and Cognition in Human Evolution","author":"K R Gibson","year":"1994","unstructured":"Gibson K R, Ingold T. Tools, Language, and Cognition in Human Evolution. Cambridge: Cambridge University Press, 1994"},{"key":"40678_CR3","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7209.001.0001","volume-title":"What Is Cognitive Science?","author":"B Von Eckardt","year":"1995","unstructured":"Von Eckardt B. What Is Cognitive Science? The MIT Press, 1995, ISBN: 9780262720236"},{"key":"40678_CR4","doi-asserted-by":"publisher","DOI":"10.1353\/book.98237","volume-title":"Animal Tool Behavior: the Use and Manufacture of Tools by Animals","author":"R W Shumaker","year":"2011","unstructured":"Shumaker R W, Walkup K R, Beck B B. Animal Tool Behavior: the Use and Manufacture of Tools by Animals. Baltimore: Johns Hopkins University Press, 2011"},{"key":"40678_CR5","volume-title":"GPT-4 technical report","author":"J Achiam","year":"2024","unstructured":"Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, et al. GPT-4 technical report. 2024, arXiv preprint arXiv: 2303.08774"},{"key":"40678_CR6","doi-asserted-by":"publisher","first-page":"113679","DOI":"10.1016\/j.eswa.2020.113679","volume":"165","author":"W S El-Kassas","year":"2021","unstructured":"El-Kassas W S, Salama C R, Rafea A A, Mohamed H K. Automatic text summarization: a comprehensive survey. Expert Systems with Applications, 2021, 165: 113679","journal-title":"Expert Systems with Applications"},{"key":"40678_CR7","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1162\/tacl_a_00632","volume":"12","author":"T Zhang","year":"2024","unstructured":"Zhang T, Ladhak F, Durmus E, Liang P, McKeown K, Hashimoto T B. Benchmarking large language models for news summarization. Transactions of the Association for Computational Linguistics, 2024, 12: 39\u201357","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"40678_CR8","first-page":"41092","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"B Zhang","year":"2023","unstructured":"Zhang B, Haddow B, Birch A. Prompting large language model for machine translation: a case study. In: Proceedings of the 40th International Conference on Machine Learning. 2023, 41092\u201341110"},{"key":"40678_CR9","volume-title":"Improving LLM-based machine translation with systematic self-correction","author":"Z Feng","year":"2024","unstructured":"Feng Z, Zhang Y, Li H, Liu W, Lang J, Feng Y, Wu J, Liu Z. Improving LLM-based machine translation with systematic self-correction. 2024, arXiv preprint arXiv: 2402.16379v2"},{"key":"40678_CR10","doi-asserted-by":"publisher","first-page":"2369","DOI":"10.18653\/v1\/D18-1259","volume-title":"Proceedings of 2018 Conference on Empirical Methods in Natural Language Processing","author":"Z Yang","year":"2018","unstructured":"Yang Z, Qi P, Zhang S, Bengio Y, Cohen W W, Salakhutdinov R, Manning C D. HotpotQA: a dataset for diverse, explainable multi-hop question answering. In: Proceedings of 2018 Conference on Empirical Methods in Natural Language Processing. 2018, 2369\u20132380"},{"key":"40678_CR11","doi-asserted-by":"publisher","first-page":"452","DOI":"10.1162\/tacl_a_00276","volume":"7","author":"T Kwiatkowski","year":"2019","unstructured":"Kwiatkowski T, Palomaki J, Redfield O, Collins M, Parikh A, Alberti C, Epstein D, Polosukhin I, Devlin J, Lee K, Toutanova K, Jones L, Kelcey M, Chang M W, Dai A M, Uszkoreit J, Le Q, Petrov S. Natural questions: a benchmark for question answering research. Transactions of the Association for Computational Linguistics, 2019, 7: 452\u2013466","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"40678_CR12","first-page":"9802","volume-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics","author":"A Mallen","year":"2023","unstructured":"Mallen A, Asai A, Zhong V, Das R, Khashabi D, Hajishirzi H. When not to trust language models: Investigating effectiveness of parametric and non-parametric memories. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics. 2023, 9802\u20139822"},{"key":"40678_CR13","doi-asserted-by":"publisher","first-page":"13697","DOI":"10.18653\/v1\/2024.findings-acl.813","volume-title":"Proceedings of Findings of the Association for Computational Linguistics ACL 2024","author":"T Vu","year":"2024","unstructured":"Vu T, Iyyer M, Wang X, Constant N, Wei J, Wei J, Tar C, Sung Y H, Zhou D, Le Q, Luong T. FreshLLMs: refreshing large language models with search engine augmentation. In: Proceedings of Findings of the Association for Computational Linguistics ACL 2024. 2024, 13697\u201313720"},{"issue":"12","key":"40678_CR14","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1145\/3571730","volume":"55","author":"Z Ji","year":"2023","unstructured":"Ji Z, Lee N, Frieske R, Yu T, Su D, Xu Y, Ishii E, Bang Y J, Madotto A, Fung P. Survey of hallucination in natural language generation. ACM Computing Surveys, 2023, 55(12): 248","journal-title":"ACM Computing Surveys"},{"key":"40678_CR15","volume-title":"Siren\u2019s song in the AI ocean: a survey on hallucination in large language models","author":"Y Zhang","year":"2023","unstructured":"Zhang Y, Li Y, Cui L, Cai D, Liu L, Fu T, Huang X, Zhao E, Zhang Y, Chen Y, Wang L, Luu A T, Bi W, Shi F, Shi S. Siren\u2019s song in the AI ocean: a survey on hallucination in large language models. 2023, arXiv preprint arXiv: 2309.01219"},{"key":"40678_CR16","volume-title":"Tool learning with foundation models","author":"Y Qin","year":"2024","unstructured":"Qin Y, Hu S, Lin Y, Chen W, Ding N, et al. Tool learning with foundation models. 2024, arXiv preprint arXiv: 2304.08354"},{"key":"40678_CR17","first-page":"36","volume-title":"Proceedings of the 37th Conference on Neural Information Processing Systems","author":"T Schick","year":"2023","unstructured":"Schick T, Dwivedi-Yu J, Dess\u00ec R, Raileanu R, Lomeli M, Hambro E, Zettlemoyer L, Cancedda N, Scialom T. Toolformer: language models can teach themselves to use tools. In: Proceedings of the 37th Conference on Neural Information Processing Systems. 2023, 36"},{"key":"40678_CR18","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"Y Qin","year":"2024","unstructured":"Qin Y, Liang S, Ye Y, Zhu K, Yan L, Lu Y, Lin Y, Cong X, Tang X, Qian B, Zhao S, Hong L, Tian R, Xie R, Zhou J, Gerstein M, Li D, Liu Z, Sun M. ToolLLM: facilitating large language models to master 16000+ real-world APIs. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR19","volume-title":"ToolAlpaca: generalized tool learning for language models with 3000 simulated cases","author":"Q Tang","year":"2023","unstructured":"Tang Q, Deng Z, Lin H, Han X, Liang Q, Cao B, Sun L. ToolAlpaca: generalized tool learning for language models with 3000 simulated cases. 2023, arXiv preprint arXiv: 2306.05301"},{"key":"40678_CR20","doi-asserted-by":"publisher","first-page":"2983","DOI":"10.1145\/3626772.3661381","volume-title":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"H Wang","year":"2024","unstructured":"Wang H, Qin Y, Lin Y, Pan J Z, Wong K F. Empowering large language models: tool learning for real-world interaction. In: Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval. 2024, 2983\u20132986"},{"key":"40678_CR21","first-page":"20744","volume-title":"Proceedings of the 36th Conference on Neural Information Processing Systems","author":"S Yao","year":"2022","unstructured":"Yao S, Chen H, Yang J, Narasimhan K. WebShop: towards scalable real-world web interaction with grounded language agents. In: Proceedings of the 36th Conference on Neural Information Processing Systems. 2022, 20744\u201320757"},{"key":"40678_CR22","volume-title":"Internet-augmented language models through few-shot prompting for open-domain question answering","author":"A Lazaridou","year":"2022","unstructured":"Lazaridou A, Gribovskaya E, Stokowiec W, Grigorev N. Internet-augmented language models through few-shot prompting for open-domain question answering. 2022, arXiv preprint arXiv: 2203.05115"},{"key":"40678_CR23","first-page":"112","volume-title":"Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics","author":"Y Lu","year":"2024","unstructured":"Lu Y, Yu H, Khashabi D. GEAR: augmenting language models with generalizable and efficient tool resolution. In: Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics. 2024, 112\u2013138"},{"key":"40678_CR24","first-page":"6981","volume-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics","author":"L Pan","year":"2023","unstructured":"Pan L, Wu X, Lu X, Luu A T, Wang W Y, Kan M Y, Nakov P. Fact-checking complex claims with program-guided reasoning. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics. 2023, 6981\u20137004"},{"key":"40678_CR25","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"X Wang","year":"2024","unstructured":"Wang X, Wang Z, Liu J, Chen Y, Yuan L, Peng H, Ji H. MINT: evaluating LLMs in multi-turn interaction with tools and language feedback. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR26","volume-title":"TALM: tool augmented language models","author":"A Parisi","year":"2022","unstructured":"Parisi A, Zhao Y, Fiedel N. TALM: tool augmented language models. 2022, arXiv preprint arXiv: 2205.12255"},{"key":"40678_CR27","volume-title":"MRKL systems: a modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning","author":"E Karpas","year":"2022","unstructured":"Karpas E, Abend O, Belinkov Y, Lenz B, Lieber O, Ratner N, Shoham Y, Bata H, Levine Y, Leyton-Brown K, Muhlgay D, Rozen N, Schwartz E, Shachaf G, Shalev-Shwartz S, Shashua A, Tenenholtz M. MRKL systems: a modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning. 2022, arXiv preprint arXiv: 2205.00445"},{"key":"40678_CR28","volume-title":"WebGPT: Browserassisted question-answering with human feedback","author":"R Nakano","year":"2022","unstructured":"Nakano R, Hilton J, Balaji S, Wu J, Ouyang L, Kim C, Hesse C, Jain S, Kosaraju V, Saunders W, Jiang X, Cobbe K, Eloundou T, Krueger G, Button K, Knight M, Chess B, Schulman J. WebGPT: Browserassisted question-answering with human feedback. 2022, arXiv preprint arXiv: 2112.09332"},{"key":"40678_CR29","first-page":"11854","volume-title":"Proceedings of 2023 IEEE\/CVF International Conference on Computer Vision","author":"D Sur\u00eds","year":"2023","unstructured":"Sur\u00eds D, Menon S, Vondrick C. ViperGPT: visual inference via python execution for reasoning. In: Proceedings of 2023 IEEE\/CVF International Conference on Computer Vision. 2023, 11854\u201311864"},{"key":"40678_CR30","doi-asserted-by":"publisher","first-page":"3102","DOI":"10.18653\/v1\/2023.emnlp-main.187","volume-title":"Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing","author":"M Li","year":"2023","unstructured":"Li M, Zhao Y, Yu B, Song F, Li H, Yu H, Li Z, Huang F, Li Y. API-Bank: a comprehensive benchmark for tool-augmented LLMs. In: Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing. 2023, 3102\u20133116"},{"key":"40678_CR31","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"Y Huang","year":"2024","unstructured":"Huang Y, Shi J, Li Y, Fan C, Wu S, Zhang Q, Liu Y, Zhou P, Wan Y, Gong N Z, Sun L C. MetaTool benchmark for large language models: deciding whether to use tools and which to use. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR32","first-page":"9510","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics","author":"Z Chen","year":"2024","unstructured":"Chen Z, Du W, Zhang W, Liu K, Liu J, Zheng M, Zhuo J, Zhang S, Lin D, Chen K, Zhao F. T-Eval: evaluating the tool utilization capability of large language models step by step. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. 2024, 9510\u20139529"},{"key":"40678_CR33","volume-title":"On the tool manipulation capability of open-source large language models","author":"Q Xu","year":"2023","unstructured":"Xu Q, Hong F, Li B, Hu C, Chen Z, Zhang J. On the tool manipulation capability of open-source large language models. 2023, arXiv preprint arXiv: 2305.16504"},{"key":"40678_CR34","first-page":"18030","volume-title":"Proceedings of the 38th AAAI Conference on Artificial Intelligence","author":"S Gao","year":"2024","unstructured":"Gao S, Shi Z, Zhu M, Fang B, Xin X, Ren P, Chen Z, Ma J, Ren Z. Confucius: iterative tool learning from introspection feedback by easy-to-difficult curriculum. In: Proceedings of the 38th AAAI Conference on Artificial Intelligence. 2024, 18030\u201318038"},{"key":"40678_CR35","doi-asserted-by":"publisher","first-page":"1796","DOI":"10.1145\/3626772.3657828","volume-title":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"Y Zhao","year":"2024","unstructured":"Zhao Y, Wu J, Wang X, Tang W, Wang D, De Rijke M. Let me do it for you: towards LLM empowered recommendation via tool learning. In: Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval. 2024, 1796\u20131806"},{"key":"40678_CR36","volume-title":"A survey of large language models","author":"W X Zhao","year":"2024","unstructured":"Zhao W X, Zhou K, Li J, Tang T, Wang X, et al. A survey of large language models. 2024, arXiv preprint arXiv: 2303.18223"},{"key":"40678_CR37","volume-title":"Understanding the planning of LLM agents: a survey","author":"X Huang","year":"2024","unstructured":"Huang X, Liu W, Chen X, Wang X, Wang H, Lian D, Wang Y, Tang R, Chen E. Understanding the planning of LLM agents: a survey. 2024, arXiv preprint arXiv: 2402.02716"},{"key":"40678_CR38","first-page":"5368","volume-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics","author":"S Qiao","year":"2023","unstructured":"Qiao S, Ou Y, Zhang N, Chen X, Yao Y, Deng S, Tan C, Huang F, Chen H. Reasoning with language model prompting: a survey. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics. 2023, 5368\u20135393"},{"key":"40678_CR39","volume-title":"A survey of reasoning with foundation models","author":"J Sun","year":"2024","unstructured":"Sun J, Zheng C, Xie E, Liu Z, Chu R, et al. A survey of reasoning with foundation models. 2024, arXiv preprint arXiv: 2312.11562"},{"issue":"6","key":"40678_CR40","doi-asserted-by":"publisher","first-page":"186345","DOI":"10.1007\/s11704-024-40231-1","volume":"18","author":"L Wang","year":"2024","unstructured":"Wang L, Ma C, Feng X, Zhang Z, Yang H, Zhang J, Chen Z, Tang J, Chen X, Lin Y, Zhao W X, Wei Z, Wen J. A survey on large language model based autonomous agents. Frontiers of Computer Science, 2024, 18(6): 186345","journal-title":"Frontiers of Computer Science"},{"key":"40678_CR41","volume-title":"Transactions on Machine Learning Research","author":"T R Sumers","year":"2024","unstructured":"Sumers T R, Yao S, Narasimhan K, Griffiths T L. Cognitive architectures for language agents. Transactions on Machine Learning Research, 2024, ISSN: 2835-8856"},{"key":"40678_CR42","volume-title":"The rise and potential of large language model based agents: a survey","author":"Z Xi","year":"2023","unstructured":"Xi Z, Chen W, Guo X, He W, Ding Y, et al. The rise and potential of large language model based agents: a survey. 2023, arXiv preprint arXiv: 2309.07864"},{"key":"40678_CR43","volume-title":"Retrieval-augmented generation for large language models: a survey","author":"Y Gao","year":"2024","unstructured":"Gao Y, Xiong Y, Gao X, Jia K, Pan J, Bi Y, Dai Y, Sun J, Wang M, Wang H. Retrieval-augmented generation for large language models: a survey. 2024, arXiv preprint arXiv: 2312.10997"},{"key":"40678_CR44","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-7587-7","volume-title":"Retrieval-augmented generation for AI-generated content: a survey","author":"P Zhao","year":"2024","unstructured":"Zhao P, Zhang H, Yu Q, Wang Z, Geng Y, Fu F, Yang L, Zhang W, Jiang J, Cui B. Retrieval-augmented generation for AI-generated content: a survey. 2024, arXiv preprint arXiv: 2402.19473"},{"key":"40678_CR45","volume-title":"Transactions on Machine Learning Research","author":"G Mialon","year":"2023","unstructured":"Mialon G, Dess\u00ec R, Lomeli M, Nalmpantis C, Pasunuru R, Raileanu R, Rozi\u00e8re B, Schick T, Dwivedi-Yu J, Celikyilmaz A, Grave E, LeCun Y, Scialom T. Augmented language models: a survey. Transactions on Machine Learning Research, 2023, ISSN:2835-8856"},{"key":"40678_CR46","volume-title":"A survey from the language model perspective","author":"Z Wang","year":"2024","unstructured":"Wang Z, Cheng Z, Zhu H, Fried D, Neubig G. What are tools anyway? A survey from the language model perspective. 2024, arXiv preprint arXiv: 2403.15452"},{"key":"40678_CR47","first-page":"8460","volume-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics","author":"M Komeili","year":"2022","unstructured":"Komeili M, Shuster K, Weston J. Internet-augmented dialogue generation. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics. 2022, 8460\u20138478"},{"key":"40678_CR48","volume-title":"Toolcoder: Teach code generation models to use api search tools","author":"K Zhang","year":"2023","unstructured":"Zhang K, Zhang H, Li G, Li J, Li Z, Jin Z. Toolcoder: Teach code generation models to use api search tools. arXiv preprint arXiv:2305.04032, 2023"},{"key":"40678_CR49","first-page":"8371","volume-title":"Proceedings of 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"W Shi","year":"2024","unstructured":"Shi W, Min S, Yasunaga M, Seo M, James R, Lewis M, Zettlemoyer L, Yih W T. REPLUG: retrieval-augmented black-box language models. In: Proceedings of 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 2024, 8371\u20138384"},{"key":"40678_CR50","volume-title":"ART: automatic multi-step reasoning and tool-use for large language models","author":"B Paranjape","year":"2023","unstructured":"Paranjape B, Lundberg S, Singh S, Hajishirzi H, Zettlemoyer L, Ribeiro M T. ART: automatic multi-step reasoning and tool-use for large language models. 2023, arXiv preprint arXiv: 2303.09014"},{"key":"40678_CR51","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"Z Gou","year":"2024","unstructured":"Gou Z, Shao Z, Gong Y, Shen Y, Yang Y, Duan N, Chen W. CRITIC: large language models can self-correct with tool-interactive critiquing. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR52","volume-title":"LaMDA: language models for dialog applications","author":"R Thoppilan","year":"2022","unstructured":"Thoppilan R, De Freitas D, Hall J, Shazeer N, Kulshreshtha A, et al. LaMDA: language models for dialog applications. 2022, arXiv preprint arXiv: 2201.08239"},{"key":"40678_CR53","volume-title":"Gorilla: large language model connected with massive APIs","author":"S G Patil","year":"2023","unstructured":"Patil S G, Zhang T, Wang X, Gonzalez J E. Gorilla: large language model connected with massive APIs. 2023, arXiv preprint arXiv: 2305.15334"},{"key":"40678_CR54","first-page":"36","volume-title":"Proceedings of the 37th Conference on Neural Information Processing Systems","author":"S Hao","year":"2023","unstructured":"Hao S, Liu T, Wang Z, Hu Z. ToolkenGPT: augmenting frozen language models with massive tools via tool embeddings. In: Proceedings of the 37th Conference on Neural Information Processing Systems. 2023, 36"},{"key":"40678_CR55","first-page":"36","volume-title":"Proceedings of the 37th Conference on Neural Information Processing Systems","author":"Y Zhuang","year":"2024","unstructured":"Zhuang Y, Yu Y, Wang K, Sun H, Zhang C. ToolQA: a dataset for LLM question answering with external tools. In: Proceedings of the 37th Conference on Neural Information Processing Systems. 2024, 36"},{"key":"40678_CR56","volume-title":"Syntax error-free and generalizable tool use for LLMS via finite-state decoding","author":"K Zhang","year":"2024","unstructured":"Zhang K, Chen H, Li L, Wang W. Syntax error-free and generalizable tool use for LLMS via finite-state decoding. 2024, arXiv preprint arXiv: 2310.07075v1"},{"key":"40678_CR57","volume-title":"Middleware for LLMs: tools are instrumental for language agents in complex environments","author":"Y Gu","year":"2024","unstructured":"Gu Y, Shu Y, Yu H, Liu X, Dong Y, Tang J, Srinivasa J, Latapie H, Su Y. Middleware for LLMs: tools are instrumental for language agents in complex environments. 2024, arXiv preprint arXiv: 2402.14672"},{"key":"40678_CR58","volume-title":"Training verifiers to solve math word problems","author":"K Cobbe","year":"2021","unstructured":"Cobbe K, Kosaraju V, Bavarian M, Chen M, Jun H, Kaiser L, Plappert M, Tworek J, Hilton J, Nakano R, Hesse C, Schulman J. Training verifiers to solve math word problems. 2021, arXiv preprint arXiv: 2110.14168"},{"key":"40678_CR59","first-page":"2533","volume-title":"Proceedings of Findings of the Association for Computational Linguistics","author":"Z Shao","year":"2022","unstructured":"Shao Z, Huang F, Huang M. Chaining simultaneous thoughts for numerical reasoning. In: Proceedings of Findings of the Association for Computational Linguistics. 2022, 2533\u20132547"},{"key":"40678_CR60","doi-asserted-by":"publisher","first-page":"12101","DOI":"10.18653\/v1\/2023.emnlp-main.742","volume-title":"Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing","author":"M Kadl\u010d\u00edk","year":"2023","unstructured":"Kadl\u010d\u00edk M, \u0160tef\u00e1nik M, Sotolar O, Martinek V. Calc-X and calcformers: empowering arithmetical chain-of-thought through interaction with symbolic systems. In: Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing. 2023, 12101\u201312108"},{"key":"40678_CR61","volume-title":"Proceedings of the 37th Conference on Neural Information Processing Systems","author":"J He-Yueya","year":"2023","unstructured":"He-Yueya J, Poesia G, Wang R E, Goodman N D. Solving math word problems by combining language models with symbolic solvers. In: Proceedings of the 37th Conference on Neural Information Processing Systems. 2023"},{"key":"40678_CR62","first-page":"1023","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"B Zhang","year":"2024","unstructured":"Zhang B, Zhou K, Wei X, Zhao X, Sha J, Wang S, Wen J R. Evaluating and improving tool-augmented computation-intensive math reasoning. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. 2024, 1023"},{"key":"40678_CR63","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"Z Gou","year":"2024","unstructured":"Gou Z, Shao Z, Gong Y, Shen Y, Yang Y, Huang M, Duan N, Chen W. ToRA: a tool-integrated reasoning agent for mathematical problem solving. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR64","first-page":"942","volume-title":"Proceedings of 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"D Das","year":"2024","unstructured":"Das D, Banerjee D, Aditya S, Kulkarni A. MATHSENSEI: a tool-augmented large language model for mathematical reasoning. In: Proceedings of 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 2024, 942\u2013966"},{"key":"40678_CR65","first-page":"1468","volume-title":"Proceedings of the 18th International Workshop on Semantic Evaluation","author":"V Veerendranath","year":"2024","unstructured":"Veerendranath V, Shah V, Ghate K. Calc-CMU at semeval-2024 task 7: pre-calc\u2013learning to use the calculator improves numeracy in language models. In: Proceedings of the 18th International Workshop on Semantic Evaluation. 2024, 1468\u20131475"},{"key":"40678_CR66","volume-title":"MathViz-E: A case-study in domain-specialized tool-using agents","author":"A Bulusu","year":"2024","unstructured":"Bulusu A, Man B, Jagmohan A, Vempaty A, Mari-Wyka J, Akkil D. MathViz-E: A case-study in domain-specialized tool-using agents. 2024, arXiv preprint arXiv: 2407.17544"},{"key":"40678_CR67","first-page":"10764","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"L Gao","year":"2023","unstructured":"Gao L, Madaan A, Zhou S, Alon U, Liu P, Yang Y, Callan J, Neubig G. PAL: program-aided language models. In: Proceedings of the 40th International Conference on Machine Learning. 2023, 10764\u201310799"},{"key":"40678_CR68","volume-title":"Transactions on Machine Learning Research","author":"W Chen","year":"2023","unstructured":"Chen W, Ma X, Wang X, Cohen W W. Program of thoughts prompting: disentangling computation from reasoning for numerical reasoning tasks. Transactions on Machine Learning Research, 2023, ISSN: 2835-8856"},{"key":"40678_CR69","first-page":"36","volume-title":"Proceedings of the 37th Conference on Neural Information Processing Systems","author":"P Lu","year":"2023","unstructured":"Lu P, Peng B, Cheng H, Galley M, Chang K W, Wu Y N, Zhu S C, Gao J. Chameleon: plug-and-play compositional reasoning with large language models. In: Proceedings of the 37th Conference on Neural Information Processing Systems. 2023, 36"},{"key":"40678_CR70","first-page":"223","volume-title":"Proceedings of Findings of the Association for Computational Linguistics","author":"X Wang","year":"2024","unstructured":"Wang X, Peng H, Jabbarvand R, Ji H. LETI: learning to generate from textual interactions. In: Proceedings of Findings of the Association for Computational Linguistics. 2024, 223\u2013239"},{"key":"40678_CR71","first-page":"15362","volume-title":"Proceedings of 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation","author":"J Wu","year":"2024","unstructured":"Wu J, Zhu R, Chen N, Sun Q, Li X, Gao M. Structure-aware fine-tuning for code pre-trained models. In: Proceedings of 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation. 2024, 15362\u201315372"},{"key":"40678_CR72","first-page":"13643","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics","author":"K Zhang","year":"2024","unstructured":"Zhang K, Li J, Li G, Shi X, Jin Z. CodeAgent: enhancing code generation with tool-integrated agent systems for real-world repo-level coding challenges. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. 2024, 13643\u201313658"},{"key":"40678_CR73","first-page":"1522","volume-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics","author":"T Inaba","year":"2023","unstructured":"Inaba T, Kiyomaru H, Cheng F, Kurohashi S. MultiTool-CoT: GPT-3 can use multiple external tools with chain of thought prompting. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics. 2023, 1522\u20131532"},{"issue":"5","key":"40678_CR74","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1038\/s42256-024-00832-8","volume":"6","author":"A M Bran","year":"2024","unstructured":"Bran A M, Cox S, Schilter O, Baldassari C, White A D, Schwaller P. Augmenting large language models with chemistry tools. Nature Machine Intelligence, 2024, 6(5): 525\u2013535","journal-title":"Nature Machine Intelligence"},{"key":"40678_CR75","volume-title":"A review of large language models and autonomous agents in chemistry","author":"M C Ramos","year":"2024","unstructured":"Ramos M C, Collison C J, White A D. A review of large language models and autonomous agents in chemistry. 2024, arXiv preprint arXiv: 2407.01603"},{"issue":"2","key":"40678_CR76","doi-asserted-by":"publisher","first-page":"btae075","DOI":"10.1093\/bioinformatics\/btae075","volume":"40","author":"Q Jin","year":"2024","unstructured":"Jin Q, Yang Y, Chen Q, Lu Z. GeneGPT: augmenting large language models with domain tools for improved access to biomedical information. Bioinformatics, 2024, 40(2): btae075","journal-title":"Bioinformatics"},{"key":"40678_CR77","first-page":"90","volume-title":"Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics","author":"A Theuma","year":"2024","unstructured":"Theuma A, Shareghi E. Equipping language models with tool use capability for tabular data analysis in finance. In: Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics. 2024, 90\u2013103"},{"key":"40678_CR78","volume-title":"Simulating financial market via large language model based agents","author":"S Gao","year":"2024","unstructured":"Gao S, Wen Y, Zhu M, Wei J, Cheng Y, Zhang Q, Shang S. Simulating financial market via large language model based agents. 2024, arXiv preprint arXiv: 2406.19966"},{"key":"40678_CR79","doi-asserted-by":"publisher","first-page":"4314","DOI":"10.1145\/3637528.3671801","volume-title":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","author":"W Zhang","year":"2024","unstructured":"Zhang W, Zhao L, Xia H, Sun S, Sun J, Qin M, Li X, Zhao Y, Zhao Y, Cai X, Zheng L T, Wang X R, An B. A multimodal foundation agent for financial trading: tool-augmented, diversified, and generalist. In: Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining. 2024, 4314\u20134325"},{"key":"40678_CR80","volume-title":"AgentMD: empowering language agents for risk prediction with large-scale clinical tool learning","author":"Q Jin","year":"2024","unstructured":"Jin Q, Wang Z, Yang Y, Zhu Q, Wright D, Huang T, Wilbur W J, He Z, Taylor A, Chen Q, Lu Z. AgentMD: empowering language agents for risk prediction with large-scale clinical tool learning. 2024, arXiv preprint arXiv: 2402.13225"},{"key":"40678_CR81","volume-title":"MmedAgent: learning to use medical tools with multi-modal agent","author":"B Li","year":"2024","unstructured":"Li B, Yan T, Pan Y, Luo J, Ji R, Ding J, Xu Z, Liu S, Dong H, Lin Z, Wang Y. MmedAgent: learning to use medical tools with multi-modal agent. 2024, arXiv preprint arXiv: 2407.02483"},{"key":"40678_CR82","volume-title":"MM-REACT: prompting chatGPT for multimodal reasoning and action","author":"Z Yang","year":"2023","unstructured":"Yang Z, Li L, Wang J, Lin K, Azarnasab E, Ahmed F, Liu Z, Liu C, Zeng M, Wang L. MM-REACT: prompting chatGPT for multimodal reasoning and action. 2023, arXiv preprint arXiv: 2303.11381"},{"key":"40678_CR83","volume-title":"InternChat: solving vision-centric tasks by interacting with chatbots beyond language","author":"Z Liu","year":"2023","unstructured":"Liu Z, He Y, Wang W, Wang W, Wang Y, Chen S, Zhang Q, Yang Y, Li Q, Yu J, Li K, Chen Z, Yang X, Zhu X, Wang Y, Wang L, Luo P, Dai J, Qiao Y. InternChat: solving vision-centric tasks by interacting with chatbots beyond language. 2023, arXiv preprint arXiv: 2305.05662v2"},{"key":"40678_CR84","volume-title":"AssistGPT: a general multi-modal assistant that can plan, execute, inspect, and learn","author":"D Gao","year":"2023","unstructured":"Gao D, Ji L, Zhou L, Lin K Q, Chen J, Fan Z, Shou M Z. AssistGPT: a general multi-modal assistant that can plan, execute, inspect, and learn. 2023, arXiv preprint arXiv: 2306.08640"},{"key":"40678_CR85","first-page":"13258","volume-title":"Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Z Gao","year":"2024","unstructured":"Gao Z, Du Y, Zhang X, Ma X, Han W, Zhu S C, Li Q. CLOVA: a closed-loop visual assistant with tool usage and update. In: Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2024, 13258\u201313268"},{"key":"40678_CR86","first-page":"6390","volume-title":"Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"L Zhao","year":"2024","unstructured":"Zhao L, Yang Y, Zhang K, Shao W, Zhang Y, Qiao Y, Luo P, Ji R. DiffAgent: fast and accurate text-to-image API selection with large language model. In: Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2024, 6390\u20136399"},{"key":"40678_CR87","volume-title":"m&m\u2019s: a benchmark to evaluate tool-use for multi-step multi-modal tasks","author":"Z Ma","year":"2024","unstructured":"Ma Z, Huang W, Zhang J, Gupta T, Krishna R. m&m\u2019s: a benchmark to evaluate tool-use for multi-step multi-modal tasks. 2024, arXiv preprint arXiv: 2403.11085"},{"key":"40678_CR88","volume-title":"Tool-LMM: a large multi-modal model for tool agent learning","author":"C Wang","year":"2023","unstructured":"Wang C, Luo W, Chen Q, Mai H, Guo J, Dong S, Xuan X, Li Z, Ma L, Gao S. Tool-LMM: a large multi-modal model for tool agent learning. 2023, arXiv preprint arXiv: 2401.10727v1"},{"key":"40678_CR89","first-page":"36","volume-title":"Proceedings of the 37th Conference on Neural Information Processing Systems","author":"Y Shen","year":"2024","unstructured":"Shen Y, Song K, Tan X, Li D, Lu W, Zhuang Y. HuggingGPT: solving AI tasks with chatGPT and its friends in hugging face. In: Proceedings of the 37th Conference on Neural Information Processing Systems. 2024, 36"},{"key":"40678_CR90","volume-title":"GitAgent: facilitating autonomous agent with GitHub by tool extension","author":"B Lyu","year":"2023","unstructured":"Lyu B, Cong X, Yu H, Yang P, Qin Y, Ye Y, Lu Y, Zhang Z, Yan Y, Lin Y, Liu Z, Sun M. GitAgent: facilitating autonomous agent with GitHub by tool extension. 2023, arXiv preprint arXiv: 2312.17294"},{"key":"40678_CR91","first-page":"24824","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"J Wei","year":"2022","unstructured":"Wei J, Wang X, Schuurmans D, Bosma M, Ichter B, Xia F, Chi E H, Le Q V, Zhou D. Chain-of-thought prompting elicits reasoning in large language models. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 24824\u201324837"},{"key":"40678_CR92","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"S Yao","year":"2023","unstructured":"Yao S, Zhao J, Yu D, Du N, Shafran I, Narasimhan K R, Cao Y. ReAct: synergizing reasoning and acting in language models. In: Proceedings of the 11th International Conference on Learning Representations. 2023"},{"key":"40678_CR93","volume-title":"RestGPT: connecting large language models with real-world applications via RESTful APIs","author":"Y Song","year":"2023","unstructured":"Song Y, Xiong W, Zhu D, Li C, Wang K, Tian Y, Li S. RestGPT: connecting large language models with real-world applications via RESTful APIs. 2023, arXiv preprint arXiv: 2306.06624v1"},{"key":"40678_CR94","volume-title":"TPTU: task planning and tool usage of large language modelbased AI agents","author":"J Ruan","year":"2023","unstructured":"Ruan J, Chen Y, Zhang B, Xu Z, Bao T, Du G, Shi S, Mao H, Zeng X, Zhao R. TPTU: task planning and tool usage of large language modelbased AI agents. 2023, arXiv preprint arXiv: 2308.03427v1"},{"key":"40678_CR95","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"Y Zhuang","year":"2024","unstructured":"Zhuang Y, Chen X, Yu T, Mitra S, Bursztyn V S, Rossi R A, Sarkhel S, Zhang C. ToolChain*: efficient action space navigation in large language models with a* search. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR96","volume-title":"ControlLLM: augment language models with tools by searching on graphs","author":"Z Liu","year":"2023","unstructured":"Liu Z, Lai Z, Gao Z, Cui E, Li Z, Zhu X, Lu L, Chen Q, Qiao Y, Dai J, Wang W. ControlLLM: augment language models with tools by searching on graphs. 2023, arXiv preprint arXiv: 2310.17796"},{"key":"40678_CR97","first-page":"11160","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics","author":"Y Chen","year":"2024","unstructured":"Chen Y, Lv A, Lin T E, Chen C, Wu Y, Huang F, Li Y, Yan R. Fortify the shortest stave in attention: enhancing context awareness of large language models for effective tool use. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. 2024, 11160\u201311174"},{"key":"40678_CR98","first-page":"975","volume-title":"Proceedings of Findings of the Association for Computational Linguistics","author":"T Huang","year":"2024","unstructured":"Huang T, Jung D, Kumar V, Kachuee M, Li X, Xu P, Chen M. Planning and editing what you retrieve for enhanced tool learning. In: Proceedings of Findings of the Association for Computational Linguistics. 2024, 975\u2013988"},{"key":"40678_CR99","volume-title":"Chain of tools: large language model is an automatic multi-tool learner","author":"Z Shi","year":"2024","unstructured":"Shi Z, Gao S, Chen X, Feng Y, Yan L, Shi H, Yin D, Chen Z, Verberne S, Ren Z. Chain of tools: large language model is an automatic multi-tool learner. 2024, arXiv preprint arXiv: 2405.16533v1"},{"key":"40678_CR100","volume-title":"Can graph learning improve task planning?","author":"X Wu","year":"2024","unstructured":"Wu X, Shen Y, Shan C, Song K, Wang S, Zhang B, Feng J, Cheng H, Chen W, Xiong Y, Li D. Can graph learning improve task planning? 2024, arXiv preprint arXiv: 2405.19119v1"},{"key":"40678_CR101","volume-title":"From summary to action: enhancing large language models for complex tasks with open world APIs","author":"Y Liu","year":"2024","unstructured":"Liu Y, Yuan Y, Wang C, Han J, Ma Y, Zhang L, Zheng N, Xu H. From summary to action: enhancing large language models for complex tasks with open world APIs. 2024, arXiv preprint arXiv: 2402.18157"},{"key":"40678_CR102","doi-asserted-by":"publisher","first-page":"9039","DOI":"10.18653\/v1\/2024.findings-acl.536","volume-title":"Proceedings of Findings of the Association for Computational Linguistics ACL 2024","author":"Y Zheng","year":"2024","unstructured":"Zheng Y, Li P, Yan M, Zhang J, Huang F, Liu Y. Budget-constrained tool learning with planning. In: Proceedings of Findings of the Association for Computational Linguistics ACL 2024. 2024, 9039\u20139052"},{"key":"40678_CR103","volume-title":"From exploration to mastery: enabling LLMs to master tools via self-driven interactions","author":"C Qu","year":"2024","unstructured":"Qu C, Dai S, Wei X, Cai H, Wang S, Yin D, Xu J, Wen J R. From exploration to mastery: enabling LLMs to master tools via self-driven interactions. 2024, arXiv preprint arXiv: 2410.08197"},{"key":"40678_CR104","doi-asserted-by":"publisher","first-page":"0063","DOI":"10.34133\/icomputing.0063","volume":"3","author":"Y Liang","year":"2024","unstructured":"Liang Y, Wu C, Song T, Wu W, Xia Y, Liu Y, Ou Y, Lu S, Ji L, Mao S, Wang Y, Shou L, Gong M, Duan N. Taskmatrix. AI: Completing tasks by connecting foundation models with millions of APIs. Intelligent Computing, 2024, 3: 0063","journal-title":"Intelligent Computing"},{"key":"40678_CR105","first-page":"831","volume-title":"Proceedings of 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"C Qian","year":"2024","unstructured":"Qian C, Xiong C, Liu Z, Liu Z. Toolink: linking toolkit creation and using through chain-of-solving on open-source model. In: Proceedings of 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. 2024, 831\u2013854"},{"key":"40678_CR106","volume-title":"TPTU-v2: boosting task planning and tool usage of large language model-based agents in real-world systems","author":"Y Kong","year":"2023","unstructured":"Kong Y, Ruan J, Chen Y, Zhang B, Bao T, Shi S, Du G, Hu X, Mao H, Li Z, Zeng X, Zhao R. TPTU-v2: boosting task planning and tool usage of large language model-based agents in real-world systems. 2023, arXiv preprint arXiv: 2311.11315"},{"key":"40678_CR107","volume-title":"Small LLMs are weak tool learners: a multi-LLM agent","author":"W Shen","year":"2024","unstructured":"Shen W, Li C, Chen H, Yan M, Quan X, Chen H, Zhang J, Huang F. Small LLMs are weak tool learners: a multi-LLM agent. 2024, arXiv preprint arXiv: 2401.07324"},{"key":"40678_CR108","volume-title":"Efficient tool use with chain-of-abstraction reasoning","author":"S Gao","year":"2024","unstructured":"Gao S, Dwivedi-Yu J, Yu P, Tan X E, Pasunuru R, Golovneva O, Sinha K, Celikyilmaz A, Bosselut A, Wang T. Efficient tool use with chain-of-abstraction reasoning. 2024, arXiv preprint arXiv: 2401.17464"},{"key":"40678_CR109","volume-title":"Look before you leap: towards decision-aware and generalizable tool-usage for large language models","author":"A Gui","year":"2024","unstructured":"Gui A, Li J, Dai Y, Du N, Xiao H. Look before you leap: towards decision-aware and generalizable tool-usage for large language models. 2024, arXiv preprint arXiv: 2402.16696"},{"key":"40678_CR110","first-page":"36","volume-title":"Proceedings of the 37th Conference on Neural Information Processing Systems","author":"Y Ge","year":"2023","unstructured":"Ge Y, Hua W, Mei K, Tan J, Xu S, Li Z, Zhang Y. OpenAGI: when LLM meets domain experts. In: Proceedings of the 37th Conference on Neural Information Processing Systems. 2023, 36"},{"key":"40678_CR111","volume-title":"A solution-based LLM API-using methodology for academic information seeking","author":"Y Wang","year":"2024","unstructured":"Wang Y, Yu J, Yao Z, Zhang J, Xie Y, Tu S, Fu Y, Feng Y, Zhang J, Zhang J, Huang B, Li Y, Yuan H, Hou L, Li J, Tang J. A solution-based LLM API-using methodology for academic information seeking. 2024, arXiv preprint arXiv: 2405.15165"},{"key":"40678_CR112","volume-title":"Advancing tool-augmented large language models: integrating insights from errors in inference trees","author":"S Chen","year":"2024","unstructured":"Chen S, Wang Y, Wu Y F, Chen Q G, Xu Z, Luo W, Zhang K, Zhang L. Advancing tool-augmented large language models: integrating insights from errors in inference trees. 2024, arXiv preprint arXiv: 2406.07115"},{"key":"40678_CR113","volume-title":"APIGen: automated pipeline for generating verifiable and diverse function-calling datasets","author":"Z Liu","year":"2024","unstructured":"Liu Z, Hoang T, Zhang J, Zhu M, Lan T, Kokane S, Tan J, Yao W, Liu Z, Feng Y, Murthy R, Yang L, Savarese S, Niebles J C, Wang H, Heinecke S, Xiong C. APIGen: automated pipeline for generating verifiable and diverse function-calling datasets. 2024, arXiv preprint arXiv: 2406.18518"},{"issue":"1","key":"40678_CR114","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1108\/eb026526","volume":"28","author":"K Sparck Jones","year":"1972","unstructured":"Sparck Jones K. A statistical interpretation of term specificity and its application in retrieval. Journal of Documentation, 1972, 28(1): 11\u201321","journal-title":"Journal of Documentation"},{"issue":"4","key":"40678_CR115","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1561\/1500000019","volume":"3","author":"S Robertson","year":"2009","unstructured":"Robertson S, Zaragoza H. The probabilistic relevance framework: Bm25 and beyond. Foundations and Trends\u00ae in Information Retrieval, 2009, 3(4): 333\u2013389","journal-title":"Foundations and Trends\u00ae in Information Retrieval"},{"key":"40678_CR116","first-page":"3982","volume-title":"Proceedings of 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing","author":"N Reimers","year":"2019","unstructured":"Reimers N, Gurevych I. Sentence-BERT: sentence embeddings using Siamese BERT-networks. In: Proceedings of 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing. 2019, 3982\u20133992"},{"key":"40678_CR117","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"L Xiong","year":"2021","unstructured":"Xiong L, Xiong C, Li Y, Tang K F, Liu J, Bennett P N, Ahmed J, Overwijk A. Approximate nearest neighbor negative contrastive learning for dense text retrieval. In: Proceedings of the 9th International Conference on Learning Representations. 2021"},{"key":"40678_CR118","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1145\/3404835.3462891","volume-title":"Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"S Hofst\u00e4tter","year":"2021","unstructured":"Hofst\u00e4tter S, Lin S C, Yang J H, Lin J, Hanbury A. Efficiently teaching an effective dense retriever with balanced topic aware sampling. In: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval. 2021, 113\u2013122"},{"key":"40678_CR119","volume-title":"Transactions on Machine Learning Research","author":"G Izacard","year":"2022","unstructured":"Izacard G, Caron M, Hosseini L, Riedel S, Bojanowski P, Joulin A, Grave E. Unsupervised dense information retrieval with contrastive learning. Transactions on Machine Learning Research, 2022, ISSN: 2385-8856"},{"key":"40678_CR120","first-page":"2843","volume-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics","author":"L Gao","year":"2022","unstructured":"Gao L, Callan J. Unsupervised corpus aware language model pretraining for dense passage retrieval. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics. 2022, 2843\u20132853"},{"key":"40678_CR121","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"L Yuan","year":"2024","unstructured":"Yuan L, Chen Y, Wang X, Fung Y R, Peng H, Ji H. CRAFT: customizing LLMs by creating and retrieving from specialized toolsets. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR122","volume-title":"ProTIP: progressive tool retrieval improves planning","author":"R Anantha","year":"2023","unstructured":"Anantha R, Bandyopadhyay B, Kashi A, Mahinder S, Hill A W, Chappidi S. ProTIP: progressive tool retrieval improves planning. 2023, arXiv preprint arXiv: 2312.10332"},{"key":"40678_CR123","first-page":"16263","volume-title":"Proceedings of 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation","author":"Y Zheng","year":"2024","unstructured":"Zheng Y, Li P, Liu W, Liu Y, Luan J, Wang B. ToolRerank: adaptive and hierarchy-aware reranking for tool retrieval. In: Proceedings of 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation. 2024, 16263\u201316273"},{"key":"40678_CR124","doi-asserted-by":"publisher","first-page":"1930","DOI":"10.1145\/3627673.3679847","volume-title":"Proceedings of the 33rd ACM International Conference on Information and Knowledge Management","author":"C Qu","year":"2024","unstructured":"Qu C, Dai S, Wei X, Cai H, Wang S, Yin D, Xu J, Wen J R. Towards completeness-oriented tool retrieval for large language models. In: Proceedings of the 33rd ACM International Conference on Information and Knowledge Management. 2024, 1930\u20131940"},{"key":"40678_CR125","doi-asserted-by":"publisher","first-page":"14777","DOI":"10.18653\/v1\/2023.findings-emnlp.985","volume-title":"Proceedings of Findings of the Association for Computational Linguistics: EMNLP 2023","author":"Z Chen","year":"2023","unstructured":"Chen Z, Zhou K, Zhang B, Gong Z, Zhao X, Wen J R. ChatCoT: tool-augmented chain-of-thought reasoning on chat-based large language models. In: Proceedings of Findings of the Association for Computational Linguistics: EMNLP 2023, 2023, 14777\u201314790"},{"key":"40678_CR126","volume-title":"ToolNet: connecting large language models with massive tools via tool graph","author":"X Liu","year":"2024","unstructured":"Liu X, Peng Z, Yi X, Xie X, Xiang L, Liu Y, Xu D. ToolNet: connecting large language models with massive tools via tool graph. 2024, arXiv preprint arXiv: 2403.00839"},{"key":"40678_CR127","volume-title":"TOOLVERIFIER: generalization to new tools via self-verification","author":"D Mekala","year":"2024","unstructured":"Mekala D, Weston J, Lanchantin J, Raileanu R, Lomeli M, Shang J, Dwivedi-Yu J. TOOLVERIFIER: generalization to new tools via self-verification. 2024, arXiv preprint arXiv: 2402.14158"},{"key":"40678_CR128","first-page":"3550","volume-title":"Proceedings of 2024 Conference of the North American Chapter of the Association for Computational Linguistics","author":"S Qiao","year":"2024","unstructured":"Qiao S, Gui H, Lv C, Jia Q, Chen H, Zhang N. Making language models better tool learners with execution feedback. In: Proceedings of 2024 Conference of the North American Chapter of the Association for Computational Linguistics. 2024, 3550\u20133568"},{"key":"40678_CR129","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Y Du","year":"2024","unstructured":"Du Y, Wei F, Zhang H. AnyTool: self-reflective, hierarchical agents for large-scale API calls. In: Proceedings of the 41st International Conference on Machine Learning. 2024"},{"key":"40678_CR130","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1145\/3649476.3658784","volume-title":"Proceedings of Great Lakes Symposium on VLSI 2024","author":"M Fore","year":"2024","unstructured":"Fore M, Singh S, Stamoulis D. GeckOpt: LLM system efficiency via intent-based tool selection. In: Proceedings of Great Lakes Symposium on VLSI 2024. 2024, 353\u2013354"},{"key":"40678_CR131","first-page":"302","volume-title":"Proceedings of Findings of the Association for Computational Linguistics","author":"Y Zhang","year":"2024","unstructured":"Zhang Y, Cai H, Song X, Chen Y, Sun R, Zheng J. Reverse chain: a generic-rule for LLMs to master multi-API planning. In: Proceedings of Findings of the Association for Computational Linguistics. 2024, 302\u2013325"},{"key":"40678_CR132","volume-title":"EASYTOOL: enhancing LLM-based agents with concise tool instruction","author":"S Yuan","year":"2024","unstructured":"Yuan S, Song K, Chen J, Tan X, Shen Y, Kan R, Li D, Yang D. EASYTOOL: enhancing LLM-based agents with concise tool instruction. 2024, arXiv preprint arXiv: 2401.06201"},{"key":"40678_CR133","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.624","volume-title":"Learning to use tools via cooperative and interactive agents","author":"Z Shi","year":"2024","unstructured":"Shi Z, Gao S, Chen X, Feng Y, Yan L, Shi H, Yin D, Ren P, Verberne S, Ren Z. Learning to use tools via cooperative and interactive agents. 2024, arXiv preprint arXiv: 2403.03031v4"},{"key":"40678_CR134","first-page":"36","volume-title":"Proceedings of the 37th Conference on Neural Information Processing Systems","author":"R Yang","year":"2023","unstructured":"Yang R, Song L, Li Y, Zhao S, Ge Y, Li X, Shan Y. GPT4Tools: teaching large language model to use tools via self-instruction. In: Proceedings of the 37th Conference on Neural Information Processing Systems. 2023, 36"},{"key":"40678_CR135","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"L Li","year":"2024","unstructured":"Li L, Chai Y, Wang S, Sun Y, Tian H, Zhang N, Wu H. Tool-augmented reward modeling. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR136","first-page":"10583","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics","author":"B Wang","year":"2024","unstructured":"Wang B, Fang H, Eisner J, Van Durme B, Su Y. LLMs in the imaginarium: tool learning through simulated trial and error. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. 2024, 10583\u201310604"},{"key":"40678_CR137","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"F Xu","year":"2024","unstructured":"Xu F, Shi W, Choi E. RECOMP: improving retrieval-augmented LMS with compression and selective augmentation. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR138","volume-title":"ToolEyes: fine-grained evaluation for tool learning capabilities of large language models in real-world scenarios","author":"J Ye","year":"2024","unstructured":"Ye J, Li G, Gao S, Huang C, Wu Y, Li S, Fan X, Dou S, Zhang Q, Gui T, Huang X. ToolEyes: fine-grained evaluation for tool learning capabilities of large language models in real-world scenarios. 2024, arXiv preprint arXiv: 2401.00741"},{"key":"40678_CR139","doi-asserted-by":"publisher","first-page":"4363","DOI":"10.18653\/v1\/2024.findings-acl.259","volume-title":"Proceedings of Findings of the Association for Computational Linguistics ACL 2024","author":"S Huang","year":"2024","unstructured":"Huang S, Zhong W, Lu J, Zhu Q, Gao J, Liu W, Hou Y, Zeng X, Wang Y, Shang L, Jiang X, Xu R, Liu Q. Planning, creation, usage: benchmarking LLMs for comprehensive tool utilization in real-world complex scenarios. In: Proceedings of Findings of the Association for Computational Linguistics ACL 2024. 2024, 4363\u20134400"},{"key":"40678_CR140","volume-title":"Seal-tools: self-instruct tool learning dataset for agent tuning and detailed benchmark","author":"M Wu","year":"2024","unstructured":"Wu M, Zhu T, Han H, Tan C, Zhang X, Chen W. Seal-tools: self-instruct tool learning dataset for agent tuning and detailed benchmark. 2024, arXiv preprint arXiv: 2405.08355v1"},{"key":"40678_CR141","first-page":"12859","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics","author":"K Basu","year":"2024","unstructured":"Basu K, Abdelaziz I, Chaudhury S, Dan S, Crouse M, Munawar A, Austel V, Kumaravel S, Muthusamy V, Kapanipathi P, Lastras L A. API-BLEND: a comprehensive corpora for training and benchmarking API LLMs. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. 2024, 12859\u201312870"},{"key":"40678_CR142","volume-title":"ShortcutsBench: a large-scale real-world benchmark for API-based agents","author":"H Shen","year":"2024","unstructured":"Shen H, Li Y, Meng D, Cai D, Qi S, Zhang L, Xu M, Ma Y. ShortcutsBench: a large-scale real-world benchmark for API-based agents. 2024, arXiv preprint arXiv: 2407.00132v2"},{"key":"40678_CR143","volume-title":"GTA: a benchmark for general tool agents","author":"J Wang","year":"2024","unstructured":"Wang J, Ma Z, Li Y, Zhang S, Chen C, Chen K, Le X. GTA: a benchmark for general tool agents. 2024, arXiv preprint arXiv: 2407.08713"},{"key":"40678_CR144","volume-title":"WTU-EVAL: a whether-or-not tool usage evaluation benchmark for large language models","author":"K Ning","year":"2024","unstructured":"Ning K, Su Y, Lv X, Zhang Y, Liu J, Liu K, Xu J. WTU-EVAL: a whether-or-not tool usage evaluation benchmark for large language models. 2024, arXiv preprint arXiv: 2407.12823"},{"key":"40678_CR145","first-page":"16022","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics","author":"H Trivedi","year":"2024","unstructured":"Trivedi H, Khot T, Hartmann M, Manku R, Dong V, Li E, Gupta S, Sabharwal A, Balasubramanian N. AppWorld: a controllable world of apps and people for benchmarking interactive coding agents. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. 2024, 16022\u201316076"},{"key":"40678_CR146","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"Y Ruan","year":"2024","unstructured":"Ruan Y, Dong H, Wang A, Pitis S, Zhou Y, Ba J, Dubois Y, Maddison C J, Hashimoto T. Identifying the risks of lm agents with an lmemulated sandbox. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR147","volume-title":"ToolTalk: evaluating tool-usage in a conversational setting","author":"N Farn","year":"2023","unstructured":"Farn N, Shin R. ToolTalk: evaluating tool-usage in a conversational setting. 2023, arXiv preprint arXiv: 2311.10775"},{"key":"40678_CR148","volume-title":"VioTGPT: learning to schedule vision tools towards intelligent video internet of things","author":"Y Zhong","year":"2023","unstructured":"Zhong Y, Qi M, Wang R, Qiu Y, Zhang Y, Ma H. VioTGPT: learning to schedule vision tools towards intelligent video internet of things. 2023, arXiv preprint arXiv: 2312.00401"},{"key":"40678_CR149","volume-title":"RoTBench: a multi-level benchmark for evaluating the robustness of large language models in tool learning","author":"J Ye","year":"2024","unstructured":"Ye J, Wu Y, Gao S, Huang C, Li S, Li G, Fan X, Zhang Q, Gui T, Huang X. RoTBench: a multi-level benchmark for evaluating the robustness of large language models in tool learning. 2024, arXiv preprint arXiv: 2401.08326"},{"key":"40678_CR150","first-page":"2181","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics","author":"J Ye","year":"2024","unstructured":"Ye J, Li S, Li G, Huang C, Gao S, Wu Y, Zhang Q, Gui T, Huang X. ToolSword: unveiling safety issues of large language models in tool learning across three stages. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. 2024, 2181\u20132211"},{"key":"40678_CR151","volume-title":"SciAgent: Tool-augmented language models for scientific reasoning","author":"Y Ma","year":"2024","unstructured":"Ma Y, Gou Z, Hao J, Xu R, Wang S, Pan L, Yang Y, Cao Y, Sun A, Awadalla H, Chen W. SciAgent: Tool-augmented language models for scientific reasoning. 2024, arXiv preprint arXiv: 2402.11451"},{"key":"40678_CR152","doi-asserted-by":"publisher","first-page":"11143","DOI":"10.18653\/v1\/2024.findings-acl.664","volume-title":"Proceedings of Findings of the Association for Computational Linguistics ACL 2024","author":"Z Guo","year":"2024","unstructured":"Guo Z, Cheng S, Wang H, Liang S, Qin Y, Li P, Liu Z, Sun M, Liu Y. StableToolBench: towards stable large-scale benchmarking on tool learning of large language models. In: Proceedings of Findings of the Association for Computational Linguistics ACL 2024. 2024, 11143\u201311156"},{"key":"40678_CR153","doi-asserted-by":"publisher","first-page":"10471","DOI":"10.18653\/v1\/2024.findings-acl.624","volume-title":"Proceedings of Findings of the Association for Computational Linguistics ACL 2024","author":"Q Zhan","year":"2024","unstructured":"Zhan Q, Liang Z, Ying Z, Kang D. InjecAgent: benchmarking indirect prompt injections in tool-integrated large language model agents. In: Proceedings of Findings of the Association for Computational Linguistics ACL 2024. 2024, 10471\u201310506"},{"key":"40678_CR154","doi-asserted-by":"publisher","first-page":"15711","DOI":"10.18653\/v1\/2024.findings-acl.928","volume-title":"Proceedings of Findings of the Association for Computational Linguistics ACL 2024","author":"Z Guo","year":"2024","unstructured":"Guo Z, Huang Y, Xiong D. CToolEval: a Chinese benchmark for LLM-powered agent evaluation in real-world API interactions. In: Proceedings of Findings of the Association for Computational Linguistics ACL 2024. 2024, 15711\u201315724"},{"key":"40678_CR155","volume-title":"ToolSandbox: a stateful, conversational, interactive evaluation benchmark for LLM tool use capabilities","author":"J Lu","year":"2024","unstructured":"Lu J, Holleis T, Zhang Y, Aumayer B, Nan F, Bai F, Ma S, Ma S, Li M, Yin G, Wang Z, Pang R. ToolSandbox: a stateful, conversational, interactive evaluation benchmark for LLM tool use capabilities. 2024, arXiv preprint arXiv: 2408.04682"},{"key":"40678_CR156","first-page":"6","volume-title":"University of Waterloo, Dissertation","author":"M Zhu","year":"2004","unstructured":"Zhu M. Recall, precision and average precision. University of Waterloo, Dissertation, 2004, 6"},{"issue":"4","key":"40678_CR157","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1145\/582415.582418","volume":"20","author":"K J\u00e4rvelin","year":"2002","unstructured":"J\u00e4rvelin K, Kek\u00e4l\u00e4inen J. Cumulated gain-based evaluation of IR techniques. ACM Transactions on Information Systems (TOIS), 2002, 20(4): 422\u2013446","journal-title":"ACM Transactions on Information Systems (TOIS)"},{"key":"40678_CR158","first-page":"311","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics","author":"K Papineni","year":"2002","unstructured":"Papineni K, Roukos S, Ward T, Zhu W J. Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. 2002, 311\u2013318"},{"key":"40678_CR159","first-page":"74","volume-title":"Proceedings of Text Summarization Branches Out","author":"C Y Lin","year":"2004","unstructured":"Lin C Y. ROUGE: a package for automatic evaluation of summaries. In: Proceedings of Text Summarization Branches Out. 2004, 74\u201381"},{"issue":"4","key":"40678_CR160","doi-asserted-by":"publisher","first-page":"524","DOI":"10.1177\/1536867X0900900402","volume":"9","author":"M Blackwell","year":"2009","unstructured":"Blackwell M, Iacus S, King G, Porro G. CEM: coarsened exact matching in Stata. The Stata Journal: Promoting communications on statistics and Stata, 2009, 9(4): 524\u2013546","journal-title":"The Stata Journal: Promoting communications on statistics and Stata"},{"key":"40678_CR161","first-page":"2011","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"L Ouyang","year":"2022","unstructured":"Ouyang L, Wu J, Jiang X, Almeida D, Wainwright C, Mishkin P, Zhang C, Agarwal S, Slama K, Ray A, Schulman J, Hilton J, Kelton F, Miller L, Simens M, Askell A, Welinder P, Christiano P, Leike J, Lowe R. Training language models to follow instructions with human feedback. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 2011"},{"key":"40678_CR162","volume-title":"Investigating the effectiveness of chatGPT in mathematical reasoning and problem solving: evidence from the Vietnamese national high school graduation examination","author":"X Q Dao","year":"2023","unstructured":"Dao X Q, Le N B. Investigating the effectiveness of chatGPT in mathematical reasoning and problem solving: evidence from the Vietnamese national high school graduation examination. 2023, arXiv preprint arXiv: 2306.06331"},{"key":"40678_CR163","volume-title":"CMATH: can your language model pass Chinese elementary school math test?","author":"T Wei","year":"2023","unstructured":"Wei T, Luan J, Liu W, Dong S, Wang B. CMATH: can your language model pass Chinese elementary school math test? 2023, arXiv preprint arXiv: 2306.16636"},{"key":"40678_CR164","volume-title":"Evaluating large language models trained on code","author":"M Chen","year":"2021","unstructured":"Chen M, Tworek J, Jun H, Yuan Q, de Oliveira Pinto H P, et al. Evaluating large language models trained on code. 2021, arXiv preprint arXiv: 2107.03374"},{"key":"40678_CR165","volume-title":"Program synthesis with large language models","author":"J Austin","year":"2021","unstructured":"Austin J, Odena A, Nye M, Bosma M, Michalewski H, Dohan D, Jiang E, Cai C, Terry M, Le Q, Sutton C. Program synthesis with large language models. 2021, arXiv preprint arXiv: 2108.07732"},{"key":"40678_CR166","volume-title":"Evaluating tool-augmented agents in remote sensing platforms","author":"S Singh","year":"2024","unstructured":"Singh S, Fore M, Stamoulis D. Evaluating tool-augmented agents in remote sensing platforms. 2024, arXiv preprint arXiv: 2405.00709v1"},{"issue":"1","key":"40678_CR167","doi-asserted-by":"publisher","first-page":"18","DOI":"10.3390\/e23010018","volume":"23","author":"P Linardatos","year":"2021","unstructured":"Linardatos P, Papastefanopoulos V, Kotsiantis S. Explainable AI: a review of machine learning interpretability methods. Entropy, 2021, 23(1): 18","journal-title":"Entropy"},{"issue":"2","key":"40678_CR168","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/3639372","volume":"15","author":"H Zhao","year":"2024","unstructured":"Zhao H, Chen H, Yang F, Liu N, Deng H, Cai H, Wang S, Yin D, Du M. Explainability for large language models: a survey. ACM Transactions on Intelligent Systems and Technology, 2024, 15(2): 20","journal-title":"ACM Transactions on Intelligent Systems and Technology"},{"key":"40678_CR169","volume-title":"Ethical and social risks of harm from language models","author":"L Weidinger","year":"2021","unstructured":"Weidinger L, Mellor J, Rauh M, Griffin C, Uesato J, et al. Ethical and social risks of harm from language models. 2021, arXiv preprint arXiv: 2112.04359"},{"key":"40678_CR170","doi-asserted-by":"publisher","first-page":"6465","DOI":"10.18653\/v1\/2023.emnlp-main.398","volume-title":"Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing","author":"T Gao","year":"2023","unstructured":"Gao T, Yen H, Yu J, Chen D. Enabling large language models to generate text with citations. In: Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing. 2023, 6465\u20136488"},{"key":"40678_CR171","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.469","volume-title":"Towards verifiable text generation with evolving memory and self-reflection","author":"H Sun","year":"2024","unstructured":"Sun H, Cai H, Wang B, Hou Y, Wei X, Wang S, Zhang Y, Yin D. Towards verifiable text generation with evolving memory and self-reflection. 2024, arXiv preprint arXiv: 2312.09075"},{"key":"40678_CR172","first-page":"2153","volume-title":"Proceedings of 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing","author":"E Wallace","year":"2019","unstructured":"Wallace E, Feng S, Kandpal N, Gardner M, Singh S. Universal adversarial triggers for attacking and analyzing NLP. In: Proceedings of 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing. 2019, 2153\u20132162"},{"key":"40678_CR173","first-page":"8018","volume-title":"Proceedings of the 34th AAAI Conference on Artificial Intelligence","author":"D Jin","year":"2020","unstructured":"Jin D, Jin Z, Zhou J T, Szolovits P. Is Bert really robust? A strong baseline for natural language attack on text classification and entailment. In: Proceedings of the 34th AAAI Conference on Artificial Intelligence. 2020, 8018\u20138025"},{"key":"40678_CR174","volume-title":"A new era in LLM security: exploring security concerns in real-world LLM-based systems","author":"F Wu","year":"2024","unstructured":"Wu F, Zhang N, Jha S, McDaniel P, Xiao C. A new era in LLM security: exploring security concerns in real-world LLM-based systems. 2024, arXiv preprint arXiv: 2402.18649"},{"key":"40678_CR175","volume-title":"Graph-toolFormer: to empower LLMs with graph reasoning ability via prompt augmented by chatGPT","author":"J Zhang","year":"2023","unstructured":"Zhang J. Graph-toolFormer: to empower LLMs with graph reasoning ability via prompt augmented by chatGPT. 2023, arXiv preprint arXiv: 2304.11116"},{"key":"40678_CR176","volume-title":"STRIDE: a tool-assisted LLM agent framework for strategic and interactive decision-making","author":"C Li","year":"2024","unstructured":"Li C, Yang R, Li T, Bafarassat M, Sharifi K, Bergemann D, Yang Z. STRIDE: a tool-assisted LLM agent framework for strategic and interactive decision-making. 2024, arXiv preprint arXiv: 2405.16376"},{"key":"40678_CR177","first-page":"9118","volume-title":"Proceedings of the 39th International Conference on Machine Learning","author":"W Huang","year":"2022","unstructured":"Huang W, Abbeel P, Pathak D, Mordatch I. Language models as zero-shot planners: extracting actionable knowledge for embodied agents. In: Proceedings of the 39th International Conference on Machine Learning. 2022, 9118\u20139147"},{"key":"40678_CR178","volume-title":"FacTool: factuality detection in generative AI\u2013a tool augmented framework for multi-task and multi-domain scenarios","author":"I C Chern","year":"2023","unstructured":"Chern I C, Chern S, Chen S, Yuan W, Feng K, Zhou C, He J, Neubig G, Liu P. FacTool: factuality detection in generative AI\u2013a tool augmented framework for multi-task and multi-domain scenarios. 2023, arXiv preprint arXiv: 2307.13528"},{"key":"40678_CR179","doi-asserted-by":"publisher","first-page":"1362","DOI":"10.1145\/3589334.3645363","volume-title":"Proceedings of ACM Web Conference 2024","author":"S Xu","year":"2024","unstructured":"Xu S, Pang L, Shen H, Cheng X, Chua T S. Search-in-the-chain: interactively enhancing large language models with search for knowledge-intensive tasks. In: Proceedings of ACM Web Conference 2024. 2024, 1362\u20131373"},{"key":"40678_CR180","first-page":"1723","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"G Kim","year":"2024","unstructured":"Kim G, Baldi P, McAleer S. Language models can solve computer tasks. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. 2024, 1723"},{"key":"40678_CR181","volume-title":"Tool-planner: dynamic solution tree planning for large language model with tool clustering","author":"Y Liu","year":"2024","unstructured":"Liu Y, Peng X, Zhang Y, Cao J, Zhang X, Cheng S, Wang X, Yin J, Du T. Tool-planner: dynamic solution tree planning for large language model with tool clustering. 2024, arXiv preprint arXiv: 2406.03807v1"},{"key":"40678_CR182","first-page":"393","volume-title":"Proceedings of the 46th European Conference on Information Retrieval","author":"P Erbacher","year":"2024","unstructured":"Erbacher P, Falissard L, Guigue V, Soulier L. Navigating uncertainty: optimizing API dependency for hallucination reduction in closed-book QA. In: Proceedings of the 46th European Conference on Information Retrieval. 2024, 393\u2013402"},{"key":"40678_CR183","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.561","volume-title":"Enhancing tool retrieval with iterative feedback from large language models","author":"Q Xu","year":"2024","unstructured":"Xu Q, Li Y, Xia H, Li W. Enhancing tool retrieval with iterative feedback from large language models. 2024, arXiv preprint arXiv: 2406.17465"},{"key":"40678_CR184","volume-title":"C-pack: Pack-aged resources to advance general chinese embedding","author":"S Xiao","year":"2023","unstructured":"Xiao S, Liu Z, Zhang P, Muennighoff N. C-pack: Pack-aged resources to advance general chinese embedding, 2023"},{"key":"40678_CR185","volume-title":"Gemini: a family of highly capable multimodal models","author":"Gemini Team Google","year":"2024","unstructured":"Gemini Team Google. Gemini: a family of highly capable multimodal models. 2024, arXiv preprint arXiv: 2312.11805"},{"key":"40678_CR186","volume-title":"Tool documentation enables zero-shot tool-usage with large language models","author":"C Y Hsieh","year":"2023","unstructured":"Hsieh C Y, Chen S A, Li C L, Fujii Y, Ratner A, Lee C Y, Krishna R, Pfister T. Tool documentation enables zero-shot tool-usage with large language models. 2023, arXiv preprint arXiv: 2308.00675"},{"key":"40678_CR187","doi-asserted-by":"publisher","first-page":"16430","DOI":"10.18653\/v1\/2024.findings-acl.974","volume-title":"Proceedings of Findings of the Association for Computational Linguistics ACL 2024","author":"Y Xu","year":"2024","unstructured":"Xu Y, Feng Y, Mu H, Hou Y, Li Y, Wang X, Zhong W, Li Z, Tu D, Zhu Q, Zhang M, Che W. Concise and precise context compression for tool-using language models. In: Proceedings of Findings of the Association for Computational Linguistics ACL 2024. 2024, 16430\u201316441"},{"key":"40678_CR188","volume-title":"TaskBench: benchmarking large language models for task automation","author":"Y Shen","year":"2023","unstructured":"Shen Y, Song K, Tan X, Zhang W, Ren K, Yuan S, Lu W, Li D, Zhuang Y. TaskBench: benchmarking large language models for task automation. 2023, arXiv preprint arXiv: 2311.18760"},{"key":"40678_CR189","volume-title":"TPE: towards better compositional reasoning over conceptual tools with multi-persona collaboration","author":"H Wang","year":"2023","unstructured":"Wang H, Wang H, Wang L, Hu M, Wang R, Xue B, Lu H, Mi F, Wong K F. TPE: towards better compositional reasoning over conceptual tools with multi-persona collaboration. 2023, arXiv preprint arXiv: 2309.16090"},{"key":"40678_CR190","doi-asserted-by":"publisher","first-page":"6922","DOI":"10.18653\/v1\/2023.findings-emnlp.462","volume-title":"Proceedings of Findings of Association for Computational Linguistics: EMNLP 2023","author":"C Qian","year":"2023","unstructured":"Qian C, Han C, Fung Y, Qin Y, Liu Z, Ji H. CREATOR: tool creation for disentangling abstract and concrete reasoning of large language models. In: Proceedings of Findings of Association for Computational Linguistics: EMNLP 2023. 2023, 6922\u20136939"},{"key":"40678_CR191","doi-asserted-by":"publisher","first-page":"13856","DOI":"10.18653\/v1\/2023.findings-emnlp.926","volume-title":"Proceedings of Findings of Findings of the Association for Computational Linguistics: EMNLP 2023","author":"A Jacovi","year":"2023","unstructured":"Jacovi A, Caciularu A, Herzig J, Aharoni R, Bohnet B, Geva M. A comprehensive evaluation of tool-assisted generation strategies. In: Proceedings of Findings of Findings of the Association for Computational Linguistics: EMNLP 2023. 2023, 13856\u201313878"},{"key":"40678_CR192","doi-asserted-by":"publisher","first-page":"6591","DOI":"10.18653\/v1\/2023.emnlp-main.407","volume-title":"Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing","author":"D Nathani","year":"2023","unstructured":"Nathani D, Wang D, Pan L, Wang W Y. MAF: multi-aspect feedback for improving reasoning in large language models. In: Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing. 2023, 6591\u20136616"},{"key":"40678_CR193","doi-asserted-by":"publisher","first-page":"100211","DOI":"10.1016\/j.hcc.2024.100211","volume":"4","author":"Y Yao","year":"2024","unstructured":"Yao Y, Duan J, Xu K, Cai Y, Sun Z, Zhang Y. A survey on large language model (LLM) security and privacy: the good, the bad, and the ugly. High-Confidence Computing, 2024, 4: 100211","journal-title":"High-Confidence Computing"},{"key":"40678_CR194","volume-title":"Risk taxonomy, mitigation, and assessment benchmarks of large language model systems","author":"T Cui","year":"2024","unstructured":"Cui T, Wang Y, Fu C, Xiao Y, Li S, Deng X, Liu Y, Zhang Q, Qiu Z, Li P, Tan Z, Xiong J, Kong X, Wen Z, Xu K, Li Q. Risk taxonomy, mitigation, and assessment benchmarks of large language model systems. 2024, arXiv preprint arXiv: 2401.05778"},{"key":"40678_CR195","volume-title":"Security and privacy challenges of large language models: a survey","author":"B C Das","year":"2024","unstructured":"Das B C, Amini M H, Wu Y. Security and privacy challenges of large language models: a survey. 2024, arXiv preprint arXiv: 2402.00888"},{"key":"40678_CR196","first-page":"8968","volume-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics","author":"Y Qin","year":"2023","unstructured":"Qin Y, Cai Z, Jin D, Yan L, Liang S, Zhu K, Lin Y, Han X, Ding N, Wang H, Xie R, Qi F, Liu Z, Sun M, Zhou J. WebCPM: interactive web search for Chinese long-form question answering. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics. 2023, 8968\u20138988"},{"key":"40678_CR197","volume-title":"Towards efficient generative large language model serving: a survey from algorithms to systems","author":"X Miao","year":"2023","unstructured":"Miao X, Oliaro G, Zhang Z, Cheng X, Jin H, Chen T, Jia Z. Towards efficient generative large language model serving: a survey from algorithms to systems. 2023, arXiv preprint arXiv: 2312.15234"},{"key":"40678_CR198","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"T Cai","year":"2024","unstructured":"Cai T, Wang X, Ma T, Chen X, Zhou D. Large language models as tool makers. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"40678_CR199","first-page":"51177","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Z Wang","year":"2024","unstructured":"Wang Z, Neubig G, Fried D. TroVE: inducing verifiable and efficient toolboxes for solving programmatic tasks. In: Proceedings of the 41st International Conference on Machine Learning. 2024, 51177\u201351191"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-024-40678-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-024-40678-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-024-40678-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,13]],"date-time":"2025-01-13T03:29:10Z","timestamp":1736738950000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-024-40678-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,13]]},"references-count":199,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["40678"],"URL":"https:\/\/doi.org\/10.1007\/s11704-024-40678-2","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,13]]},"assertion":[{"value":"5 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Competing interests The authors declare that they have no competing interests or financial conflicts to disclose.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics"}}],"article-number":"198343"}}