{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T19:00:25Z","timestamp":1769886025937,"version":"3.49.0"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T00:00:00Z","timestamp":1769817600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T00:00:00Z","timestamp":1769817600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1007\/s11704-025-41365-6","type":"journal-article","created":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T03:09:31Z","timestamp":1769828971000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Investigating effective LLM-based in-context tool use: what matters and how to improve"],"prefix":"10.1007","volume":"20","author":[{"given":"Yining","family":"Zheng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haiyang","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiahao","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Linqi","family":"Yin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunke","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chengguo","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hetao","family":"Cui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianxiang","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xipeng","family":"Qiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,31]]},"reference":[{"key":"41365_CR1","first-page":"10764","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"L Gao","year":"2023","unstructured":"Gao L, Madaan A, Zhou S, Alon U, Liu P, Yang Y, Callan J, Neubig G. PAL: program-aided language models. In: Proceedings of the 40th International Conference on Machine Learning. 2023, 10764\u201310799"},{"key":"41365_CR2","unstructured":"Parisi A, Zhao Y, Fiedel N. TALM: tool augmented language models. 2022, arXiv preprint arXiv: 2205.12255"},{"key":"41365_CR3","first-page":"2997","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"T Schick","year":"2023","unstructured":"Schick T, Dwivedi-Yu J, Dess\u00ed R, Raileanu R, Lomeli M, Hambro E, Zettlemoyer L, Cancedda N, Scialom T. Toolformer: language models can teach themselves to use tools. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. 2023, 2997"},{"key":"41365_CR4","unstructured":"OpenAI. GPT-4 technical report. 2023, arXiv preprint arXiv: 2303.08774"},{"issue":"5","key":"41365_CR5","doi-asserted-by":"publisher","first-page":"888","DOI":"10.1007\/s11633-024-1502-8","volume":"21","author":"T Sun","year":"2024","unstructured":"Sun T, Zhang X, He Z, Li P, Cheng Q, Liu X, Yan H, Shao Y, Tang Q, Zhang S, Zhao X, Chen K, Zheng Y, Zhou Z, Li R, Zhan J, Zhou Y, Li L, Yang X, Wu L, Yin Z, Huang X, Jiang Y G, Qiu X. MOSS: an open conversational large language model. Machine Intelligence Research, 2024, 21(5): 888\u2013905.","journal-title":"Machine Intelligence Research"},{"key":"41365_CR6","unstructured":"Grattafiori A, Dubey A, Jauhri A, Pandey A, Kadian A, et al. The Llama 3 herd of models. 2024, arXiv preprint arXiv: 2407.21783"},{"key":"41365_CR7","unstructured":"Tang Q, Deng Z, Lin H, Han X, Liang Q, Cao B, Sun L. ToolAlpaca: generalized tool learning for language models with 3000 simulated cases. 2023, arXiv preprint arXiv: 2306.05301"},{"key":"41365_CR8","doi-asserted-by":"publisher","first-page":"3102","DOI":"10.18653\/v1\/2023.emnlp-main.187","volume-title":"Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing","author":"M Li","year":"2023","unstructured":"Li M, Zhao Y, Yu B, Song F, Li H, Yu H, Li Z, Huang F, Li Y. API-bank: a comprehensive benchmark for tool-augmented LLMs. In: Proceedings of 2023 Conference on Empirical Methods in Natural Language Processing. 2023, 3102\u20133116"},{"key":"41365_CR9","first-page":"1","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"Y Qin","year":"2024","unstructured":"Qin Y, Liang S, Ye Y, Zhu K, Yan L, Lu Y, Lin Y, Cong X, Tang X, Qian B, Zhao S, Hong L, Tian R, Xie R, Zhou J, Gerstein M, Li D, Liu Z, Sun M. ToolLLM: facilitating large language models to master 16000+ real-world APIs. In: Proceedings of the 12th International Conference on Learning Representations. 2024, 1\u201323"},{"key":"41365_CR10","unstructured":"RapidAPI. RapidAPI: a platform for discovering and connecting to APIs. Available at the website of rapidapi.com"},{"key":"41365_CR11","first-page":"1","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"Y Huang","year":"2024","unstructured":"Huang Y, Shi J, Li Y, Fan C, Wu S, Zhang Q, Liu Y, Zhou P, Wan Y, Gong N Z, Sun L. MetaTool benchmark for large language models: deciding whether to use tools and which to use. In: Proceedings of the 12th International Conference on Learning Representations. 2024, 1\u201330"},{"key":"41365_CR12","doi-asserted-by":"crossref","unstructured":"Lu J, Holleis T, Zhang Y, Aumayer B, Nan F, Bai F, Ma S, Ma S, Li M, Yin G, Wang Z, Pang R. ToolSandbox: a stateful, conversational, interactive evaluation benchmark for LLM tool use capabilities. 2024, arXiv preprint arXiv: 2408.04682","DOI":"10.18653\/v1\/2025.findings-naacl.65"},{"key":"41365_CR13","first-page":"18","volume-title":"Proceedings of the 18th European Conference on Computer Vision","author":"Z Ma","year":"2024","unstructured":"Ma Z, Huang W, Zhang J, Gupta T, Krishna R. m&m\u2019s: a benchmark to evaluate tool-use for multi-step multi-modal tasks. In: Proceedings of the 18th European Conference on Computer Vision. 2024, 18\u201334"},{"key":"41365_CR14","unstructured":"Moon S, Jha S, Erdogan L E, Kim S, Lim W, Keutzer K, Gholami A. Efficient and scalable estimation of tool representations in vector space. 2024, arXiv preprint arXiv: 2409.02141"},{"key":"41365_CR15","unstructured":"Yuan S, Song K, Chen J, Tan X, Shen Y, Kan R, Li D, Yang D. EASYTOOL: enhancing LLM-based agents with concise tool instruction. 2024, arXiv preprint arXiv: 2401.06201"},{"key":"41365_CR16","first-page":"1131","volume-title":"Proceedings of 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track","author":"I Abdelaziz","year":"2024","unstructured":"Abdelaziz I, Basu K, Agarwal M, Kumaravel S, Stallone M, Panda R, Rizk Y, Shrivatsa Bhargav G P, Crouse M, Gunasekara C, Ikbal S, Joshi S, Karanam H, Kumar V, Munawar A, Neelam S, Raghu D, Sharma U, Soria A M, Sreedhar D, Venkateswaran P, Unuvar M, Cox D D, Roukos S, Lastras L A, Kapanipathi P. Granite-function calling model: introducing function calling abilities via multi-task learning of granular tasks. In: Proceedings of 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track. 2024, 1131\u20131139"},{"key":"41365_CR17","doi-asserted-by":"crossref","unstructured":"Erdogan L E, Lee N, Jha S, Kim S, Tabrizi R, Moon S, Hooper C, Anumanchipalli G, Keutzer K, Gholami A. TinyAgent: function calling at the edge. 2024, arXiv preprint arXiv: 2409.00608","DOI":"10.18653\/v1\/2024.emnlp-demo.9"},{"key":"41365_CR18","unstructured":"Fanjia Y, Huanzhi M, Charlie Cheng-Jie J, Tianjun Z, Shishir G P, Ion S, Joseph E G. Berkeley Function Calling Leaderboard. Available at the website of gorilla.cs.berkeley.edu\/blogs\/8_berkeley_function_calling_leaderboard.html"},{"key":"41365_CR19","unstructured":"Hsieh C Y, Chen S A, Li C L, Fujii Y, Ratner A, Lee C Y, Krishna R, Pfister T. Tool documentation enables zero-shot tool-usage with large language models. 2023, arXiv preprint arXiv: 2308.00675"},{"key":"41365_CR20","first-page":"11143","volume-title":"Proceedings of the Findings of the Association for Computational Linguistics","author":"Z Guo","year":"2024","unstructured":"Guo Z, Cheng S, Wang H, Liang S, Qin Y, Li P, Liu Z, Sun M, Liu Y. StableToolBench: towards stable large-scale benchmarking on tool learning of large language models. In: Proceedings of the Findings of the Association for Computational Linguistics. 2024, 11143\u201311156"},{"key":"41365_CR21","first-page":"372","volume-title":"Proceedings of the 13th National CCF Conference on Natural Language Processing and Chinese Computing","author":"M Wu","year":"2024","unstructured":"Wu M, Zhu T, Han H, Tan C, Zhang X, Chen W. Seal-tools: self-instruct tool learning dataset for agent tuning and detailed benchmark. In: Proceedings of the 13th National CCF Conference on Natural Language Processing and Chinese Computing. 2024, 372\u2013384"},{"key":"41365_CR22","doi-asserted-by":"publisher","first-page":"313","DOI":"10.18653\/v1\/2024.emnlp-main.19","volume-title":"Proceedings of 2024 Conference on Empirical Methods in Natural Language Processing","author":"J Ye","year":"2024","unstructured":"Ye J, Wu Y, Gao S, Huang C, Li S, Li G, Fan X, Zhang Q, Gui T, Huang X. RoTBench: a multi-level benchmark for evaluating the robustness of large language models in tool learning. In: Proceedings of 2024 Conference on Empirical Methods in Natural Language Processing. 2024, 313\u2013333"},{"key":"41365_CR23","first-page":"156","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics","author":"J Ye","year":"2024","unstructured":"Ye J, Li G, Gao S, Huang C, Wu Y, Li S, Fan X, Dou S, Ji T, Zhang Q, Gui T, Huang X. ToolEyes: fine-grained evaluation for tool learning capabilities of large language models in real-world scenarios. In: Proceedings of the 31st International Conference on Computational Linguistics. 2024, 156\u2013187"},{"key":"41365_CR24","first-page":"2180","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing","author":"Y Zhuang","year":"2023","unstructured":"Zhuang Y, Yu Y, Wang K, Sun H, Zhang C. ToolQA: a dataset for LLM question answering with external tools. In: Proceedings of the 37th International Conference on Neural Information Processing. 2023, 2180"},{"key":"41365_CR25","first-page":"1657","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Y Shen","year":"2023","unstructured":"Shen Y, Song K, Tan X, Li D, Lu W, Zhuang Y. HuggingGPT: solving AI tasks with ChatGPT and its friends in hugging face. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. 2023, 1657"},{"key":"41365_CR26","doi-asserted-by":"publisher","first-page":"9510","DOI":"10.18653\/v1\/2024.acl-long.515","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","author":"Z Chen","year":"2024","unstructured":"Chen Z, Du W, Zhang W, Liu K, Liu J, Zheng M, Zhuo J, Zhang S, Lin D, Chen K, Zhao F. T-eval: evaluating the tool utilization capability of large language models step by step. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024, 9510\u20139529"},{"key":"41365_CR27","first-page":"1","volume-title":"Proceedings of the 38th Conference on Neural Information Processing Systems","author":"Y Shen","year":"2023","unstructured":"Shen Y, Song K, Tan X, Zhang W, Ren K, Yuan S, Lu W, Li D, Zhuang Y. TaskBench: benchmarking large language models for task automation. In: Proceedings of the 38th Conference on Neural Information Processing Systems. 2023, 1\u201335"},{"key":"41365_CR28","unstructured":"Bassamzadeh N, Methani C. Plan with code: comparing approaches for robust NL to DSL generation. 2024, arXiv preprint arXiv:2408.08335"},{"key":"41365_CR29","unstructured":"Liu Y, Peng X, Cao J, Bo S, Zhang Y, Zhang X, Cheng S, Wang X, Yin J, Du T. Tool-planner: task planning with clusters across multiple tools. 2024, arXiv preprint arXiv: 2406.03807"},{"key":"41365_CR30","doi-asserted-by":"publisher","first-page":"975","DOI":"10.18653\/v1\/2024.findings-naacl.61","volume-title":"Proceedings of the Findings of the Association for Computational Linguistics: NAACL 2024","author":"T Huang","year":"2024","unstructured":"Huang T, Jung D, Kumar V, Kachuee M, Li X, Xu P, Chen M. Planning and editing what you retrieve for enhanced tool learning. In: Proceedings of the Findings of the Association for Computational Linguistics: NAACL 2024. 2024, 975\u2013988"},{"key":"41365_CR31","doi-asserted-by":"publisher","first-page":"12859","DOI":"10.18653\/v1\/2024.acl-long.694","volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","author":"K Basu","year":"2024","unstructured":"Basu K, Abdelaziz I, Chaudhury S, Dan S, Crouse M, Munawar A, Austel V, Kumaravel S, Muthusamy V, Kapanipathi P, Lastras L. API-BLEND: a comprehensive corpora for training and benchmarking API LLMs. In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2024, 12859\u201312870"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-025-41365-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-025-41365-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-025-41365-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T03:09:37Z","timestamp":1769828977000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-025-41365-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,31]]},"references-count":31,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2026,7]]}},"alternative-id":["41365"],"URL":"https:\/\/doi.org\/10.1007\/s11704-025-41365-6","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,31]]},"assertion":[{"value":"14 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare that they have no competing interests or financial conflicts to disclose","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"2007323"}}