{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T06:34:13Z","timestamp":1778394853749,"version":"3.51.4"},"reference-count":218,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T00:00:00Z","timestamp":1767657600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T00:00:00Z","timestamp":1768867200000},"content-version":"vor","delay-in-days":14,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100001803","name":"Charles Darwin University","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100001803","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"DOI":"10.1007\/s10462-025-11471-9","type":"journal-article","created":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T06:44:08Z","timestamp":1767681848000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["From language to action: a review of large language models as autonomous agents and tool users"],"prefix":"10.1007","volume":"59","author":[{"given":"Sadia Sultana","family":"Chowa","sequence":"first","affiliation":[]},{"given":"Riasad","family":"Alvi","sequence":"additional","affiliation":[]},{"given":"Subhey Sadi","family":"Rahman","sequence":"additional","affiliation":[]},{"given":"Md Abdur","family":"Rahman","sequence":"additional","affiliation":[]},{"given":"Mohaimenul Azam Khan","family":"Raiaan","sequence":"additional","affiliation":[]},{"given":"Md Rafiqul","family":"Islam","sequence":"additional","affiliation":[]},{"given":"Mukhtar","family":"Hussain","sequence":"additional","affiliation":[]},{"given":"Sami","family":"Azam","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,6]]},"reference":[{"issue":"4","key":"11471_CR2","doi-asserted-by":"crossref","first-page":"ooaf067","DOI":"10.1093\/jamiaopen\/ooaf067","volume":"8","author":"M Abbasian","year":"2025","unstructured":"Abbasian M, Azimi I, Rahmani AM, Jain R (2025) Conversational health agents: a personalized large language model-powered agent framework. JAMIA Open 8(4):ooaf067","journal-title":"JAMIA Open"},{"key":"11471_CR3","doi-asserted-by":"crossref","unstructured":"Abdaljalil S, Kurban H, Qaraqe K, Serpedin E (2025) Theorem-of-thought: a multi-agent framework for abductive, deductive, and inductive reasoning in language models. In: Proceedings of the 3rd workshop on towards knowledgeable foundation models (KnowFM). Association for Computational Linguistics, Vienna, Austria, pp 111\u2013119","DOI":"10.18653\/v1\/2025.knowllm-1.10"},{"key":"11471_CR4","doi-asserted-by":"publisher","unstructured":"Agarwal V, Kelley D (2022) Saluki: Predicting mrna half-life from mrna sequence. Data set, Zenodo, [Online]. Available: https:\/\/doi.org\/10.5281\/zenodo.6326409","DOI":"10.5281\/zenodo.6326409"},{"key":"11471_CR5","doi-asserted-by":"crossref","unstructured":"Anderson P, Wu Q, Teney D, Bruce J, Johnson M et\u00a0al (2018) Vision-and-language navigation: Interpreting visually-grounded navigation instructions in real environments. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2018.00387"},{"key":"11471_CR6","unstructured":"Arumugam D, Griffiths TL (2025) Toward efficient exploration by large language model agents. In: The exploration in AI today workshop at ICML 2025"},{"key":"11471_CR7","unstructured":"Austin J, Odena A, Nye M, Bosma M, Michalewski H et\u00a0al (2021) Program synthesis with large language models. arXiv preprint arXiv:2108.07732"},{"key":"11471_CR9","unstructured":"Bai Y, Jones A, Ndousse K, Askell A, Chen A et\u00a0al (2022) Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862"},{"key":"11471_CR8","volume":"269","author":"X Bai","year":"2025","unstructured":"Bai X, Huang S, Wei C, Wang R (2025) Collaboration between intelligent agents and large language models: a novel approach for enhancing code generation capability. Expert Syst Appl 269:126357","journal-title":"Expert Syst Appl"},{"issue":"1","key":"11471_CR10","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1186\/s41077-025-00357-z","volume":"10","author":"FL Barra","year":"2025","unstructured":"Barra FL, Rodella G, Costa A, Scalogna A, Carenzo L et al (2025) From prompt to platform: an agentic ai workflow for healthcare simulation scenario design. Adv Simul 10(1):29","journal-title":"Adv Simul"},{"key":"11471_CR11","doi-asserted-by":"crossref","first-page":"3964","DOI":"10.52202\/079017-0130","volume":"37","author":"N Behari","year":"2024","unstructured":"Behari N, Zhang E, Zhao Y, Taneja A, Nagaraj D, Tambe M (2024) A decision-language model (dlm) for dynamic restless multi-armed bandit tasks in public health. Adv Neural Inf Process Syst 37:3964\u20134002","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR12","unstructured":"Bhardwaj R, Poria S (2023) Red-teaming large language models using chain of utterances for safety-alignment. arXiv preprint arXiv:2308.09662"},{"key":"11471_CR13","doi-asserted-by":"crossref","first-page":"138595","DOI":"10.52202\/079017-4397","volume":"37","author":"X Bo","year":"2024","unstructured":"Bo X, Zhang Z, Dai Q, Feng X, Wang L et al (2024) Reflective multi-agent collaboration based on large language models. Adv Neural Inf Process Syst 37:138595\u2013138631","journal-title":"Adv Neural Inf Process Syst"},{"issue":"7992","key":"11471_CR14","doi-asserted-by":"crossref","first-page":"570","DOI":"10.1038\/s41586-023-06792-0","volume":"624","author":"DA Boiko","year":"2023","unstructured":"Boiko DA, MacKnight R, Kline B, Gomes G (2023) Autonomous chemical research with large language models. Nature 624(7992):570\u2013578","journal-title":"Nature"},{"issue":"5","key":"11471_CR15","doi-asserted-by":"crossref","first-page":"525","DOI":"10.1038\/s42256-024-00832-8","volume":"6","author":"AM Bran","year":"2024","unstructured":"Bran AM, Cox S, Schilter O, Baldassari C, White AD, Schwaller P (2024) Augmenting large language models with chemistry tools. Nat Mach Intell 6(5):525\u2013535","journal-title":"Nat Mach Intell"},{"key":"11471_CR16","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, Subbiah M, Kaplan JD et al (2020) Language models are few-shot learners. Adv Neural Inf Process Syst 33:1877\u20131901","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR17","unstructured":"Cai Y, Li X, Goswami M, Wili\u0144ski M, Welter G, Dubrawski A (2025) LLM agents struggle at time series machine learning engineering. In: 1st ICML Workshop on foundation models for structured data"},{"key":"11471_CR18","first-page":"74325","volume":"37","author":"M Chang","year":"2024","unstructured":"Chang M, Zhang J, Zhu Z, Yang C, Yang Y, Jin Y, Lan Z, Kong L, He J (2024) Agentboard: An analytical evaluation board of multi-turn llm agents. Adv Neural Inf Process Syst 37:74325\u201374362","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR23","doi-asserted-by":"crossref","unstructured":"Chen H, Suhr A, Misra D, Snavely N, Artzi Y (2019) Touchdown: Natural language navigation and spatial reasoning in visual street environments. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12\u00a0538\u201312\u00a0547","DOI":"10.1109\/CVPR.2019.01282"},{"key":"11471_CR25","unstructured":"Chen M, Tworek J, Jun H, Yuan Q et\u00a0al (2021) Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374"},{"key":"11471_CR24","unstructured":"Chen W, Su Y, Zuo J, Yang C, Yuan C et\u00a0al (2024) Agentverse: Facilitating multi-agent collaboration and exploring emergent behaviors in agents"},{"issue":"2","key":"11471_CR19","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1016\/j.imed.2025.03.002","volume":"5","author":"X Chen","year":"2025","unstructured":"Chen X, Xiang J, Lu S, Liu Y, He M, Shi D (2025a) Evaluating large language models and agents in healthcare: key challenges in clinical applications. Intell Med 5(2):151\u2013163","journal-title":"Intell Med"},{"issue":"1","key":"11471_CR20","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1038\/s41746-025-01550-0","volume":"8","author":"X Chen","year":"2025","unstructured":"Chen X, Yi H, You M, Liu W, Wang L et al (2025b) Enhancing diagnostic capability with multi-agents conversational large language models. NPJ Digital Med 8(1):159","journal-title":"NPJ Digital Med"},{"key":"11471_CR26","unstructured":"Chen J, Yu C, Zhou X, Xu T, Mu Y, et\u00a0al (2025c) Emos: Embodiment-aware heterogeneous multi-robot operating system with llm agents. arXiv preprint arXiv:2410.22662v2"},{"key":"11471_CR21","unstructured":"Cheng J, Chin P (2024) Sociodojo: Building lifelong analytical agents with real-world text and time series. In: The twelfth international conference on learning representations"},{"key":"11471_CR22","unstructured":"Cheng Y, Zhang C, Zhang Z, Meng X, Hong S, et\u00a0al (2024) Exploring large language model based intelligent agents: definitions, methods, and prospects. arXiv preprint arXiv:2401.03428"},{"key":"11471_CR27","unstructured":"Cobbe K, Kosaraju V, Bavarian M, Chen M, Jun H et\u00a0al (2021) Training verifiers to solve math word problems. arXiv preprint arXiv:2110.14168"},{"key":"11471_CR28","doi-asserted-by":"crossref","first-page":"43","DOI":"10.29007\/1zt5","volume":"2","author":"L Codeca","year":"2018","unstructured":"Codeca L, H\u00e4rri J (2018) Monaco sumo traffic (most) scenario: a 3d mobility scenario for cooperative its. EPiC Ser Eng 2:43\u201355","journal-title":"EPiC Ser Eng"},{"key":"11471_CR29","doi-asserted-by":"crossref","unstructured":"Cuadra A, Breuch J, Estrada S, Ihim D, Hung I et al (2024) Digital forms for all: A holistic multimodal large language model agent for health data entry. In: Proceedings of the ACM on interactive mobile wearable and ubiquitous technologies 8(2):1\u201339","DOI":"10.1145\/3659624"},{"key":"11471_CR30","unstructured":"Dasari S, Ebert F, Tian S, Nair S, Bucher B et\u00a0al (2019) Robonet: Large-scale multi-robot learning. arXiv preprint arXiv:1910.11215"},{"key":"11471_CR31","doi-asserted-by":"crossref","unstructured":"Dasigi P, Lo K, Beltagy I, Cohan A, Smith NA, Gardner M (2021) A dataset of information-seeking questions and answers anchored in research papers. In: Proceedings of the 2021 conference of the North American chapter of the association for computational linguistics (NAACL)","DOI":"10.18653\/v1\/2021.naacl-main.365"},{"key":"11471_CR32","doi-asserted-by":"crossref","unstructured":"de\u00a0Almeida BP, Richard G, Dalla-Torre H, Blum C, Hexemer L et\u00a0al (2025) A multimodal conversational agent for dna, rna and protein tasks. Nat Mach Intell, pp 1\u201314","DOI":"10.1101\/2024.04.30.591835"},{"key":"11471_CR33","first-page":"28091","volume":"36","author":"X Deng","year":"2023","unstructured":"Deng X, Gu Y, Zheng B, Chen S, Stevens S et al (2023) Mind2web: Towards a generalist agent for the web. Adv Neural Inf Process Syst 36:28091\u201328114","journal-title":"Adv Neural Inf Process Syst"},{"issue":"7","key":"11471_CR34","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3672459","volume":"33","author":"Y Dong","year":"2024","unstructured":"Dong Y, Jiang X, Jin Z, Li G (2024) Self-collaboration code generation via chatgpt. ACM Trans Softw Eng Methodol 33(7):1\u201338","journal-title":"ACM Trans Softw Eng Methodol"},{"key":"11471_CR35","unstructured":"Driess D, Xia F, Sajjadi MSM, Lynch C, Chowdhery A et\u00a0al (2023) Palm-e: An embodied multimodal language model. arXiv preprint arXiv:2303.03378"},{"key":"11471_CR36","doi-asserted-by":"crossref","unstructured":"Feldt R, Kang S, Yoon J, Yoo S (2023) Towards autonomous testing agents via conversational large language models. In: 2023 38th IEEE\/ACM international conference on automated software engineering (ASE). IEEE, pp 1688\u20131693","DOI":"10.1109\/ASE56229.2023.00148"},{"key":"11471_CR37","doi-asserted-by":"crossref","unstructured":"Feng X, Chen Z-Y, Qin Y, Lin Y, Chen X et al (2024) Large language model-based human-agent collaboration for complex task solving. In: Findings of the association for computational linguistics: EMNLP. Assoc Comput Linguist 2024:1336\u20131357","DOI":"10.18653\/v1\/2024.findings-emnlp.72"},{"key":"11471_CR38","doi-asserted-by":"crossref","unstructured":"Ferber D, El\u00a0Nahhas OS, W\u00f6lflein G, Wiest IC, Clusmann J et\u00a0al (2025) Development and validation of an autonomous artificial intelligence agent for clinical decision-making in oncology. Nat Cancer, pp 1\u201313","DOI":"10.1038\/s43018-025-00991-6"},{"key":"11471_CR39","unstructured":"Ferrag MA, Tihanyi N, Debbah M (2025) From llm reasoning to autonomous ai agents: a comprehensive review. arXiv preprint arXiv:2504.19678"},{"issue":"5","key":"11471_CR40","doi-asserted-by":"crossref","first-page":"3785","DOI":"10.1007\/s00146-024-02127-3","volume":"40","author":"G Franceschelli","year":"2025","unstructured":"Franceschelli G, Musolesi M (2025) On the creativity of large language models. AI Soc 40(5):3785\u20133795","journal-title":"AI Soc"},{"key":"11471_CR41","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.109771","volume":"141","author":"L Frering","year":"2025","unstructured":"Frering L, Steinbauer-Wagner G, Holzinger A (2025) Integrating belief-desire-intention agents with large language models for reliable human-robot interaction and explainable artificial intelligence. Eng Appl Artif Intell 141:109771","journal-title":"Eng Appl Artif Intell"},{"key":"11471_CR42","first-page":"119919","volume":"37","author":"Y Fu","year":"2024","unstructured":"Fu Y, Kim D-K, Kim J, Sohn S, Logeswaran L et al (2024) Autoguide: Automated generation and selection of context-aware guidelines for large language model agents. Adv Neural Inf Process Syst 37:119919\u2013119948","journal-title":"Adv Neural Inf Process Syst"},{"issue":"1","key":"11471_CR43","first-page":"1","volume":"11","author":"C Gao","year":"2024","unstructured":"Gao C, Lan X, Li N, Yuan Y, Ding J et al (2024) Large language models empowered agent-based modeling and simulation: a survey and perspectives. Human Soc Sci Commun 11(1):1\u201324","journal-title":"Human Soc Sci Commun"},{"issue":"9","key":"11471_CR44","doi-asserted-by":"crossref","first-page":"266","DOI":"10.1007\/s10462-025-11255-1","volume":"58","author":"J Gao","year":"2025","unstructured":"Gao J, Zhang Y, Chen Y, Dong Y, Chen Y et al (2025) Agent-in-the-loop to distill expert knowledge into artificial intelligence models: a survey. Artif Intell Rev 58(9):266","journal-title":"Artif Intell Rev"},{"key":"11471_CR45","doi-asserted-by":"crossref","unstructured":"George S, Sypherd C, Cashman D (2024) Probing the capacity of language model agents to operationalize disparate experiential context despite distraction. In: Findings of the association for computational linguistics: EMNLP 2024. Miami, Florida, USA: Association for Computational Linguistics, pp 15\u00a0447\u201315\u00a0459","DOI":"10.18653\/v1\/2024.findings-emnlp.905"},{"key":"11471_CR46","doi-asserted-by":"crossref","unstructured":"Geva M, Khashabi D, Segal E, Khot T, Roth D, Berant J (2021) Did Aristotle Use a Laptop? A question answering benchmark with implicit reasoning strategies. In: Transactions of the association for computational linguistics (TACL)","DOI":"10.1162\/tacl_a_00370"},{"issue":"7","key":"11471_CR47","doi-asserted-by":"crossref","first-page":"1389","DOI":"10.1039\/D4DD00013G","volume":"3","author":"A Ghafarollahi","year":"2024","unstructured":"Ghafarollahi A, Buehler MJ (2024) Protagents: protein discovery via large language model multi-agent collaborations combining physics and machine learning. Digital Discovery 3(7):1389\u20131409","journal-title":"Digital Discovery"},{"key":"11471_CR48","doi-asserted-by":"crossref","first-page":"503","DOI":"10.1038\/s41586-019-1186-3","volume":"569","author":"M Ghandi","year":"2019","unstructured":"Ghandi M, Huang FW, Jan\u00e9-Valbuena J, Kryukov GV, Golub TR et al (2019) Next-generation characterization of the cancer cell line encyclopedia. Nature 569:503\u2013508","journal-title":"Nature"},{"issue":"1","key":"11471_CR49","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1038\/s41746-025-01475-8","volume":"8","author":"AJ Goodell","year":"2025","unstructured":"Goodell AJ, Chu SN, Rouholiman D, Chu LF (2025) Large language model agents can use tools to perform clinical calculations. npj Digital Med 8(1):163","journal-title":"npj Digital Med"},{"key":"11471_CR50","unstructured":"Gottweis J, Weng WH, Daryin A, Tu T, Palepu A et\u00a0al (2025) Towards an ai co-scientist. arXiv preprint arXiv:2502.18864"},{"key":"11471_CR51","doi-asserted-by":"crossref","unstructured":"Guo Z, Cheng S, Wang H, Liang S, Qin Y et\u00a0al (2024a) Stabletoolbench: Towards stable large-scale benchmarking on tool learning of large language models. arXiv preprint arXiv:2403.07714","DOI":"10.18653\/v1\/2024.findings-acl.664"},{"key":"11471_CR52","unstructured":"Guo T, Chen X, Wang Y, Chang R, Pei S et\u00a0al (2024b) Large language model based multi-agents: a survey of progress and challenges. arXiv preprint arXiv:2402.01680"},{"key":"11471_CR53","unstructured":"Han S, Zhang Q, Yao Y, Jin W, Xu Z (2025) Llm multi-agent systems: Challenges and open problems. arXiv preprint arXiv:2402.03578v2"},{"key":"11471_CR54","doi-asserted-by":"crossref","unstructured":"Hemken N, Koneru S, Jacob F, Hartenstein H, Niehues J (2025) Can a large language model keep my secrets? a study on LLM-controlled agents. In: Proceedings of the 63rd annual meeting of the association for computational linguistics (Volume 4: Student Research Workshop). Association for Computational Linguistics, pp 746\u2013759","DOI":"10.18653\/v1\/2025.acl-srw.49"},{"key":"11471_CR55","unstructured":"Hendrycks D, Burns C, Basart S, Zou A, Mazeika M et\u00a0al (2021) Measuring massive multitask language understanding. In: Proceedings of the international conference on learning representations (ICLR)"},{"key":"11471_CR57","unstructured":"Hong S, Zhuge M, Chen J, Zheng X, Cheng Y et\u00a0al (2023) Metagpt: Meta programming for a multi-agent collaborative framework. In: The twelfth international conference on learning representations"},{"key":"11471_CR56","doi-asserted-by":"crossref","unstructured":"Hong W, Wang W, Lv Q, Xu J, Yu W et\u00a0al (2024) Cogagent: A visual language model for gui agents. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14281\u201314290","DOI":"10.1109\/CVPR52733.2024.01354"},{"key":"11471_CR60","doi-asserted-by":"crossref","unstructured":"Huang L, Cao S, Parulian N, Ji H, Wang L (2021) Efficient attentions for long document summarization. In: Proceedings of the 2021 conference of the North American chapter of the association for computational linguistics: human language technologies. Association for Computational Linguistics, pp 1419\u20131436","DOI":"10.18653\/v1\/2021.naacl-main.112"},{"key":"11471_CR61","unstructured":"Huang Q, Vora J, Liang P, Leskovec J (2023) Benchmarking large language models as ai research agents. In: NeurIPS 2023 foundation models for decision making workshop"},{"issue":"1","key":"11471_CR58","doi-asserted-by":"crossref","first-page":"62","DOI":"10.1021\/acs.jcim.4c01345","volume":"65","author":"H Huang","year":"2024","unstructured":"Huang H, Shi X, Lei H, Hu F, Cai Y (2024) Protchat: An ai multi-agent for automated protein analysis leveraging gpt-4 and protein language model. J Chem Inf Model 65(1):62\u201370","journal-title":"J Chem Inf Model"},{"issue":"4","key":"11471_CR59","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3731446","volume":"43","author":"X Huang","year":"2025","unstructured":"Huang X, Lian J, Lei Y, Yao J, Lian D, Xie X (2025) Recommender ai agent: integrating large language models for interactive recommendations. ACM Trans Inform Syst 43(4):1\u201333","journal-title":"ACM Trans Inform Syst"},{"issue":"3","key":"11471_CR62","doi-asserted-by":"crossref","first-page":"462","DOI":"10.1093\/bioinformatics\/bty635","volume":"35","author":"J Jankauskait\u0117","year":"2019","unstructured":"Jankauskait\u0117 J, Jim\u00e9nez-Garc\u00eda B, Dapk\u016bnas J, Fern\u00e1ndez-Recio J, Moal IH (2019) Skempi 2.0: an updated benchmark of changes in protein-protein binding energy, kinetics and thermodynamics upon mutation. Bioinformatics 35(3):462\u2013469","journal-title":"Bioinformatics"},{"key":"11471_CR63","doi-asserted-by":"crossref","unstructured":"Jeong I-Y, Park J (2022) Cochlscene: Acquisition of acoustic scene data using crowdsourcing. In: Asia-pacific signal and information processing association annual summit and conference (APSIPA ASC). IEEE 2022:17\u201321","DOI":"10.23919\/APSIPAASC55919.2022.9979822"},{"key":"11471_CR64","first-page":"24678","volume":"36","author":"J Ji","year":"2023","unstructured":"Ji J, Liu M, Dai J, Pan X, Zhang C et al (2023) Beavertails: Towards improved safety alignment of llm via a human-preference dataset. Adv Neural Inf Process Syst 36:24678\u201324704","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR65","doi-asserted-by":"crossref","unstructured":"Ji J, Hong D, Zhang B, Chen B, Dai J et\u00a0al (2024) Pku-saferlhf: Towards multi-level safety alignment for llms with human preference. arXiv preprint arXiv:2406.15513","DOI":"10.18653\/v1\/2025.acl-long.1544"},{"key":"11471_CR66","volume":"117","author":"Y Jin","year":"2024","unstructured":"Jin Y, Ma J (2024) Large language model as parking planning agent in the context of mixed period of autonomous vehicles and human-driven vehicles. Sustain Cities Soc 117:105940","journal-title":"Sustain Cities Soc"},{"key":"11471_CR67","doi-asserted-by":"crossref","unstructured":"Jin A, Ye Y, Lee B, Qiao Y (2024) Decoagent: Large language model empowered decentralized autonomous collaboration agents based on smart contracts. IEEE Access","DOI":"10.1109\/ACCESS.2024.3481641"},{"key":"11471_CR68","doi-asserted-by":"crossref","unstructured":"Jo E, Epstein DA, Jung H, Kim YH (2023) Understanding the benefits and challenges of deploying conversational ai leveraging large language models for public health intervention. In: Proceedings of the 2023 CHI conference on human factors in computing systems, pp 1\u201316","DOI":"10.1145\/3544548.3581503"},{"key":"11471_CR69","doi-asserted-by":"crossref","unstructured":"Jorgensen S, Nadizar G, Pietropolli G, Manzoni L, Medvet E et\u00a0al (2024) Large language model-based test case generation for gp agents. In: Proceedings of the genetic and evolutionary computation conference, pp 914\u2013923","DOI":"10.1145\/3638529.3654056"},{"key":"11471_CR70","unstructured":"Kang J, Laroche R, Yuan X, Trischler A, Liu X, Fu J (2024) Think before you act: Decision transformers with working memory. arXiv preprint arXiv:2305.16338v3"},{"key":"11471_CR71","doi-asserted-by":"crossref","unstructured":"Kannan SS, Venkatesh VL, Min BC (2024) Smart-llm: Smart multi-agent robot task planning using large language models. In: 2024 IEEE\/RSJ international conference on intelligent robots and systems (IROS). IEEE, pp 12140\u201312147","DOI":"10.1109\/IROS58592.2024.10802322"},{"key":"11471_CR72","unstructured":"Kapitanov A, Kvanchiani K, xNagaev K, Kraynov R, Makhliarchuk A (2024) Hagrid \u2013 hand gesture recognition image dataset. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision (WACV), pp 4572\u20134581"},{"key":"11471_CR73","unstructured":"Khan MA, Amani M, Das S, Ghosh B, Wu Q et\u00a0al (2025) In agents we trust, but who do agents trust? latent source preferences steer LLM generations. In: ICML 2025 workshop on reliable and responsible foundation models"},{"key":"11471_CR74","first-page":"79410","volume":"37","author":"Y Kim","year":"2024","unstructured":"Kim Y, Park C, Jeong H, Chan YS, Xu X et al (2024) Mdagents: An adaptive collaboration of llms for medical decision-making. Adv Neural Inf Process Syst 37:79410\u201379452","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR75","unstructured":"Klein LH, Potamitis N, Aydin R, West R, Gulcehre C, Arora A (2025) Fleet of agents: coordinated problem solving with large language models. In: Forty-second international conference on machine learning"},{"key":"11471_CR76","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1162\/tacl_a_00023","volume":"6","author":"T Ko\u010disk\u1ef3","year":"2018","unstructured":"Ko\u010disk\u1ef3 T, Schwarz J, Blunsom P, Dyer C, Hermann KM, Melis G, Grefenstette E (2018) The NarrativeQA reading comprehension challenge. Trans Assoc Comput Linguist 6:317\u2013328","journal-title":"Trans Assoc Comput Linguist"},{"key":"11471_CR77","first-page":"47669","volume":"36","author":"A K\u00f6pf","year":"2023","unstructured":"K\u00f6pf A, Kilcher Y, Von R\u00fctte D, Anagnostidis S, Tam ZR et al (2023) Openassistant conversations-democratizing large language model alignment. Adv Neural Inf Process Syst 36:47669\u201347681","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR78","unstructured":"Koupaee M, Wang WY (2018) Wikihow: A large scale text summarization dataset. arXiv preprint arXiv:1810.09305"},{"key":"11471_CR79","doi-asserted-by":"crossref","unstructured":"Kry\u015bci\u0144ski W, Rajani N, Agarwal D, Xiong C, Radev D (2021) Booksum: A collection of datasets for long-form narrative summarization. arXiv preprint arXiv:2105.08209","DOI":"10.18653\/v1\/2022.findings-emnlp.488"},{"issue":"10","key":"11471_CR80","doi-asserted-by":"crossref","first-page":"260","DOI":"10.1007\/s10462-024-10888-y","volume":"57","author":"P Kumar","year":"2024","unstructured":"Kumar P (2024) Large language models (llms): survey, technical frameworks, and future challenges. Artif Intell Rev 57(10):260","journal-title":"Artif Intell Rev"},{"key":"11471_CR81","first-page":"1283","volume":"36","author":"A Lampinen","year":"2023","unstructured":"Lampinen A, Chan S, Dasgupta I, Nam A, Wang J (2023) Passive learning of active causal strategies in agents and language models. Adv Neural Inf Process Syst 36:1283\u20131297","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR82","unstructured":"Li X (2025) A review of prominent paradigms for llm-based agents: tool use, planning (including rag), and feedback learning. In: Proceedings of the 31st international conference on computational linguistics, pp 9760\u20139779"},{"key":"11471_CR87","doi-asserted-by":"crossref","unstructured":"Li H, Chong Y, Stepputtis S, Campbell J, Hughes D et\u00a0al (2023a) Theory of mind for multi-agent collaboration via large language models. In: Proceedings of the 2023 conference on empirical methods in natural language processing. Association for Computational Linguistics, pp 180\u2013192","DOI":"10.18653\/v1\/2023.emnlp-main.13"},{"key":"11471_CR84","first-page":"51991","volume":"36","author":"G Li","year":"2023","unstructured":"Li G, Hammoud H, Itani H, Khizbullin D, Ghanem B (2023b) Camel: Communicative agents for \u201cmind\u201d exploration of large language model society. Adv Neural Inf Process Syst 36:51991\u201352008","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR88","doi-asserted-by":"crossref","unstructured":"Li N, Gao C, Li M, Li Y, Liao Q (2024a) EconAgent: Large language model-empowered agents for simulating macroeconomic activities. In: Proceedings of the 62nd annual meeting of the association for computational linguistics (Volume 1: Long Papers). Association for Computational Linguistics, pp 15523\u201315536","DOI":"10.18653\/v1\/2024.acl-long.829"},{"key":"11471_CR89","doi-asserted-by":"crossref","unstructured":"Li R, Luo Z, Du X (2024b) Fg-prm: Fine-grained hallucination detection and mitigation in language model mathematical reasoning. arXiv preprint arXiv:2410.06304","DOI":"10.18653\/v1\/2025.findings-emnlp.228"},{"key":"11471_CR97","doi-asserted-by":"crossref","unstructured":"Li L, Wang Y, Xu R, Wang P, Feng X et\u00a0al (2024c) Multimodal ArXiv: A dataset for improving scientific comprehension of large vision-language models. In: Proceedings of the 62nd annual meeting of the association for computational linguistics (Volume 1: Long Papers). Association for Computational Linguistics, pp 14 369\u201314 387","DOI":"10.18653\/v1\/2024.acl-long.775"},{"key":"11471_CR83","unstructured":"Li XL, Chowdhury N, Johnson DD, Hashimoto T, Liang P, et\u00a0al (2025a) Eliciting language model behaviors with investigator agents. In: Forty-second international conference on machine learning"},{"key":"11471_CR85","volume":"638","author":"Z Li","year":"2025","unstructured":"Li Z, Zhang R, Wang Z, Xie Z, Song Y (2025b) Llm-guided decision-making toolkit for multi-agent reinforcement learning. Neurocomputing 638:130105","journal-title":"Neurocomputing"},{"key":"11471_CR86","unstructured":"Lian W, Goodson B, Pentland E, Cook A, Vong C (2023) Teknium, Openorca: An open dataset of gpt augmented flan reasoning traces. HuggingFace repository, [Online]. Available: https:\/\/huggingface.co\/datasets\/Open-Orca\/OpenOrca"},{"key":"11471_CR92","doi-asserted-by":"crossref","unstructured":"Liu Z, Luo P, Wang X, Tang X (2015) Deep learning face attributes in the wild. In: Proceedings of international conference on computer vision (ICCV). pp 3730\u20133738","DOI":"10.1109\/ICCV.2015.425"},{"key":"11471_CR93","unstructured":"Liu T, Xu C, McAuley J (2023) Repobench: Benchmarking repository-level code auto-completion systems. arXiv preprint arXiv:2306.03091"},{"key":"11471_CR91","unstructured":"Liu Z, Hu H, Zhang S, Guo H, Ke S, et\u00a0al (2024a) Reason for future, act for now: a principled architecture for autonomous LLM agents. In: Proceedings of the 41st international conference on machine learning, ser. Proceedings of Machine Learning Research, vol. 235. PMLR, pp 31\u00a0186\u201331\u00a0261"},{"key":"11471_CR94","unstructured":"Liu R, Yang R, Jia C, Zhang G, Yang D, Vosoughi S (2024b) Training socially aligned language models on simulated social interactions. In: The twelfth international conference on learning representations"},{"key":"11471_CR95","unstructured":"Liu X, Yu H, Zhang H, Xu Y, Lei X et\u00a0al (2024c) Agentbench: Evaluating LLMs as agents. In: The twelfth international conference on learning representations"},{"issue":"1","key":"11471_CR90","doi-asserted-by":"crossref","first-page":"2256","DOI":"10.1038\/s41467-025-57430-4","volume":"16","author":"W Liu","year":"2025","unstructured":"Liu W, Li J, Tang Y, Zhao Y, Liu C et al (2025a) Drbioright 2.0: an llm-powered bioinformatics chatbot for large-scale cancer functional proteomics analysis. Nat Commun 16(1):2256","journal-title":"Nat Commun"},{"key":"11471_CR96","unstructured":"Liu X, ZHANG J, Shang H, Guo S, Chengxu Y, Zhu Q (2025b) Exploring prosocial irrationality for LLM agents: a social cognition view. In: The thirteenth international conference on learning representations"},{"key":"11471_CR101","unstructured":"Lu P, Qiu L, Chang KW, Wu YN, Zhu SC et\u00a0al (2023) Dynamic prompt learning via policy gradient for semi-structured mathematical reasoning. In: International conference on learning representations (ICLR)"},{"key":"11471_CR98","doi-asserted-by":"crossref","unstructured":"Lu M, Ho B, Ren D, Wang X (2024) Triageagent: Towards better multi-agents collaborations for large language model-based clinical triage. Findings of the association for computational linguistics EMNLP 2024:5747\u20135764","DOI":"10.18653\/v1\/2024.findings-emnlp.329"},{"key":"11471_CR99","unstructured":"Luo Z, Xu C, Zhao P, Sun Q, Geng X et\u00a0al (2023) Wizardcoder: Empowering code large language models with evol-instruct. arXiv preprint arXiv:2306.08568"},{"key":"11471_CR100","unstructured":"Luo J, Zhang W, Yuan Y, Zhao Y, Yang J et\u00a0al (2025) Large language model agent: a survey on methodology, applications and challenges. arXiv preprint arXiv:2503.21460"},{"key":"11471_CR102","first-page":"15497","volume":"37","author":"H Ma","year":"2024","unstructured":"Ma H, Hu T, Pu Z, Boyin L, Ai X et al (2024) Coevolving with the other you: fine-tuning LLM with sequential cooperative multi-agent reinforcement learning. Adv Neural Inf Process Syst 37:15497\u201315525","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR103","unstructured":"Mao S, Cai Y, Xia Y, Wu W, Wang X et\u00a0al (2023) Alympics: Llm agents meet game theory \u2013 exploring strategic decision-making with ai agents. arXiv preprint arXiv:2311.03220"},{"issue":"3","key":"11471_CR104","doi-asserted-by":"crossref","first-page":"173","DOI":"10.23919\/JSC.2025.0014","volume":"6","author":"R Meier","year":"2025","unstructured":"Meier R (2025) Balancing minds and data: the privacy dilemma of llms and anthropomorphism in llms. J Soc Comput 6(3):173\u2013183","journal-title":"J Soc Comput"},{"key":"11471_CR105","unstructured":"Mialon G, Fourrier C, Wolf T, LeCun Y, Scialom T (2023) Gaia: a benchmark for general ai assistants. In: The twelfth international conference on learning representations"},{"key":"11471_CR1","unstructured":"\u201cMultiarith dataset,\u201d Kaggle dataset, 2023, accessed: 2025-08-22. [Online]. Available: https:\/\/www.kaggle.com\/datasets\/dschettler8845\/multiarith-dataset"},{"key":"11471_CR106","doi-asserted-by":"crossref","unstructured":"Muthusamy V, Rizk Y, Kate K, Venkateswaran P et al (2023) Towards large language model-based personal agents in the enterprise: current trends and open problems. Findings of the association for computational linguistics EMNLP 2023:6909\u20136921","DOI":"10.18653\/v1\/2023.findings-emnlp.461"},{"key":"11471_CR108","volume":"67","author":"B Ni","year":"2024","unstructured":"Ni B, Buehler MJ (2024) Mechagents: Large language model multi-agent collaborations can solve mechanics problems, generate new data, and integrate knowledge. Extreme Mech Lett 67:102131","journal-title":"Extreme Mech Lett"},{"key":"11471_CR109","first-page":"123127","volume":"37","author":"Y Ning","year":"2024","unstructured":"Ning Y, Liu H (2024) Urbankgent: A unified large language model agent framework for urban knowledge graph construction. Adv Neural Inf Process Syst 37:123127\u2013123154","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR110","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang L, Wu J, Jiang X, Almeida D, Wainwright C et al (2022) Training language models to follow instructions with human feedback. Adv Neural Inf Process Syst 35:27730\u201327744","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR111","first-page":"126620","volume":"37","author":"J-C Pang","year":"2024","unstructured":"Pang J-C, Yang S-H, Li K, Zhang J, Chen X-H et al (2024a) Kalm: Knowledgeable agents by offline reinforcement learning from large language model rollouts. Adv Neural Inf Process Syst 37:126620\u2013126652","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR112","unstructured":"Pang X, Tang S, Ye R, Xiong Y, Zhang B et\u00a0al (2024b) Self-alignment of large language models via monopolylogue-based social scene simulation. In: Proceedings of the 41st international conference on machine learning, ser. Proceedings of Machine Learning Research, vol. 235. PMLR, pp 39\u00a0416\u201339\u00a0447"},{"key":"11471_CR113","doi-asserted-by":"crossref","unstructured":"Park JS, O\u2019Brien J, Cai CJ, Morris MR, Liang P, Bernstein MS (2023) Generative agents: Interactive simulacra of human behavior. In: Proceedings of the 36th annual acm symposium on user interface software and technology, pp 1\u201322","DOI":"10.1145\/3586183.3606763"},{"key":"11471_CR115","unstructured":"Penedo G, Malartic Q, Hesslow D, Cojocaru R, Cappelli A et\u00a0al (2023) The refinedweb dataset for falcon llm: outperforming curated corpora with web data, and web data only. arXiv preprint arXiv:2306.01116"},{"key":"11471_CR114","unstructured":"Penedo G, Kydl\u00ed\u010dek H, allal LB, Lozhkov A, Mitchell M et\u00a0al (2024) The fineweb datasets: Decanting the web for the finest text data at scale. In: The thirty-eight conference on neural information processing systems datasets and benchmarks track"},{"key":"11471_CR116","doi-asserted-by":"crossref","unstructured":"Perincherry A, Krantz J, Lee S (2025) Do visual imaginations improve vision-and-language navigation agents?. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 3846\u20133855","DOI":"10.1109\/CVPR52734.2025.00364"},{"issue":"22","key":"11471_CR117","doi-asserted-by":"crossref","DOI":"10.1073\/pnas.2415898122","volume":"122","author":"S Peter","year":"2025","unstructured":"Peter S, Riemer K, West JD (2025) The benefits and dangers of anthropomorphic conversational agents. Proc Natl Acad Sci USA 122(22):e2415898122","journal-title":"Proc Natl Acad Sci USA"},{"key":"11471_CR118","doi-asserted-by":"crossref","unstructured":"Press O, Zhang M, Min S, Schmidt L, Smith N, Lewis M (2023) Measuring and narrowing the compositionality gap in language models. In: Findings of the association for computational linguistics: EMNLP. Association for Computational Linguistics 2023:5687\u20135711","DOI":"10.18653\/v1\/2023.findings-emnlp.378"},{"key":"11471_CR120","unstructured":"Qian C, Liu W, Liu H, Chen N, Dang Y et\u00a0al (2023) Chatdev: Communicative agents for software development. arXiv preprint arXiv:2307.07924v5"},{"key":"11471_CR119","doi-asserted-by":"crossref","unstructured":"Qian C, He B, Zhuang Z, Deng J, Qin Y et\u00a0al (2024) Tell me more! towards implicit user intention understanding of language model driven agents. In: Proceedings of the 62nd annual meeting of the association for computational linguistics (Volume 1: Long Papers). Association for Computational Linguistics, pp 1088\u20131113","DOI":"10.18653\/v1\/2024.acl-long.61"},{"key":"11471_CR121","first-page":"114843","volume":"37","author":"S Qiao","year":"2024","unstructured":"Qiao S, Fang R, Zhang N, Zhu Y, Chen X et al (2024a) Agent planning with world knowledge model. Adv Neural Inf Process Syst 37:114843\u2013114871","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR122","doi-asserted-by":"crossref","unstructured":"Qiao S, Zhang N, Fang R, Luo Y, Zhou W et\u00a0al (2024b) Autoact: Automatic agent learning from scratch for qa via self-planning","DOI":"10.18653\/v1\/2024.acl-long.165"},{"issue":"4","key":"11471_CR123","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3704435","volume":"57","author":"Y Qin","year":"2024","unstructured":"Qin Y, Hu S, Lin Y, Chen W, Ding N et al (2024a) Tool learning with foundation models. ACM Comput Surv 57(4):1\u201340","journal-title":"ACM Comput Surv"},{"key":"11471_CR124","unstructured":"Qin Y, Liang S, Ye Y, Zhu K, Yan L et\u00a0al (2024b) ToolLLM: Facilitating large language models to master 16000+ real-world APIs. In: The twelfth international conference on learning representations"},{"key":"11471_CR125","doi-asserted-by":"crossref","unstructured":"Qi Y, Wu Q, Anderson P, Liu M, Shen C, van\u00a0den Hengel A (2020) Reverie: Remote embodied visual referring expression in real indoor environments. arXiv preprint arXiv:1904.10151v2, vol.\u00a02","DOI":"10.1109\/CVPR42600.2020.01000"},{"key":"11471_CR126","first-page":"55249","volume":"37","author":"Y Qu","year":"2024","unstructured":"Qu Y, Zhang T, Garg N, Kumar A (2024) Recursive introspection: teaching language model agents how to self-improve. Adv Neural Inf Process Syst 37:55249\u201355285","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR127","doi-asserted-by":"crossref","unstructured":"Ramos MC, Collison CJ, White AD (2025) A review of large language models and autonomous agents in chemistry. Chem Sci","DOI":"10.1039\/D4SC03921A"},{"key":"11471_CR128","unstructured":"Ritore A, Oprescu AM, Estirado\u00a0Bronchalo A, Armengol de\u00a0la Hoz MA (2024) Covid data for shared learning (cdsl): a comprehensive, multimodal covid-19 dataset from hm hospitales (version 1.0.0). PhysioNet"},{"key":"11471_CR129","unstructured":"Ruan Y, Dong H, Wang A, Pitis S, Zhou Y et\u00a0al (2024) Identifying the risks of LM agents with an LM-emulated sandbox. In: The twelfth international conference on learning representations"},{"key":"11471_CR130","unstructured":"Saxton, Grefenstette, Hill, and Kohli, Analysing mathematical reasoning abilities of neural models. arXiv:1904.01557, 2019"},{"key":"11471_CR131","first-page":"68539","volume":"36","author":"T Schick","year":"2023","unstructured":"Schick T, Dwivedi-Yu J, Dess\u00ec R, Raileanu R, Lomeli M et al (2023) Toolformer: Language models can teach themselves to use tools. Adv Neural Inf Process Syst 36:68539\u201368551","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR132","doi-asserted-by":"crossref","unstructured":"Schmidgall S, Su Y, Wang Z, Sun X, Wu J et\u00a0al (2025) Agent laboratory: Using llm agents as research assistants. arXiv preprint arXiv:2501.04227","DOI":"10.18653\/v1\/2025.findings-emnlp.320"},{"issue":"17","key":"11471_CR133","first-page":"18924","volume":"38","author":"R Schumann","year":"2024","unstructured":"Schumann R, Zhu W, Feng W, Fu T-J, Riezler S, Wang WY (2024) Velma: Verbalization embodiment of llm agents for vision and language navigation in street view. Proc AAAI Conf Artif Intell 38(17):18924\u201318933","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"11471_CR134","doi-asserted-by":"crossref","unstructured":"Shen X, Chen Z, Backes M, Shen Y, Zhang Y (2024) \"Do anything now\": characterizing and evaluating in-the-wild jailbreak prompts on large language models. In: Proceedings of the 2024 on ACM SIGSAC conference on computer and communications security, pp 1671\u20131685","DOI":"10.1145\/3658644.3670388"},{"key":"11471_CR135","first-page":"8634","volume":"36","author":"N Shinn","year":"2023","unstructured":"Shinn N, Cassano F, Gopinath A, Narasimhan K, Yao S (2023) Reflexion: Language agents with verbal reinforcement learning. Adv Neural Inf Process Syst 36:8634\u20138652","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR136","unstructured":"Shi F, Suzgun M, Freitag M, Wang X, Srivats S et\u00a0al (2022) Language models are multilingual chain-of-thought reasoners. arXiv preprint arXiv:2210.03057"},{"key":"11471_CR137","doi-asserted-by":"crossref","unstructured":"Shridhar M, Thomason J, Gordon D, Bisk Y, Han W et\u00a0al (2020) ALFRED: A benchmark for interpreting grounded instructions for everyday tasks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10\u00a0740\u201310\u00a0749","DOI":"10.1109\/CVPR42600.2020.01075"},{"key":"11471_CR138","unstructured":"Shridhar M, Yuan X, C\u00f4t\u00e9 MA, Bisk Y, Trischler A, Hausknecht M (2021) ALFWorld: aligning text and embodied environments for interactive learning. In: Proceedings of the international conference on learning representations (ICLR)"},{"key":"11471_CR139","doi-asserted-by":"crossref","unstructured":"Singh H, Verma N, Wang Y, Bharadwaj M, Fashandi H et\u00a0al (2024) Personal large language model agents: a case study on tailored travel planning. In: Proceedings of the 2024 conference on empirical methods in natural language processing: industry track, pp 486\u2013514","DOI":"10.18653\/v1\/2024.emnlp-industry.37"},{"key":"11471_CR140","doi-asserted-by":"crossref","unstructured":"Song CH, Wu J, Washington C, Sadler BM, Chao WL, Su Y (2023) Llm-planner: Few-shot grounded planning for embodied agents with large language models. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2998\u20133009","DOI":"10.1109\/ICCV51070.2023.00280"},{"key":"11471_CR141","unstructured":"Srivastava A, Rastogi A, Rao A, Shoeb AAM, Abid A et\u00a0al (2023) Beyond the imitation game: quantifying and extrapolating the capabilities of language models. Trans Mach Learn Res"},{"issue":"6","key":"11471_CR142","doi-asserted-by":"crossref","first-page":"5681","DOI":"10.1109\/LRA.2025.3562371","volume":"10","author":"C Sun","year":"2025","unstructured":"Sun C, Huang S, Pompili D (2025) Llm-based multi-agent decision-making: challenges and future directions. IEEE Robot Automat Lett 10(6):5681\u20135688","journal-title":"IEEE Robot Automat Lett"},{"key":"11471_CR143","doi-asserted-by":"crossref","unstructured":"Su Y, Yang D, Yao S, Yu T (2024) Language agents: foundations, prospects, and risks. In: Proceedings of the 2024 conference on empirical methods in natural language processing: tutorial abstracts. Miami, Florida, USA: Association for Computational Linguistics, pp 17\u201324","DOI":"10.18653\/v1\/2024.emnlp-tutorials.3"},{"key":"11471_CR144","unstructured":"Talebirad Y, Nadiri A (2023) Multi-agent collaboration: Harnessing the power of intelligent llm agents. arXiv preprint arXiv:2306.03314"},{"key":"11471_CR145","unstructured":"Taori R, Gulrajani I, Zhang T, Dubois Y, Li X, Guestrin C, Liang P, Hashimoto TB (2023) Stanford alpaca: An instruction-following llama model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca"},{"key":"11471_CR146","unstructured":"Tennant E, Hailes S, Musolesi M (2025) Moral alignment for LLM agents. In: The thirteenth international conference on learning representations"},{"key":"11471_CR147","doi-asserted-by":"crossref","unstructured":"Thorne J, Vlachos A, Christodoulopoulos C, Mittal A (2018) FEVER: a large-scale dataset for fact extraction and VERification. In: NAACL-HLT","DOI":"10.18653\/v1\/N18-1074"},{"key":"11471_CR148","doi-asserted-by":"crossref","unstructured":"Tony C, Mutas M, D\u00edaz\u00a0Ferreyra N, Scandariato R (2023) Llmseceval: A dataset of natural language prompts for security evaluations. In: 2023 IEEE\/ACM 20th international conference on mining software repositories (MSR)","DOI":"10.1109\/MSR59073.2023.00084"},{"key":"11471_CR149","doi-asserted-by":"crossref","first-page":"539","DOI":"10.1162\/tacl_a_00475","volume":"10","author":"H Trivedi","year":"2022","unstructured":"Trivedi H, Balasubramanian N, Khot T, Sabharwal A (2022) Musique: Multihop questions via single-hop question composition. Trans Assoc Comput Linguist 10:539\u2013554","journal-title":"Trans Assoc Comput Linguist"},{"key":"11471_CR150","unstructured":"Tse Huang J, Zhou J, Jin T, Zhou X, Chen Z et\u00a0al (2025) On the resilience of LLM-based multi-agent collaboration with faulty agents. In: Forty-second international conference on machine learning"},{"key":"11471_CR107","unstructured":"U.S. National Library of Medicine, Pubmed baseline repository. https:\/\/www.nlm.nih.gov\/databases\/download\/pubmed_medline.html, 2023, courtesy of the National Library of Medicine"},{"key":"11471_CR151","first-page":"38975","volume":"36","author":"K Valmeekam","year":"2023","unstructured":"Valmeekam K, Marquez M, Olmo A, Sreedharan S, Kambhampati S (2023) Planbench: An extensible benchmark for evaluating large language models on planning and reasoning about change. Adv Neural Inf Process Syst 36:38975\u201338987","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR152","unstructured":"Walke HR, Black K, Zhao TZ, Vuong Q, Zheng C et\u00a0al (2023) Bridgedata v2: A dataset for robot learning at scale. In: Conference on robot learning. PMLR, pp 1723\u20131736"},{"key":"11471_CR158","unstructured":"Wang Z, Cai S, Chen G, Liu A, Ma X et\u00a0al (2023a) Describe, explain, plan and select: interactive planning with large language models enables open-world multi-task agents. In: Proceedings of the 37th international conference on neural information processing systems, ser. NIPS \u201923. Red Hook, NY, USA: Curran Associates Inc"},{"key":"11471_CR159","unstructured":"Wang P, Li L, Shao Z, Xu R, Dai D et\u00a0al (2023b) Math-shepherd: Verify and reinforce llms step-by-step without human annotations. arXiv preprint arXiv:2312.08935v3"},{"key":"11471_CR161","unstructured":"Wang G, Xie Y, Jiang Y, Mandlekar A, Xiao C et\u00a0al (2023c) Voyager: An open-ended embodied agent with large language models. arXiv preprint arXiv:2305.16291"},{"key":"11471_CR163","unstructured":"Wang Z, Zhang G, Yang K, Shi N, Zhou W et\u00a0al (2023d) Interactive natural language processing. arXiv preprint arXiv:2305.13246"},{"key":"11471_CR153","first-page":"73278","volume":"37","author":"Z Wang","year":"2024","unstructured":"Wang Z, Cai S, Mu Z, Lin H, Zhang C et al (2024a) Omnijarvis: Unified vision-language-action tokenization enables open-world instruction following agents. Adv Neural Inf Process Syst 37:73278\u201373308","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR154","volume":"18","author":"L Wang","year":"2024","unstructured":"Wang L, Ma C, Feng X et al (2024b) A survey on large language model based autonomous agents. Front Comp Sci 18:186345","journal-title":"Front Comp Sci"},{"key":"11471_CR160","doi-asserted-by":"crossref","unstructured":"Wang Z, Liu Z, Zhang Y, Zhong A, Wang J et\u00a0al (2024c) Rcagent: Cloud root cause analysis by autonomous agents with tool-augmented large language models. In: Proceedings of the 33rd ACM international conference on information and knowledge management, pp 4966\u2013497","DOI":"10.1145\/3627673.3680016"},{"key":"11471_CR162","doi-asserted-by":"crossref","unstructured":"Wang R, Yu H, Zhang W, Qi Z, Sap M et\u00a0al (2024d) SOTOPIA-$$\\pi $$: Interactive learning of socially intelligent language agents. In: Proceedings of the 62nd annual meeting of the association for computational linguistics (Volume 1: Long Papers). Association for Computational Linguistics, pp 12912\u201312940","DOI":"10.18653\/v1\/2024.acl-long.698"},{"issue":"8","key":"11471_CR155","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1007\/s10462-025-11222-w","volume":"58","author":"X Wang","year":"2025","unstructured":"Wang X, Jiang H, Yu Y, Yu J, Lin Y et al (2025a) Building intelligence identification system via large language model watermarking: a survey and beyond. Artif Intell Rev 58(8):249","journal-title":"Artif Intell Rev"},{"issue":"2","key":"11471_CR156","first-page":"1","volume":"43","author":"L Wang","year":"2025","unstructured":"Wang L, Zhang J, Yang H, Chen Z-Y, Tang J et al (2025b) User behavior simulation with large language model-based agents. ACM Trans Inform Syst 43(2):1\u201337","journal-title":"ACM Trans Inform Syst"},{"key":"11471_CR157","first-page":"2250","volume":"2025","author":"Z Wang","year":"2025","unstructured":"Wang Z, Zhu Y, Zhao H, Zheng X, Sui D et al (2025c) Colacare: Enhancing electronic health record modeling through large language model-driven multi-agent collaboration. Proc ACM Web Conf 2025:2250\u20132261","journal-title":"Proc ACM Web Conf"},{"key":"11471_CR164","unstructured":"Weber M, Fu DY, Anthony Q, Oren Y, Adams S et\u00a0al (2024) Redpajama: an open dataset for training large language models. NeurIPS Datasets and Benchmarks Track"},{"key":"11471_CR166","unstructured":"Wei J, Wang X, Schuurmans D, Bosma M, Ichter B et\u00a0al (2022) Chain-of-thought prompting elicits reasoning in large language models. In: Proceedings of the 36th international conference on neural information processing systems, ser. NIPS \u201922. Red Hook, NY, USA: Curran Associates Inc"},{"key":"11471_CR165","doi-asserted-by":"crossref","unstructured":"Wei Y, Wang Z, Lu Y, Xu C, Liu C et\u00a0al (2024) Editable scene simulation for autonomous driving via collaborative llm-agents. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 15077\u201315087","DOI":"10.1109\/CVPR52733.2024.01428"},{"key":"11471_CR167","unstructured":"Wijk H, Lin T, Becker J, Jawhar S, Parikh N et\u00a0al (2025) RE-bench: Evaluating frontier AI r &d capabilities of language model agents against human experts. In: Forty-second international conference on machine learning"},{"issue":"10","key":"11471_CR168","doi-asserted-by":"crossref","first-page":"3184","DOI":"10.1109\/TCAD.2024.3383347","volume":"43","author":"H Wu","year":"2024","unstructured":"Wu H, He Z, Zhang X, Yao X, Zheng S et al (2024a) Chateda: A large language model powered autonomous agent for eda. IEEE Trans Comput Aided Des Integr Circ Syst 43(10):3184\u20133197","journal-title":"IEEE Trans Comput Aided Des Integr Circ Syst"},{"key":"11471_CR169","first-page":"25981","volume":"37","author":"S Wu","year":"2024","unstructured":"Wu S, Zhao S, Huang Q, Huang K, Yasunaga M et al (2024b) Avatar: Optimizing llm agents for tool usage via contrastive reasoning. Adv Neural Inf Process Syst 37:25981\u201326010","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR170","unstructured":"Wu Q, Bansal G, Zhang J, Wu Y, Li B et\u00a0al (2024c) Autogen: Enabling next-gen LLM applications via multi-agent conversations. In: First conference on language modeling"},{"key":"11471_CR171","unstructured":"Wu S, Zhao S, Yasunaga M, Huang K, Cao K et\u00a0al (2024d) Stark: Benchmarking llm retrieval on textual and relational knowledge bases. In: NeurIPS datasets and benchmarks track"},{"key":"11471_CR172","volume":"68","author":"Z Xi","year":"2025","unstructured":"Xi Z, Chen W, Guo X et al (2025) The rise and potential of large language model based agents: a survey. Sci China Inf Sci 68:121101","journal-title":"Sci China Inf Sci"},{"key":"11471_CR173","doi-asserted-by":"crossref","first-page":"84863","DOI":"10.1109\/ACCESS.2024.3415470","volume":"12","author":"Y Xia","year":"2024","unstructured":"Xia Y, Xiao Z, Jazdi N, Weyrich M (2024) Generation of asset administration shell with large language model agents: toward semantic interoperability in digital twins in the context of industry 4.0. IEEE Access 12:84863\u201384877","journal-title":"IEEE Access"},{"key":"11471_CR175","unstructured":"Xia Y, Shen W, Wang Y, Liu JK, Sun H et\u00a0al (2025) Leetcodedataset: A temporal dataset for robust evaluation and efficient training of code llms. arXiv preprint arXiv:2504.14655"},{"key":"11471_CR174","unstructured":"Xiang Z, Zheng L, Li Y, Hong J, Li Q et\u00a0al (2025) Guardagent: safeguard LLM agents via knowledge-enabled reasoning. In: ICML 2025 workshop on computer use agents"},{"key":"11471_CR176","first-page":"15674","volume":"37","author":"C Xie","year":"2024","unstructured":"Xie C, Chen C, Jia F, Ye Z, Lai S et al (2024a) Can large language model agents simulate human trust behavior? Adv Neural Inf Process Syst 37:15674\u201315729","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR177","unstructured":"Xie J, Zhang K, Chen J, Zhu T, Lou R et\u00a0al (2024b) Travelplanner: A benchmark for real-world planning with language agents. arXiv preprint arXiv: 2402.01622"},{"key":"11471_CR178","doi-asserted-by":"crossref","unstructured":"Xing M, Zhang R, Xue H, Chen Q, Yang F, Xiao Z (2024) Understanding the weakness of large language model agents within a complex android environment. In: Proceedings of the 30th ACM SIGKDD conference on knowledge discovery and data mining, pp 6061\u20136072","DOI":"10.1145\/3637528.3671650"},{"key":"11471_CR179","doi-asserted-by":"crossref","unstructured":"Xue X, Lu Z, Huang D, Wang Z, Ouyang W, Bai L (2025) Comfybench: Benchmarking llm-based agents in comfyui for autonomously designing collaborative ai systems. In: Proceedings of the computer vision and pattern recognition conference, pp 24\u00a0614\u201324\u00a0624","DOI":"10.1109\/CVPR52734.2025.02292"},{"key":"11471_CR182","doi-asserted-by":"crossref","unstructured":"Xu M, Niyato D, Kang J, Xiong Z, Mao S et\u00a0al (2024a) When large language model agents meet 6g networks: perception, grounding, and alignment. IEEE Wirel Commun","DOI":"10.1109\/MWC.005.2400019"},{"key":"11471_CR183","unstructured":"Xu Y, Su H, Xing C, Mi B, Liu Q et\u00a0al (2024b) Lemur: Harmonizing natural language and code for language agents. In: The twelfth international conference on learning representations"},{"key":"11471_CR180","unstructured":"Xu W, Huang C, Gao S et\u00a0al (2025a) Llm-based agents for tool learning: a survey. Data Sci Eng, pp 1\u201331"},{"key":"11471_CR181","unstructured":"Xu W, Mei K, Gao H, Tan J, Liang Z, Zhang Y (2025b) A-mem: Agentic memory for llm agents. arXiv preprint arXiv:2502.12110"},{"key":"11471_CR187","doi-asserted-by":"crossref","unstructured":"Yang S, Luo P, Loy CC, Tang X (2016) Wider face: a face detection benchmark. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5525\u20135533","DOI":"10.1109\/CVPR.2016.596"},{"key":"11471_CR188","doi-asserted-by":"crossref","unstructured":"Yang Z, Qi P, Zhang S, Bengio Y, Cohen W et\u00a0al (2018) HotpotQA: A dataset for diverse, explainable multi-hop question answering. In: Proceedings of the 2018 conference on empirical methods in natural language processing. Association for Computational Linguistics, pp 2369\u20132380","DOI":"10.18653\/v1\/D18-1259"},{"key":"11471_CR184","first-page":"23826","volume":"36","author":"J Yang","year":"2023","unstructured":"Yang J, Prabhakar A, Narasimhan K, Yao S (2023) Intercode: Standardizing and benchmarking interactive coding with execution feedback. Adv Neural Inf Process Syst 36:23826\u201323854","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR185","first-page":"100938","volume":"37","author":"W Yang","year":"2024","unstructured":"Yang W, Bi X, Lin Y, Chen S, Zhou J, Sun X (2024a) Watch out for your agents! investigating backdoor threats to llm-based agents. Adv Neural Inf Process Syst 37:100938\u2013100964","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR186","doi-asserted-by":"crossref","unstructured":"Yang J, Chen X, Qian S, Madaan N, Iyengar M et\u00a0al (2024b) Llm-grounder: Open-vocabulary 3d visual grounding with large language model as an agent. In: 2024 IEEE international conference on robotics and automation (ICRA). IEEE, pp 7694\u20137701","DOI":"10.1109\/ICRA57147.2024.10610443"},{"key":"11471_CR189","doi-asserted-by":"crossref","unstructured":"Yang Z, Xu X, Yao B, Rogers E, Zhang S et al (2024c) Talk2care: An llm-based voice assistant for communication between healthcare providers and older adults. In: Proceedings of the ACM on interactive mobile wearable and ubiquitous technologies 8(2):1\u201335","DOI":"10.1145\/3659625"},{"key":"11471_CR190","unstructured":"Yang Y, Zhou T, Li K, Tao D, Li L et\u00a0al (2024d) Embodied multi-modal agent trained by an llm from a parallel textworld. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 26\u00a0275\u201326\u00a0285"},{"key":"11471_CR191","first-page":"11809","volume":"36","author":"S Yao","year":"2023","unstructured":"Yao S, Yu D, Zhao J, Shafran I, Griffiths T et al (2023a) Tree of thoughts: deliberate problem solving with large language models. Adv Neural Inf Process Syst 36:11809\u201311822","journal-title":"Adv Neural Inf Process Syst"},{"key":"11471_CR192","unstructured":"Yao S, Zhao J, Yu D, Du N, Shafran I et\u00a0al (2023b) React: Synergizing reasoning and acting in language models. In: International conference on learning representations (ICLR)"},{"key":"11471_CR193","doi-asserted-by":"publisher","unstructured":"Yim WW, Fu Y, Ben\u00a0Abacha A, Snider N, Lin T, Yetisgen M (2023) aci-bench-corpus.zip. Figshare. Dataset, [Online]. Available: https:\/\/doi.org\/10.6084\/m9.figshare.22494601.v1","DOI":"10.6084\/m9.figshare.22494601.v1"},{"key":"11471_CR195","doi-asserted-by":"crossref","unstructured":"Yin P, Deng B, Chen E, Vasilescu B, Neubig G (2018) Learning to mine aligned code and natural language pairs from stack overflow. In: IEEE\/ACM 15th international conference on mining software repositories (MSR). IEEE 2018:476\u2013486","DOI":"10.1145\/3196398.3196408"},{"key":"11471_CR194","doi-asserted-by":"crossref","unstructured":"Yin D, Brahman F, Ravichander A, Chandu K, Chang KW et\u00a0al (2024) Agent lumos: Unified and modular training for open-source language agents. In: Proceedings of the 62nd annual meeting of the association for computational linguistics (Volume 1: Long Papers). Association for Computational Linguistics, pp 12380\u201312403","DOI":"10.18653\/v1\/2024.acl-long.670"},{"key":"11471_CR196","doi-asserted-by":"crossref","unstructured":"Yuan S, Song K, Chen J, Tan X, Shen Y et\u00a0al (2025) EASYTOOL: enhancing LLM-based agents with concise tool instruction. In: Proceedings of the 2025 conference of the nations of the Americas chapter of the association for computational linguistics: human language technologies (Volume 1: Long Papers). Association for Computational Linguistics, pp 951\u2013972","DOI":"10.18653\/v1\/2025.naacl-long.44"},{"key":"11471_CR197","doi-asserted-by":"crossref","unstructured":"Zeng X, Wang X, Zhang T, Yu C, Zhao S, Chen Y (2024) Gesturegpt: Toward zero-shot free-form hand gesture understanding with large language model agents. In: Proceedings of the ACM on human-computer interaction 8(ISS):462\u2013499","DOI":"10.1145\/3698145"},{"key":"11471_CR198","first-page":"110935","volume":"37","author":"S Zhai","year":"2024","unstructured":"Zhai S, Bai H, Lin Z, Pan J, Tong P et al (2024) Fine-tuning large vision-language models as decision-making agents via reinforcement learning. Adv Neural Inf Process Syst 37:110935\u2013110971","journal-title":"Adv Neural Inf Process Syst"},{"issue":"5","key":"11471_CR199","doi-asserted-by":"crossref","first-page":"1038","DOI":"10.1109\/TMM.2018.2808769","volume":"20","author":"Y Zhang","year":"2018","unstructured":"Zhang Y, Cao C, Cheng J, Lu H (2018) Egogesture: A new dataset and benchmark for egocentric hand gesture recognition. IEEE Trans Multimedia 20(5):1038\u20131050","journal-title":"IEEE Trans Multimedia"},{"key":"11471_CR200","first-page":"78227","volume":"36","author":"D Zhang","year":"2023","unstructured":"Zhang D, Chen L, Zhang S, Xu H, Zhao Z, Yu K (2023) Large language models are semi-parametric reinforcement learning agents. Adv Neural Inf Process Syst 36:78227\u201378239","journal-title":"Adv Neural Inf Process Syst"},{"issue":"32","key":"11471_CR201","doi-asserted-by":"crossref","first-page":"2405163","DOI":"10.1002\/adma.202405163","volume":"36","author":"Q Zhang","year":"2024","unstructured":"Zhang Q, Hu Y, Yan J, Zhang H, Xie X et al (2024a) Large-language-model-based ai agent for organic semiconductor device research. Adv Mater 36(32):2405163","journal-title":"Adv Mater"},{"key":"11471_CR202","first-page":"132208","volume":"37","author":"Y Zhang","year":"2024","unstructured":"Zhang Y, Sun R, Chen Y, Pfister T, Zhang R, Arik S (2024b) Chain of agents: large language models collaborating on long-context tasks. Adv Neural Inf Process Syst 37:132208\u2013132237","journal-title":"Adv Neural Inf Process Syst"},{"issue":"6","key":"11471_CR203","doi-asserted-by":"crossref","first-page":"5242","DOI":"10.1109\/TDSC.2024.3372777","volume":"21","author":"X Zhang","year":"2024","unstructured":"Zhang X, Xu H, Ba Z, Wang Z, Hong Y et al (2024c) Privacyasst: Safeguarding user privacy in tool-using large language model agents. IEEE Trans Depend Secure Comput 21(6):5242\u20135258","journal-title":"IEEE Trans Depend Secure Comput"},{"issue":"16","key":"11471_CR204","first-page":"17591","volume":"38","author":"C Zhang","year":"2024","unstructured":"Zhang C, Yang K, Hu S, Wang Z, Li G et al (2024d) Proagent: building proactive cooperative agents with large language models. Proceedings AAAI Conf Artif Intell 38(16):17591\u201317599","journal-title":"Proceedings AAAI Conf Artif Intell"},{"key":"11471_CR205","doi-asserted-by":"crossref","unstructured":"Zhang S, Dong Y, Zhang Y, Payne TR, Zhang J (2024e) Large language model assissted multi-agent dialogue for ontology alignment. In: Proceedings of the 23rd international conference on autonomous agents and multiagent systems, pp 2594\u20132596","DOI":"10.65109\/PXEC2992"},{"key":"11471_CR206","doi-asserted-by":"crossref","unstructured":"Zhang R, Du H, Liu Y, Niyato D, Kang J et\u00a0al (2024f) Generative ai agents with large language model for satellite networks via a mixture of experts transmission. IEEE J Selected Areas Commun","DOI":"10.1109\/JSAC.2024.3459037"},{"key":"11471_CR207","unstructured":"Zhang H, Du W, Shan J, Zhou Q, Du Y et\u00a0al (2024g) Building cooperative embodied agents modularly with large language models. In: The Twelfth international conference on learning representations"},{"key":"11471_CR208","doi-asserted-by":"crossref","unstructured":"Zhang Y, Jiang Q, XingyuHan X, Chen N, Yang Y, Ren K (2024h) Benchmarking data science agents. In: Proceedings of the 62nd annual meeting of the association for computational linguistics (Volume 1: Long Papers). Association for Computational Linguistics, pp 5677\u20135700","DOI":"10.18653\/v1\/2024.acl-long.308"},{"key":"11471_CR209","doi-asserted-by":"crossref","unstructured":"Zhang J, Xu X, Zhang N, Liu R, Hooi B, Deng S (2024i) Exploring collaboration mechanisms for LLM agents: a social psychology view. In: Proceedings of the 62nd annual meeting of the association for computational linguistics (Volume 1: Long Papers). Association for Computational Linguistics, pp 14 544\u201314 607","DOI":"10.18653\/v1\/2024.acl-long.782"},{"key":"11471_CR210","doi-asserted-by":"crossref","unstructured":"Zhang Y, Yuan S, Hu C, Richardson K, Xiao Y, Chen J (2024j) TimeArena: Shaping efficient multitasking language agents in a time-aware simulation. In: Proceedings of the 62nd annual meeting of the association for computational linguistics (Volume 1: Long Papers). Association for Computational Linguistics, pp 3894\u20133916","DOI":"10.18653\/v1\/2024.acl-long.215"},{"key":"11471_CR211","unstructured":"Zhang S, Zhang J, Liu J, Song L, Wang C et\u00a0al (2024k) Offline training of language model agents with functions as learnable weights. In: Proceedings of the 41st international conference on machine learning, ser. Proceedings of Machine Learning Research, vol. 235. PMLR, 21\u201327, pp 60315\u201360335"},{"key":"11471_CR212","unstructured":"Zhao W, Ren X, Hessel J, Cardie C, Choi Y, Deng Y (2024a) Wildchat: 1m chatGPT interaction logs in the wild. In: the twelfth international conference on learning representations"},{"key":"11471_CR213","unstructured":"Zhao Q, Wang J, Zhang Y, Jin Y, Zhu K, et\u00a0al (2024b) CompeteAI: Understanding the competition dynamics of large language model-based agents. In: Proceedings of the 41st international conference on machine learning, ser. Proceedings of Machine Learning Research, vol. 235. PMLR, pp 61\u00a0092\u201361\u00a0107"},{"key":"11471_CR214","unstructured":"Zheng S, jiazheng liu, Feng Y, Lu Z (2024) Steve-eye: Equipping LLM-based embodied agents with visual perception in open worlds. In: The Twelfth international conference on learning representations"},{"key":"11471_CR215","doi-asserted-by":"crossref","unstructured":"Zhong M, Yin D, Yu T, Zaidi A, Mutuma M et\u00a0al (2021) QMSum: a new benchmark for query-based multi-domain meeting summarization. In: North American association for computational linguistics (NAACL)","DOI":"10.18653\/v1\/2021.naacl-main.472"},{"key":"11471_CR216","first-page":"4575","volume":"37","author":"H Zhou","year":"2024","unstructured":"Zhou H, Tang Y, Qin H, Yang Y, Jin R et al (2024) Star-agents: Automatic data optimization with llm agents for instruction tuning. Adv Neural Inf Process Syst 37:4575\u20134597","journal-title":"Adv Neural Inf Process Syst"},{"issue":"1","key":"11471_CR217","first-page":"655","volume":"38","author":"F Zhu","year":"2024","unstructured":"Zhu F, Simmons R (2024) Bootstrapping cognitive agents with a large language model. Proc AAAI Conf Artif Intell 38(1):655\u2013663","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"11471_CR218","unstructured":"Zou A, Wang Z, Carlini N, Nasr M, Kolter JZ, Fredrikson M (2023) Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-025-11471-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11471-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11471-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T05:46:48Z","timestamp":1771480008000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-025-11471-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,6]]},"references-count":218,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2026,2]]}},"alternative-id":["11471"],"URL":"https:\/\/doi.org\/10.1007\/s10462-025-11471-9","relation":{},"ISSN":["1573-7462"],"issn-type":[{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,6]]},"assertion":[{"value":"25 August 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}}],"article-number":"71"}}