{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:05:29Z","timestamp":1776107129428,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":95,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709321","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:48:32Z","timestamp":1743792512000},"page":"496-507","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["<scp>AgentGen:<\/scp>\n            Enhancing Planning Abilities for Large Language Model based Agent via Environment and Task Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-3779-3378","authenticated-orcid":false,"given":"Mengkang","family":"Hu","sequence":"first","affiliation":[{"name":"The University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4518-323X","authenticated-orcid":false,"given":"Pu","family":"Zhao","sequence":"additional","affiliation":[{"name":"Microsoft, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1949-5715","authenticated-orcid":false,"given":"Can","family":"Xu","sequence":"additional","affiliation":[{"name":"Microsoft, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4000-6717","authenticated-orcid":false,"given":"Qingfeng","family":"Sun","sequence":"additional","affiliation":[{"name":"Microsoft, Beijing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8496-033X","authenticated-orcid":false,"given":"Jian-Guang","family":"Lou","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2559-2383","authenticated-orcid":false,"given":"Qingwei","family":"Lin","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6685-7950","authenticated-orcid":false,"given":"Ping","family":"Luo","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2019-213X","authenticated-orcid":false,"given":"Saravan","family":"Rajmohan","sequence":"additional","affiliation":[{"name":"Microsoft, Seattle, WA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Can large language models be good path planners? a benchmark and investigation on spatial-temporal reasoning. arXiv preprint arXiv:2310.03249","author":"Aghzal Mohamed","year":"2023","unstructured":"Mohamed Aghzal, Erion Plaku, and Ziyu Yao. 2023. Can large language models be good path planners? a benchmark and investigation on spatial-temporal reasoning. arXiv preprint arXiv:2310.03249 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Ajay Anurag","year":"2024","unstructured":"Anurag Ajay, Seungwook Han, Yilun Du, Shuang Li, Abhi Gupta, Tommi Jaakkola, Josh Tenenbaum, Leslie Kaelbling, Akash Srivastava, and Pulkit Agrawal. 2024. Compositional foundation models for hierarchical planning. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_3_1","volume-title":"Learning and leveraging verifiers to improve planning capabilities of pre-trained language models. 
arXiv preprint arXiv:2305.17077","author":"Arora Daman","year":"2023","unstructured":"Daman Arora and Subbarao Kambhampati. 2023. Learning and leveraging verifiers to improve planning capabilities of pre-trained language models. arXiv preprint arXiv:2305.17077 (2023)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3643757"},{"key":"e_1_3_2_1_5_1","volume-title":"Conference on robot learning. PMLR, 287--318","author":"Brohan Anthony","year":"2023","unstructured":"Anthony Brohan, Yevgen Chebotar, Chelsea Finn, Karol Hausman, Alexander Herzog, Daniel Ho, Julian Ibarz, Alex Irpan, Eric Jang, Ryan Julian, et al. 2023. Do as i can, not as i say: Grounding language in robotic affordances. In Conference on robot learning. PMLR, 287--318."},{"key":"e_1_3_2_1_6_1","volume-title":"Fireact: Toward language agent fine-tuning. arXiv preprint arXiv:2310.05915","author":"Chen Baian","year":"2023","unstructured":"Baian Chen, Chang Shu, Ehsan Shareghi, Nigel Collier, Karthik Narasimhan, and Shunyu Yao. 2023. Fireact: Toward language agent fine-tuning. arXiv preprint arXiv:2310.05915 (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al.","author":"Chen Mark","year":"2021","unstructured":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde De Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al. 2021. Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374 (2021)."},{"key":"e_1_3_2_1_8_1","volume-title":"Agent-FLAN: Designing Data and Methods of Effective Agent Tuning for Large Language Models. arXiv preprint arXiv:2403.12881","author":"Chen Zehui","year":"2024","unstructured":"Zehui Chen, Kuikun Liu, Qiuchen Wang, Wenwei Zhang, Jiangning Liu, Dahua Lin, Kai Chen, and Feng Zhao. 2024. Agent-FLAN: Designing Data and Methods of Effective Agent Tuning for Large Language Models. 
arXiv preprint arXiv:2403.12881 (2024)."},{"key":"e_1_3_2_1_9_1","unstructured":"Zhoujun Cheng Tianbao Xie Peng Shi Chengzu Li Rahul Nadkarni Yushi Hu Caiming Xiong Dragomir Radev Mari Ostendorf Luke Zettlemoyer et al. 2022. Binding language models in symbolic languages. arXiv preprint arXiv:2210.02875 (2022)."},{"key":"e_1_3_2_1_10_1","volume-title":"Thien Huu Nguyen, and Yoshua Bengio","author":"Chevalier-Boisvert Maxime","year":"2018","unstructured":"Maxime Chevalier-Boisvert, Dzmitry Bahdanau, Salem Lahlou, Lucas Willems, Chitwan Saharia, Thien Huu Nguyen, and Yoshua Bengio. 2018. Babyai: A platform to study the sample efficiency of grounded language learning. arXiv preprint arXiv:1810.08272 (2018)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Yan Ding Xiaohan Zhang Chris Paxton and Shiqi Zhang. 2023. Task and Motion Planning with Large Language Models for Object Rearrangement. arxiv: 2303.06247 [cs.RO]","DOI":"10.1109\/IROS55552.2023.10342169"},{"key":"e_1_3_2_1_12_1","volume-title":"DAG-Plan: Generating Directed Acyclic Dependency Graphs for Dual-Arm Cooperative Planning. arXiv preprint arXiv:2406.09953","author":"Gao Zeyu","year":"2024","unstructured":"Zeyu Gao, Yao Mu, Jinye Qu, Mengkang Hu, Lingyue Guo, Ping Luo, and Yanfeng Lu. 2024. DAG-Plan: Generating Directed Acyclic Dependency Graphs for Dual-Arm Cooperative Planning. arXiv preprint arXiv:2406.09953 (2024)."},{"key":"e_1_3_2_1_13_1","first-page":"79081","article-title":"Leveraging pre-trained large language models to construct and utilize world models for model-based task planning","volume":"36","author":"Guan Lin","year":"2023","unstructured":"Lin Guan, Karthik Valmeekam, Sarath Sreedharan, and Subbarao Kambhampati. 2023. Leveraging pre-trained large language models to construct and utilize world models for model-based task planning. Advances in Neural Information Processing Systems, Vol. 
36 (2023), 79081--79094.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_14_1","volume-title":"Zhen Wang, Daisy Zhe Wang, and Zhiting Hu.","author":"Hao Shibo","year":"2023","unstructured":"Shibo Hao, Yi Gu, Haodi Ma, Joshua Jiahua Hong, Zhen Wang, Daisy Zhe Wang, and Zhiting Hu. 2023. Reasoning with language model is planning with world model. arXiv preprint arXiv:2305.14992 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6297"},{"key":"e_1_3_2_1_16_1","volume-title":"Zijuan Lin, Liyang Zhou, et al.","author":"Hong Sirui","year":"2023","unstructured":"Sirui Hong, Xiawu Zheng, Jonathan Chen, Yuheng Cheng, Jinlin Wang, Ceyao Zhang, Zili Wang, Steven Ka Shing Yau, Zijuan Lin, Liyang Zhou, et al. 2023. MetaGPT: Meta programming for multi-agent collaborative framework. arXiv preprint arXiv:2308.00352 (2023)."},{"key":"e_1_3_2_1_17_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_18_1","volume-title":"Tree-planner: Efficient close-loop task planning with large language models. arXiv preprint arXiv:2310.08582","author":"Hu Mengkang","year":"2023","unstructured":"Mengkang Hu, Yao Mu, Xinmiao Yu, Mingyu Ding, Shiguang Wu, Wenqi Shao, Qiguang Chen, Bin Wang, Yu Qiao, and Ping Luo. 2023. Tree-planner: Efficient close-loop task planning with large language models. arXiv preprint arXiv:2310.08582 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"International conference on machine learning. PMLR, 9118--9147","author":"Huang Wenlong","year":"2022","unstructured":"Wenlong Huang, Pieter Abbeel, Deepak Pathak, and Igor Mordatch. 2022a. 
Language models as zero-shot planners: Extracting actionable knowledge for embodied agents. In International conference on machine learning. PMLR, 9118--9147."},{"key":"e_1_3_2_1_20_1","volume-title":"Grounded Decoding: Guiding Text Generation with Grounded Models for Robot Control. arxiv: 2303.00855 [cs.RO]","author":"Huang Wenlong","year":"2023","unstructured":"Wenlong Huang, Fei Xia, Dhruv Shah, Danny Driess, Andy Zeng, Yao Lu, Pete Florence, Igor Mordatch, Sergey Levine, Karol Hausman, and Brian Ichter. 2023b. Grounded Decoding: Guiding Text Generation with Grounded Models for Robot Control. arxiv: 2303.00855 [cs.RO]"},{"key":"e_1_3_2_1_21_1","volume-title":"Inner Monologue: Embodied Reasoning through Planning with Language Models. arxiv: 2207.05608 [cs.RO]","author":"Huang Wenlong","year":"2022","unstructured":"Wenlong Huang, Fei Xia, Ted Xiao, Harris Chan, Jacky Liang, Pete Florence, Andy Zeng, Jonathan Tompson, Igor Mordatch, Yevgen Chebotar, Pierre Sermanet, Noah Brown, Tomas Jackson, Linda Luu, Sergey Levine, Karol Hausman, and Brian Ichter. 2022b. Inner Monologue: Embodied Reasoning through Planning with Language Models. arxiv: 2207.05608 [cs.RO]"},{"key":"e_1_3_2_1_22_1","volume-title":"Recommender ai agent: Integrating large language models for interactive recommendations. arXiv preprint arXiv:2308.16505","author":"Huang Xu","year":"2023","unstructured":"Xu Huang, Jianxun Lian, Yuxuan Lei, Jing Yao, Defu Lian, and Xing Xie. 2023a. Recommender ai agent: Integrating large language models for interactive recommendations. 
arXiv preprint arXiv:2308.16505 (2023)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"e_1_3_2_1_24_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5980391"},{"key":"e_1_3_2_1_26_1","volume-title":"But Can Help Planning in LLM-Modulo Frameworks. arXiv preprint arXiv:2402.01817","author":"Kambhampati Subbarao","year":"2024","unstructured":"Subbarao Kambhampati, Karthik Valmeekam, Lin Guan, Kaya Stechly, Mudit Verma, Siddhant Bhambri, Lucas Saldyt, and Anil Murthy. 2024. LLMs Can't Plan, But Can Help Planning in LLM-Modulo Frameworks. arXiv preprint arXiv:2402.01817 (2024)."},{"key":"e_1_3_2_1_27_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Lai Yuhang","year":"2023","unstructured":"Yuhang Lai, Chengxi Li, Yiming Wang, Tianyi Zhang, Ruiqi Zhong, Luke Zettlemoyer, Wen-tau Yih, Daniel Fried, Sida Wang, and Tao Yu. 2023. DS-1000: A natural and reliable benchmark for data science code generation. In International Conference on Machine Learning. PMLR, 18319--18345."},{"key":"e_1_3_2_1_28_1","volume-title":"Api-bank: A comprehensive benchmark for tool-augmented llms. arXiv preprint arXiv:2304.08244","author":"Li Minghao","year":"2023","unstructured":"Minghao Li, Yingxiu Zhao, Bowen Yu, Feifan Song, Hangyu Li, Haiyang Yu, Zhoujun Li, Fei Huang, and Yongbin Li. 2023. Api-bank: A comprehensive benchmark for tool-augmented llms. 
arXiv preprint arXiv:2304.08244 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"Unleashing infinite-length input capacity for large-scale language models with self-controlled memory system. arXiv e-prints","author":"Liang Xinnian","year":"2023","unstructured":"Xinnian Liang, Bing Wang, Hui Huang, Shuangzhi Wu, Peihao Wu, Lu Lu, Zejun Ma, and Zhoujun Li. 2023. Unleashing infinite-length input capacity for large-scale language models with self-controlled memory system. arXiv e-prints (2023), arXiv-2304."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i11.26549"},{"key":"e_1_3_2_1_31_1","volume-title":"Llm p: Empowering large language models with optimal planning proficiency. arXiv preprint arXiv:2304.11477","author":"Liu Bo","year":"2023","unstructured":"Bo Liu, Yuqian Jiang, Xiaohan Zhang, Qiang Liu, Shiqi Zhang, Joydeep Biswas, and Peter Stone. 2023a. Llm p: Empowering large language models with optimal planning proficiency. arXiv preprint arXiv:2304.11477 (2023)."},{"key":"e_1_3_2_1_32_1","volume-title":"Think-in-memory: Recalling and post-thinking enable llms with long-term memory. arXiv preprint arXiv:2311.08719","author":"Liu Lei","year":"2023","unstructured":"Lei Liu, Xiaoyan Yang, Yue Shen, Binbin Hu, Zhiqiang Zhang, Jinjie Gu, and Guannan Zhang. 2023b. Think-in-memory: Recalling and post-thinking enable llms with long-term memory. arXiv preprint arXiv:2311.08719 (2023)."},{"key":"e_1_3_2_1_33_1","unstructured":"Xiao Liu Hao Yu Hanchen Zhang Yifan Xu Xuanyu Lei Hanyu Lai Yu Gu Hangliang Ding Kaiwen Men Kejuan Yang et al. 2023c. AgentBench: Evaluating llms as agents. arXiv preprint arXiv:2308.03688 (2023)."},{"key":"e_1_3_2_1_34_1","volume-title":"Tool-Planner: Dynamic Solution Tree Planning for Large Language Model with Tool Clustering. 
arXiv preprint arXiv:2406.03807","author":"Liu Yanming","year":"2024","unstructured":"Yanming Liu, Xinyue Peng, Yuwei Zhang, Jiannan Cao, Xuhong Zhang, Sheng Cheng, Xun Wang, Jianwei Yin, and Tianyu Du. 2024. Tool-Planner: Dynamic Solution Tree Planning for Large Language Model with Tool Clustering. arXiv preprint arXiv:2406.03807 (2024)."},{"key":"e_1_3_2_1_35_1","volume-title":"Wizardmath: Empowering mathematical reasoning for large language models via reinforced evol-instruct. arXiv preprint arXiv:2308.09583","author":"Luo Haipeng","year":"2023","unstructured":"Haipeng Luo, Qingfeng Sun, Can Xu, Pu Zhao, Jianguang Lou, Chongyang Tao, Xiubo Geng, Qingwei Lin, Shifeng Chen, and Dongmei Zhang. 2023a. Wizardmath: Empowering mathematical reasoning for large language models via reinforced evol-instruct. arXiv preprint arXiv:2308.09583 (2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"Wizardcoder: Empowering code large language models with evol-instruct. arXiv preprint arXiv:2306.08568","author":"Luo Ziyang","year":"2023","unstructured":"Ziyang Luo, Can Xu, Pu Zhao, Qingfeng Sun, Xiubo Geng, Wenxiang Hu, Chongyang Tao, Jing Ma, Qingwei Lin, and Daxin Jiang. 2023b. Wizardcoder: Empowering code large language models with evol-instruct. arXiv preprint arXiv:2306.08568 (2023)."},{"key":"e_1_3_2_1_37_1","volume-title":"AgentBoard: An Analytical Evaluation Board of Multi-turn LLM Agents. arXiv preprint arXiv:2401.13178","author":"Ma Chang","year":"2024","unstructured":"Chang Ma, Junlei Zhang, Zhihao Zhu, Cheng Yang, Yujiu Yang, Yaohui Jin, Zhenzhong Lan, Lingpeng Kong, and Junxian He. 2024. AgentBoard: An Analytical Evaluation Board of Multi-turn LLM Agents. arXiv preprint arXiv:2401.13178 (2024)."},{"key":"e_1_3_2_1_38_1","volume-title":"Wilkins","author":"McDermott Drew","year":"1998","unstructured":"Drew McDermott, Malik Ghallab, Adele E. Howe, Craig A. Knoblock, Ashwin Ram, Manuela M. Veloso, Daniel S. Weld, and David E. Wilkins. 1998. 
PDDL-the planning domain definition language. https:\/\/api.semanticscholar.org\/CorpusID:59656859"},{"key":"e_1_3_2_1_39_1","unstructured":"Meta AI. 2024. Introducing Meta Llama 3: The most capable openly available LLM to date. https:\/\/ai.meta.com\/blog\/meta-llama-3\/ Accessed: 2024-04--18."},{"key":"e_1_3_2_1_40_1","unstructured":"Yao Mu Junting Chen Qinglong Zhang Shoufa Chen Qiaojun Yu Chongjian Ge Runjian Chen Zhixuan Liang Mengkang Hu Chaofan Tao et al. 2024a. RoboCodeX: Multimodal Code Generation for Robotic Behavior Synthesis. arXiv preprint arXiv:2402.16117 (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Mu Yao","year":"2024","unstructured":"Yao Mu, Qinglong Zhang, Mengkang Hu, Wenhai Wang, Mingyu Ding, Jun Jin, Bin Wang, Jifeng Dai, Yu Qiao, and Ping Luo. 2024b. Embodiedgpt: Vision-language pre-training via embodied chain of thought. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_42_1","unstructured":"OpenAI. 2022. OpenAI: Introducing ChatGPT. https:\/\/openai.com\/blog\/chatgpt"},{"key":"e_1_3_2_1_43_1","unstructured":"OpenAI. 2023a. GPT-4 Technical Report. arxiv: 2303.08774 [cs.CL]"},{"key":"e_1_3_2_1_44_1","first-page":"13","article-title":"Gpt-4 technical report. arxiv 2303.08774","volume":"2","author":"R","year":"2023","unstructured":"R OpenAI. 2023b. Gpt-4 technical report. arxiv 2303.08774. View in Article, Vol. 2 (2023), 13.","journal-title":"View in Article"},{"key":"e_1_3_2_1_45_1","volume-title":"Talm: Tool augmented language models. arXiv preprint arXiv:2205.12255","author":"Parisi Aaron","year":"2022","unstructured":"Aaron Parisi, Yao Zhao, and Noah Fiedel. 2022. Talm: Tool augmented language models. 
arXiv preprint arXiv:2205.12255 (2022)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00886"},{"key":"e_1_3_2_1_47_1","unstructured":"Yujia Qin Shihao Liang Yining Ye Kunlun Zhu Lan Yan Yaxi Lu Yankai Lin Xin Cong Xiangru Tang Bill Qian et al. 2023. ToolLLM: Facilitating large language models to master 16000 real-world apis. arXiv preprint arXiv:2307.16789 (2023)."},{"key":"e_1_3_2_1_48_1","volume-title":"Yossi Adi, Jingyu Liu, Tal Remez, J\u00e9r\u00e9my Rapin, et al.","author":"Roziere Baptiste","year":"2023","unstructured":"Baptiste Roziere, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, J\u00e9r\u00e9my Rapin, et al. 2023. Code llama: Open foundation models for code. arXiv preprint arXiv:2308.12950 (2023)."},{"key":"e_1_3_2_1_49_1","volume-title":"Tptu: Task planning and tool usage of large language model-based ai agents. arXiv preprint arXiv:2308.03427","author":"Ruan Jingqing","year":"2023","unstructured":"Jingqing Ruan, Yihong Chen, Bin Zhang, Zhiwei Xu, Tianpeng Bao, Guoqing Du, Shiwei Shi, Hangyu Mao, Xingyu Zeng, and Rui Zhao. 2023. Tptu: Task planning and tool usage of large language model-based ai agents. arXiv preprint arXiv:2308.03427 (2023)."},{"key":"e_1_3_2_1_50_1","unstructured":"Stuart J Russell and Peter Norvig. 2016. Artificial intelligence: a modern approach. Pearson."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2302.04761nolinkurl10.48550\/ARXIV.2302.04761showeprint[arXiv]2302.04761"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2303.17580nolinkurl10.48550\/ARXIV.2303.17580showeprint[arXiv]2303.17580"},{"key":"e_1_3_2_1_53_1","volume-title":"Thirty-seventh Conference on Neural Information Processing Systems.","author":"Shinn Noah","year":"2023","unstructured":"Noah Shinn, Federico Cassano, Ashwin Gopinath, Karthik R Narasimhan, and Shunyu Yao. 2023. 
Reflexion: Language agents with verbal reinforcement learning. In Thirty-seventh Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_54_1","volume-title":"Alfworld: Aligning text and embodied environments for interactive learning. arXiv preprint arXiv:2010.03768","author":"Shridhar Mohit","year":"2020","unstructured":"Mohit Shridhar, Xingdi Yuan, Marc-Alexandre C\u00f4t\u00e9, Yonatan Bisk, Adam Trischler, and Matthew Hausknecht. 2020. Alfworld: Aligning text and embodied environments for interactive learning. arXiv preprint arXiv:2010.03768 (2020)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Chan Hee Song Jiaman Wu Clayton Washington Brian M. Sadler Wei-Lun Chao and Yu Su. 2023. LLM-Planner: Few-Shot Grounded Planning for Embodied Agents with Large Language Models. arxiv: 2212.04088 [cs.AI]","DOI":"10.1109\/ICCV51070.2023.00280"},{"key":"e_1_3_2_1_57_1","volume-title":"Trial and error: Exploration-based trajectory optimization for llm agents. arXiv preprint arXiv:2403.02502","author":"Song Yifan","year":"2024","unstructured":"Yifan Song, Da Yin, Xiang Yue, Jie Huang, Sujian Li, and Bill Yuchen Lin. 2024. Trial and error: Exploration-based trajectory optimization for llm agents. arXiv preprint arXiv:2403.02502 (2024)."},{"key":"e_1_3_2_1_58_1","volume-title":"Cognitive architectures for language agents. arXiv preprint arXiv:2309.02427","author":"Sumers Theodore R","year":"2023","unstructured":"Theodore R Sumers, Shunyu Yao, Karthik Narasimhan, and Thomas L Griffiths. 2023. Cognitive architectures for language agents. arXiv preprint arXiv:2309.02427 (2023)."},{"key":"e_1_3_2_1_59_1","volume-title":"AdaPlanner: Adaptive Planning from Feedback with Language Models. arXiv preprint arXiv:2305.16653","author":"Sun Haotian","year":"2023","unstructured":"Haotian Sun, Yuchen Zhuang, Lingkai Kong, Bo Dai, and Chao Zhang. 2023b. 
AdaPlanner: Adaptive Planning from Feedback with Language Models. arXiv preprint arXiv:2305.16653 (2023)."},{"key":"e_1_3_2_1_60_1","volume-title":"PEARL: Prompting Large Language Models to Plan and Execute Actions Over Long Documents. arXiv preprint arXiv:2305.14564","author":"Sun Simeng","year":"2023","unstructured":"Simeng Sun, Yang Liu, Shuohang Wang, Chenguang Zhu, and Mohit Iyyer. 2023a. PEARL: Prompting Large Language Models to Plan and Execute Actions Over Long Documents. arXiv preprint arXiv:2305.14564 (2023)."},{"key":"e_1_3_2_1_61_1","volume-title":"Yee Whye Teh, and Jonathan Richard Schwarz","author":"Tack Jihoon","year":"2024","unstructured":"Jihoon Tack, Jaehyung Kim, Eric Mitchell, Jinwoo Shin, Yee Whye Teh, and Jonathan Richard Schwarz. 2024. Online adaptation of language models with a memory of amortized contexts. arXiv preprint arXiv:2403.04317 (2024)."},{"key":"e_1_3_2_1_62_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_63_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Valmeekam Karthik","year":"2024","unstructured":"Karthik Valmeekam, Matthew Marquez, Alberto Olmo, Sarath Sreedharan, and Subbarao Kambhampati. 2024. Planbench: An extensible benchmark for evaluating large language models on planning and reasoning about change. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_64_1","volume-title":"Benjamin Van Durme, and Yu Su","author":"Wang Boshi","year":"2024","unstructured":"Boshi Wang, Hao Fang, Jason Eisner, Benjamin Van Durme, and Yu Su. 2024a. LLMs in the Imaginarium: tool learning through simulated trial and error. 
arXiv preprint arXiv:2403.04746 (2024)."},{"key":"e_1_3_2_1_65_1","volume-title":"Gensim: Generating robotic simulation tasks via large language models. arXiv preprint arXiv:2310.01361","author":"Wang Lirui","year":"2023","unstructured":"Lirui Wang, Yiyang Ling, Zhecheng Yuan, Mohit Shridhar, Chen Bao, Yuzhe Qin, Bailin Wang, Huazhe Xu, and Xiaolong Wang. 2023c. Gensim: Generating robotic simulation tasks via large language models. arXiv preprint arXiv:2310.01361 (2023)."},{"key":"e_1_3_2_1_66_1","volume-title":"2023 d. A survey on large language model based autonomous agents. arXiv preprint arXiv:2308.11432","author":"Wang Lei","year":"2023","unstructured":"Lei Wang, Chen Ma, Xueyang Feng, Zeyu Zhang, Hao Yang, Jingsen Zhang, Zhiyuan Chen, Jiakai Tang, Xu Chen, Yankai Lin, et al. 2023 d. A survey on large language model based autonomous agents. arXiv preprint arXiv:2308.11432 (2023)."},{"key":"e_1_3_2_1_67_1","volume-title":"Roy Ka-Wei Lee, and Ee-Peng Lim. 2023 g. Plan-and-solve prompting: Improving zero-shot chain-of-thought reasoning by large language models. arXiv preprint arXiv:2305.04091","author":"Wang Lei","year":"2023","unstructured":"Lei Wang, Wanyu Xu, Yihuai Lan, Zhiqiang Hu, Yunshi Lan, Roy Ka-Wei Lee, and Ee-Peng Lim. 2023 g. Plan-and-solve prompting: Improving zero-shot chain-of-thought reasoning by large language models. arXiv preprint arXiv:2305.04091 (2023)."},{"key":"e_1_3_2_1_68_1","volume-title":"Learning From Failure: Integrating Negative Examples when Fine-tuning Large Language Models as Agents. arXiv preprint arXiv:2402.11651","author":"Wang Renxi","year":"2024","unstructured":"Renxi Wang, Haonan Li, Xudong Han, Yixuan Zhang, and Timothy Baldwin. 2024b. Learning From Failure: Integrating Negative Examples when Fine-tuning Large Language Models as Agents. arXiv preprint arXiv:2402.11651 (2024)."},{"key":"e_1_3_2_1_69_1","volume-title":"2023 e. 
ByteSized32: A corpus and challenge task for generating task-specific world models expressed as text games. arXiv preprint arXiv:2305.14879","author":"Wang Ruoyao","year":"2023","unstructured":"Ruoyao Wang, Graham Todd, Eric Yuan, Ziang Xiao, Marc-Alexandre C\u00f4t\u00e9, and Peter Jansen. 2023 e. ByteSized32: A corpus and challenge task for generating task-specific world models expressed as text games. arXiv preprint arXiv:2305.14879 (2023)."},{"key":"e_1_3_2_1_70_1","volume-title":"Promptagent: Strategic planning with language models enables expert-level prompt optimization. arXiv preprint arXiv:2310.16427","author":"Wang Xinyuan","year":"2023","unstructured":"Xinyuan Wang, Chenxi Li, Zhen Wang, Fan Bai, Haotian Luo, Jiayou Zhang, Nebojsa Jojic, Eric P Xing, and Zhiting Hu. 2023b. Promptagent: Strategic planning with language models enables expert-level prompt optimization. arXiv preprint arXiv:2310.16427 (2023)."},{"key":"e_1_3_2_1_71_1","volume-title":"2023 f. Robogen: Towards unleashing infinite data for automated robot learning via generative simulation. arXiv preprint arXiv:2311.01455","author":"Wang Yufei","year":"2023","unstructured":"Yufei Wang, Zhou Xian, Feng Chen, Tsun-Hsuan Wang, Yian Wang, Katerina Fragkiadaki, Zackory Erickson, David Held, and Chuang Gan. 2023 f. Robogen: Towards unleashing infinite data for automated robot learning via generative simulation. arXiv preprint arXiv:2311.01455 (2023)."},{"key":"e_1_3_2_1_72_1","unstructured":"Zihao Wang Shaofei Cai Anji Liu Xiaojian Ma and Yitao Liang. 2023a. Describe Explain Plan and Select: Interactive Planning with Large Language Models Enables Open-World Multi-Task Agents. arxiv: 2302.01560 [cs.AI]"},{"key":"e_1_3_2_1_73_1","volume-title":"Brian Lester, Nan Du, Andrew M Dai, and Quoc V Le.","author":"Wei Jason","year":"2021","unstructured":"Jason Wei, Maarten Bosma, Vincent Y Zhao, Kelvin Guu, Adams Wei Yu, Brian Lester, Nan Du, Andrew M Dai, and Quoc V Le. 2021. 
Finetuned language models are zero-shot learners. arXiv preprint arXiv:2109.01652 (2021)."},{"key":"e_1_3_2_1_74_1","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 24824--24837.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_75_1","volume-title":"AutoGen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155","author":"Wu Qingyun","year":"2023","unstructured":"Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Shaokun Zhang, Erkang Zhu, Beibin Li, Li Jiang, Xiaoyun Zhang, and Chi Wang. 2023a. AutoGen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155 (2023)."},{"key":"e_1_3_2_1_76_1","unstructured":"Zhenyu Wu Ziwei Wang Xiuwei Xu Jiwen Lu and Haibin Yan. 2023b. Embodied Task Planning with Large Language Models. arxiv: 2307.01848 [cs.CV]"},{"key":"e_1_3_2_1_77_1","unstructured":"Zhiheng Xi Wenxiang Chen Xin Guo Wei He Yiwen Ding Boyang Hong Ming Zhang Junzhe Wang Senjie Jin Enyu Zhou et al. 2023. The rise and potential of large language model based agents: A survey. arXiv preprint arXiv:2309.07864 (2023)."},{"key":"e_1_3_2_1_78_1","volume-title":"Travelplanner: A benchmark for real-world planning with language agents. arXiv preprint arXiv:2402.01622","author":"Xie Jian","year":"2024","unstructured":"Jian Xie, Kai Zhang, Jiangjie Chen, Tinghui Zhu, Renze Lou, Yuandong Tian, Yanghua Xiao, and Yu Su. 2024. Travelplanner: A benchmark for real-world planning with language agents. 
arXiv preprint arXiv:2402.01622 (2024)."},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2310.10634nolinkurl10.48550\/ARXIV.2310.10634showeprint[arXiv]2310.10634"},{"key":"e_1_3_2_1_80_1","volume-title":"Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244","author":"Xu Can","year":"2023","unstructured":"Can Xu, Qingfeng Sun, Kai Zheng, Xiubo Geng, Pu Zhao, Jiazhan Feng, Chongyang Tao, and Daxin Jiang. 2023. Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244 (2023)."},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01536"},{"key":"e_1_3_2_1_82_1","volume-title":"Tree of thoughts: Deliberate problem solving with large language models. arXiv preprint arXiv:2305.10601","author":"Yao Shunyu","year":"2023","unstructured":"Shunyu Yao, Dian Yu, Jeffrey Zhao, Izhak Shafran, Thomas L Griffiths, Yuan Cao, and Karthik Narasimhan. 2023b. Tree of thoughts: Deliberate problem solving with large language models. arXiv preprint arXiv:2305.10601 (2023)."},{"key":"e_1_3_2_1_83_1","volume-title":"React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629","author":"Yao Shunyu","year":"2022","unstructured":"Shunyu Yao, Jeffrey Zhao, Dian Yu, Nan Du, Izhak Shafran, Karthik Narasimhan, and Yuan Cao. 2022. React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629 (2022)."},{"key":"e_1_3_2_1_84_1","volume-title":"Zhiwei Liu, Yihao Feng, Le Xue, Rithesh Murthy, Zeyuan Chen, Jianguo Zhang, Devansh Arpit, et al.","author":"Yao Weiran","year":"2023","unstructured":"Weiran Yao, Shelby Heinecke, Juan Carlos Niebles, Zhiwei Liu, Yihao Feng, Le Xue, Rithesh Murthy, Zeyuan Chen, Jianguo Zhang, Devansh Arpit, et al. 2023a. Retroformer: Retrospective large language agents with policy gradient optimization. 
arXiv preprint arXiv:2308.02151 (2023)."},{"key":"e_1_3_2_1_85_1","volume-title":"Lumos: Learning agents with unified data, modular design, and open-source llms. arXiv preprint arXiv:2311.05657","author":"Yin Da","year":"2023","unstructured":"Da Yin, Faeze Brahman, Abhilasha Ravichander, Khyathi Chandu, Kai-Wei Chang, Yejin Choi, and Bill Yuchen Lin. 2023. Lumos: Learning agents with unified data, modular design, and open-source llms. arXiv preprint arXiv:2311.05657 (2023)."},{"key":"e_1_3_2_1_86_1","volume-title":"Agenttuning: Enabling generalized agent abilities for llms. arXiv preprint arXiv:2310.12823","author":"Zeng Aohan","year":"2023","unstructured":"Aohan Zeng, Mingdao Liu, Rui Lu, Bowen Wang, Xiao Liu, Yuxiao Dong, and Jie Tang. 2023. Agenttuning: Enabling generalized agent abilities for llms. arXiv preprint arXiv:2310.12823 (2023)."},{"key":"e_1_3_2_1_87_1","unstructured":"Jianguo Zhang Tian Lan Rithesh Murthy Zhiwei Liu Weiran Yao Juntao Tan Thai Hoang Liangwei Yang Yihao Feng Zuxin Liu et al. 2024. AgentOhana: Design Unified Data and Training Pipeline for Effective Agent Learning. arXiv preprint arXiv:2402.15506 (2024)."},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29936"},{"key":"e_1_3_2_1_89_1","volume-title":"Advances in Neural Information Processing Systems","author":"Zhao Zirui","year":"2024","unstructured":"Zirui Zhao, Wee Sun Lee, and David Hsu. 2024b. Large language models as commonsense knowledge for large-scale task planning. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_90_1","volume-title":"NATURAL PLAN: Benchmarking LLMs on Natural Language Planning. arXiv preprint arXiv:2406.04520","author":"Zheng Huaixiu Steven","year":"2024","unstructured":"Huaixiu Steven Zheng, Swaroop Mishra, Hugh Zhang, Xinyun Chen, Minmin Chen, Azade Nova, Le Hou, Heng-Tze Cheng, Quoc V Le, Ed H Chi, et al. 2024. NATURAL PLAN: Benchmarking LLMs on Natural Language Planning. 
arXiv preprint arXiv:2406.04520 (2024)."},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29946"},{"key":"e_1_3_2_1_92_1","volume-title":"Language agent tree search unifies reasoning acting and planning in language models. arXiv preprint arXiv:2310.04406","author":"Zhou Andy","year":"2023","unstructured":"Andy Zhou, Kai Yan, Michal Shlapentokh-Rothman, Haohan Wang, and Yu-Xiong Wang. 2023. Language agent tree search unifies reasoning acting and planning in language models. arXiv preprint arXiv:2310.04406 (2023)."},{"key":"e_1_3_2_1_93_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Zhou Chunting","year":"2024","unstructured":"Chunting Zhou, Pengfei Liu, Puxin Xu, Srinivasan Iyer, Jiao Sun, Yuning Mao, Xuezhe Ma, Avia Efrat, Ping Yu, Lili Yu, et al. 2024. Lima: Less is more for alignment. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_94_1","unstructured":"Denny Zhou Nathanael Sch\u00e4rli Le Hou Jason Wei Nathan Scales Xuezhi Wang Dale Schuurmans Claire Cui Olivier Bousquet Quoc Le et al. 2022. Least-to-most prompting enables complex reasoning in large language models. arXiv preprint arXiv:2205.10625 (2022)."},{"key":"e_1_3_2_1_95_1","unstructured":"Xizhou Zhu Yuntao Chen Hao Tian Chenxin Tao Weijie Su Chenyu Yang Gao Huang Bin Li Lewei Lu Xiaogang Wang et al. 2023. Ghost in the minecraft: Generally capable agents for open-world environments via large language models with text-based knowledge and memory. 
arXiv preprint arXiv:2305.17144 (2023)."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709321","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709321","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:43:03Z","timestamp":1755358983000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709321"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":95,"alternative-id":["10.1145\/3690624.3709321","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709321","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}