{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T16:54:59Z","timestamp":1783097699718,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":146,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3671452","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:54:55Z","timestamp":1724561695000},"page":"6480-6490","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["Reasoning and Planning with Large Language Models in Code Development"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9932-2971","authenticated-orcid":false,"given":"Hao","family":"Ding","sequence":"first","affiliation":[{"name":"AWS AI Labs, Santa Clara, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5445-2203","authenticated-orcid":false,"given":"Ziwei","family":"Fan","sequence":"additional","affiliation":[{"name":"AWS AI Labs, Santa Clara, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3947-6498","authenticated-orcid":false,"given":"Ingo","family":"Guehring","sequence":"additional","affiliation":[{"name":"AWS AI Labs, Berlin, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2828-0916","authenticated-orcid":false,"given":"Gaurav","family":"Gupta","sequence":"additional","affiliation":[{"name":"AWS AI Labs, Santa Clara, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9069-854X","authenticated-orcid":false,"given":"Wooseok","family":"Ha","sequence":"additional","affiliation":[{"name":"AWS AI Labs, Santa Clara, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6440-4732","authenticated-orcid":false,"given":"Jun","family":"Huan","sequence":"additional","affiliation":[{"name":"AWS AI Labs, Santa Clara, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7368-4974","authenticated-orcid":false,"given":"Linbo","family":"Liu","sequence":"additional","affiliation":[{"name":"AWS AI labs, Santa Clara, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9103-0114","authenticated-orcid":false,"given":"Behrooz","family":"Omidvar-Tehrani","sequence":"additional","affiliation":[{"name":"AWS AI Labs, Santa Clara, CA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6338-1432","authenticated-orcid":false,"given":"Shiqi","family":"Wang","sequence":"additional","affiliation":[{"name":"AWS AI Labs, New York, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9230-5466","authenticated-orcid":false,"given":"Hao","family":"Zhou","sequence":"additional","affiliation":[{"name":"AWS AI Labs, New York, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. Amazon Q Developer. https:\/\/aws.amazon.com\/q\/developer\/."},{"key":"e_1_3_2_1_2_1","unstructured":"[n. d.]. Amazon Q Developer Agent for Code Transformation. https:\/\/aws.amazon.com\/q\/developer\/code-transformation\/. Accessed: 2024-06-03."},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. Bedrock Agents. https:\/\/aws.amazon.com\/bedrock\/agents\/."},{"key":"e_1_3_2_1_4_1","unstructured":"[n. d.]. Github Copilot. https:\/\/github.com\/features\/copilot\/. Accessed: 2024-03- 03."},{"key":"e_1_3_2_1_5_1","unstructured":"[n. d.]. Introducing Devin the first AI software engineer. https:\/\/www.cognition. ai\/blog\/introducing-devin."},{"key":"e_1_3_2_1_6_1","unstructured":"[n. d.]. LangChainAgents. https:\/\/python.langchain.com\/docs\/modules\/agents\/."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Yalemisew Abgaz Andrew McCarren Peter Elger David Solan Neil Lapuz Marin Bivol Glenn Jackson Murat Yilmaz et al. 2023. Decomposition of Monolith Applications Into Microservices Architectures: A Systematic Review. IEEE Transactions on Software Engineering (2023).","DOI":"10.1109\/TSE.2023.3287297"},{"key":"e_1_3_2_1_8_1","volume-title":"Juice: A large scale distantly supervised dataset for open domain context-based code generation. arXiv preprint arXiv:1910.02216","author":"Agashe Rajas","year":"2019","unstructured":"Rajas Agashe, Srinivasan Iyer, and Luke Zettlemoyer. 2019. Juice: A large scale distantly supervised dataset for open domain context-based code generation. arXiv preprint arXiv:1910.02216 (2019)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3551349.3559555"},{"key":"e_1_3_2_1_10_1","volume-title":"Premkumar Devanbu, and Earl T. Barr.","author":"Ahmed Toufique","year":"2023","unstructured":"Toufique Ahmed, Kunal Suresh Pai, Premkumar Devanbu, and Earl T. Barr. 2023. Automatic Semantic Augmentation of Language Model Prompts (for Code Summarization). arXiv preprint arXiv:2304.06815 (2023)."},{"key":"e_1_3_2_1_11_1","volume-title":"GQA: Training Generalized Multi- Query Transformer Models from Multi-Head Checkpoints. arXiv preprint arXiv:2305.13245","author":"Ainslie Joshua","year":"2023","unstructured":"Joshua Ainslie, James Lee-Thorp, Michiel de Jong, Yury Zemlyanskiy, Federico Lebr\u00f3n, and Sumit Sanghai. 2023. GQA: Training Generalized Multi- Query Transformer Models from Multi-Head Checkpoints. arXiv preprint arXiv:2305.13245 (2023)."},{"key":"e_1_3_2_1_12_1","volume-title":"Niklas Muennighoff, Mayank Mishra, et al.","author":"Allal Loubna Ben","year":"2023","unstructured":"Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, et al. 2023. SantaCoder: don't reach for the stars! arXiv preprint arXiv:2301.03988 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Guidelines for Human-AI Interaction (CHI '19)","author":"Amershi Saleema","year":"2019","unstructured":"Saleema Amershi, Dan Weld, Mihaela Vorvoreanu, Adam Fourney, Besmira Nushi, Penny Collisson, Jina Suh, Shamsi Iqbal, et al. 2019. Guidelines for Human-AI Interaction (CHI '19)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2210.14868"},{"key":"e_1_3_2_1_15_1","unstructured":"Jacob Austin Augustus Odena Maxwell I. Nye Maarten Bosma Henryk Michalewski David Dohan Ellen Jiang Carrie J. Cai et al. 2021. Program Synthesis with Large Language Models. CoRR abs\/2108.07732 (2021). arXiv:2108.07732 https:\/\/arxiv.org\/abs\/2108.07732"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586030"},{"key":"e_1_3_2_1_17_1","volume-title":"Longformer: The longdocument transformer. arXiv preprint arXiv:2004.05150","author":"Beltagy Iz","year":"2020","unstructured":"Iz Beltagy, Matthew E Peters, and Arman Cohan. 2020. Longformer: The longdocument transformer. arXiv preprint arXiv:2004.05150 (2020)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Sid Black Stella Biderman Eric Hallahan Quentin Anthony Leo Gao Laurence Golding Horace He Connor Leahy et al. 2022. Gpt-neox-20b: An open-source autoregressive language model. arXiv preprint arXiv:2204.06745 (2022).","DOI":"10.18653\/v1\/2022.bigscience-1.9"},{"key":"e_1_3_2_1_19_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_20_1","volume-title":"Donald Pinckney, Ming-Ho Yee, Yangtian Zi, et al.","author":"Cassano Federico","year":"2022","unstructured":"Federico Cassano, John Gouwar, Daniel Nguyen, Sydney Nguyen, Luna Phipps- Costin, Donald Pinckney, Ming-Ho Yee, Yangtian Zi, et al. 2022. MultiPL-E: A Scalable and Extensible Approach to Benchmarking Neural Code Generation. arXiv preprint arXiv:2208.08227 (2022)."},{"key":"e_1_3_2_1_21_1","volume-title":"Jun Shern Chan, Samuel R Bowman, Kyunghyun Cho, and Ethan Perez.","author":"Chen Angelica","year":"2023","unstructured":"Angelica Chen, J\u00e9r\u00e9my Scheurer, Tomasz Korbak, Jon Ander Campos, Jun Shern Chan, Samuel R Bowman, Kyunghyun Cho, and Ethan Perez. 2023. Improving code generation by training with natural language feedback. arXiv preprint arXiv:2303.16749 (2023)."},{"key":"e_1_3_2_1_22_1","unstructured":"Bei Chen Fengji Zhang Anh Nguyen Daoguang Zan Zeqi Lin Jian-Guang Lou and Weizhu Chen. 2022. CodeT: Code Generation with Generated Tests. arXiv:2207.10397 [cs.CL]"},{"key":"e_1_3_2_1_23_1","volume-title":"GPTutor: a ChatGPT-powered programming tool for code explanation. arXiv preprint arXiv:2305.01863","author":"Chen Eason","year":"2023","unstructured":"Eason Chen, Ray Huang, Han-Shin Chen, Yuen-Hsien Tseng, and Liang-Yi Li. 2023. GPTutor: a ChatGPT-powered programming tool for code explanation. arXiv preprint arXiv:2305.01863 (2023)."},{"key":"e_1_3_2_1_24_1","volume-title":"Autoagents: A framework for automatic agent generation. arXiv preprint arXiv:2309.17288","author":"Chen Guangyao","year":"2023","unstructured":"Guangyao Chen, Siwei Dong, Yu Shu, Ge Zhang, Jaward Sesay, B\u00f6rje F Karlsson, Jie Fu, and Yemin Shi. 2023. Autoagents: A framework for automatic agent generation. arXiv preprint arXiv:2309.17288 (2023)."},{"key":"e_1_3_2_1_25_1","volume-title":"Jared Kaplan, Harri Edwards, Yuri Burda, et al.","author":"Chen Mark","year":"2021","unstructured":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, et al. 2021. Evaluating Large Language Models Trained on Code. arXiv:2107.03374 [cs.LG]"},{"key":"e_1_3_2_1_26_1","volume-title":"Jared Kaplan, Harri Edwards, Yuri Burda, et al.","author":"Chen Mark","year":"2021","unstructured":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, et al. 2021. Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374 (2021)."},{"key":"e_1_3_2_1_27_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Chen Weize","year":"2023","unstructured":"Weize Chen, Yusheng Su, Jingwei Zuo, Cheng Yang, Chenfei Yuan, Chi-Min Chan, Heyang Yu, Yaxi Lu, et al. 2023. Agentverse: Facilitating multi-agent collaboration and exploring emergent behaviors. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_28_1","volume-title":"Teaching large language models to self-debug. arXiv preprint arXiv:2304.05128","author":"Chen Xinyun","year":"2023","unstructured":"Xinyun Chen, Maxwell Lin, Nathanael Sch\u00e4rli, and Denny Zhou. 2023. Teaching large language models to self-debug. arXiv preprint arXiv:2304.05128 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509","author":"Child Rewon","year":"2019","unstructured":"Rewon Child, Scott Gray, Alec Radford, and Ilya Sutskever. 2019. Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509 (2019)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Nazli Cila. 2022. Designing Human-Agent Collaborations: Commitment Responsiveness and Support (CHI '22).","DOI":"10.1145\/3491102.3517500"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3064663.3064667"},{"key":"e_1_3_2_1_32_1","volume-title":"Rlprompt: Optimizing discrete text prompts with reinforcement learning. arXiv preprint arXiv:2205.12548","author":"Deng Mingkai","year":"2022","unstructured":"Mingkai Deng, JianyuWang, Cheng-Ping Hsieh, YihanWang, Han Guo, Tianmin Shu, Meng Song, Eric P Xing, et al. 2022. Rlprompt: Optimizing discrete text prompts with reinforcement learning. arXiv preprint arXiv:2205.12548 (2022)."},{"key":"e_1_3_2_1_33_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_34_1","volume-title":"Ramesh Nallapati, et al.","author":"Ding Yangruibo","year":"2024","unstructured":"Yangruibo Ding, Zijian Wang, Wasi Ahmad, Hantian Ding, Ming Tan, Nihal Jain, Murali Krishna Ramanathan, Ramesh Nallapati, et al. 2024. Crosscodeeval: A diverse and multilingual benchmark for cross-file code completion. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_35_1","unstructured":"Qingxiu Dong Lei Li Damai Dai Ce Zheng Zhiyong Wu Baobao Chang Xu Sun Jingjing Xu et al. 2022. A survey for in-context learning. arXiv preprint arXiv:2301.00234 (2022)."},{"key":"e_1_3_2_1_36_1","unstructured":"Shihan Dou Junjie Shan Haoxiang Jia Wenhao Deng Zhiheng Xi Wei He Yueming Wu Tao Gui et al. 2023. Towards Understanding the Capability of Large Language Models on Code Clone Detection: A Survey. arXiv:2308.01191 [cs.SE]"},{"key":"e_1_3_2_1_37_1","volume-title":"Improving Factuality and Reasoning in Language Models through Multiagent Debate. arXiv preprint arXiv:2305.14325","author":"Du Yilun","year":"2023","unstructured":"Yilun Du, Shuang Li, Antonio Torralba, Joshua B Tenenbaum, and Igor Mordatch. 2023. Improving Factuality and Reasoning in Language Models through Multiagent Debate. arXiv preprint arXiv:2305.14325 (2023)."},{"key":"e_1_3_2_1_38_1","volume-title":"Weisz","author":"Ehsan Upol","year":"2021","unstructured":"Upol Ehsan, Q. Vera Liao, Michael Muller, Mark O. Riedl, and Justin D. Weisz. 2021. Expanding Explainability: Towards Social Transparency in AI Systems (CHI '21)."},{"key":"e_1_3_2_1_39_1","volume-title":"Large language models for software engineering: Survey and open problems. arXiv preprint arXiv:2310.03533","author":"Fan Angela","year":"2023","unstructured":"Angela Fan, Beliz Gokkaya, Mark Harman, Mitya Lyubarskiy, Shubho Sengupta, Shin Yoo, and Jie M Zhang. 2023. Large language models for software engineering: Survey and open problems. arXiv preprint arXiv:2310.03533 (2023)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3325480.3326578"},{"key":"e_1_3_2_1_41_1","volume-title":"Towards Autonomous Testing Agents via Conversational Large Language Models. arXiv preprint arXiv:2306.05152","author":"Feldt Robert","year":"2023","unstructured":"Robert Feldt, Sungmin Kang, Juyeon Yoon, and Shin Yoo. 2023. Towards Autonomous Testing Agents via Conversational Large Language Models. arXiv preprint arXiv:2306.05152 (2023)."},{"key":"e_1_3_2_1_42_1","volume-title":"Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155","author":"Feng Zhangyin","year":"2020","unstructured":"Zhangyin Feng, Daya Guo, Duyu Tang, Nan Duan, Xiaocheng Feng, Ming Gong, Linjun Shou, Bing Qin, et al. 2020. Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155 (2020)."},{"key":"e_1_3_2_1_43_1","volume-title":"GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers. arXiv preprint arXiv:2210.17323","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. 2022. GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers. arXiv preprint arXiv:2210.17323 (2022)."},{"key":"e_1_3_2_1_44_1","volume-title":"Incoder: A generative model for code infilling and synthesis. arXiv preprint arXiv:2204.05999","author":"Fried Daniel","year":"2022","unstructured":"Daniel Fried, Armen Aghajanyan, Jessy Lin, Sida Wang, Eric Wallace, Freda Shi, Ruiqi Zhong, Wen-tau Yih, et al. 2022. Incoder: A generative model for code infilling and synthesis. arXiv preprint arXiv:2204.05999 (2022)."},{"key":"e_1_3_2_1_45_1","volume-title":"Improving language model negotiation with self-play and in-context learning from ai feedback. arXiv preprint arXiv:2305.10142","author":"Fu Yao","year":"2023","unstructured":"Yao Fu, Hao Peng, Tushar Khot, and Mirella Lapata. 2023. Improving language model negotiation with self-play and in-context learning from ai feedback. arXiv preprint arXiv:2305.10142 (2023)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Mingyang Geng Shangwen Wang Dezun Dong Haotian Wang Ge Li Zhi Jin Xiaoguang Mao and Xiangke Liao. 2024. Large Language Models are Few- Shot Summarizers: Multi-Intent Comment Generation via In-Context Learning. (2024).","DOI":"10.1145\/3597503.3608134"},{"key":"e_1_3_2_1_47_1","volume-title":"Allie Del Giorno, Sivakanth Gopi, Mojan Javaheripi, Piero Kauffmann, et al.","author":"Gunasekar Suriya","year":"2023","unstructured":"Suriya Gunasekar, Yi Zhang, Jyoti Aneja, Caio C\u00e9sar Teodoro Mendes, Allie Del Giorno, Sivakanth Gopi, Mojan Javaheripi, Piero Kauffmann, et al. 2023. Textbooks Are All You Need. arXiv preprint arXiv:2306.11644 (2023)."},{"key":"e_1_3_2_1_48_1","unstructured":"Dan Hendrycks Steven Basart Saurav Kadavath Mantas Mazeika Akul Arora Ethan Guo Collin Burns Samir Puranik et al. 2021. Measuring Coding Challenge Competence With APPS. NeurIPS (2021)."},{"key":"e_1_3_2_1_49_1","volume-title":"Steven Ka Shing Yau, et al","author":"Hong Sirui","year":"2023","unstructured":"Sirui Hong, Xiawu Zheng, Jonathan Chen, Yuheng Cheng, Jinlin Wang, Ceyao Zhang, ZiliWang, Steven Ka Shing Yau, et al. 2023. Metagpt: Meta programming for multi-agent collaborative framework. arXiv preprint arXiv:2308.00352 (2023)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Xinyi Hou Yanjie Zhao Yue Liu Zhou Yang Kailong Wang Li Li Xiapu Luo David Lo et al. 2023. Large Language Models for Software Engineering: A Systematic Literature Review. arXiv:2308.10620 [cs.SE]","DOI":"10.1145\/3695988"},{"key":"e_1_3_2_1_51_1","volume-title":"The bach doodle: Approachable music composition with machine learning at scale. arXiv preprint arXiv:1907.06637","author":"Anna Huang Cheng-Zhi","year":"2019","unstructured":"Cheng-Zhi Anna Huang, Curtis Hawthorne, Adam Roberts, Monica Dinculescu, James Wexler, Leon Hong, and Jacob Howcroft. 2019. The bach doodle: Approachable music composition with machine learning at scale. arXiv preprint arXiv:1907.06637 (2019)."},{"key":"e_1_3_2_1_52_1","unstructured":"Dong Huang Qingwen Bu Yuhao Qing and Heming Cui. 2024. Code- CoT: Tackling Code Syntax Errors in CoT Reasoning for Code Generation. arXiv:2308.08784 [cs.SE]"},{"key":"e_1_3_2_1_53_1","unstructured":"Dong Huang Qingwen Bu Jie M. Zhang Michael Luck and Heming Cui. 2024. AgentCoder: Multi-Agent-based Code Generation with Iterative Testing and Optimisation. arXiv:2312.13010 [cs.CL]"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"crossref","unstructured":"Jie Huang and Kevin Chen-Chuan Chang. 2023. Towards Reasoning in Large Language Models: A Survey. arXiv:2212.10403 [cs.CL]","DOI":"10.18653\/v1\/2023.findings-acl.67"},{"key":"e_1_3_2_1_55_1","volume-title":"Colin Clement, Nan Duan, et al.","author":"Huang Junjie","year":"2022","unstructured":"Junjie Huang, Chenglong Wang, Jipeng Zhang, Cong Yan, Haotian Cui, Jeevana Priya Inala, Colin Clement, Nan Duan, et al. 2022. Execution-based evaluation for data science code generation models. arXiv preprint arXiv:2211.09374 (2022)."},{"key":"e_1_3_2_1_56_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_1_57_1","volume-title":"Diego de las Casas, et al.","author":"Jiang Albert Q","year":"2024","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Antoine Roux, Arthur Mensch, Blanche Savary, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, et al. 2024. Mixtral of experts. arXiv preprint arXiv:2401.04088 (2024)."},{"key":"e_1_3_2_1_58_1","volume-title":"SWE-bench: Can Language Models Resolve Real-World GitHub Issues? arXiv preprint arXiv:2310.06770","author":"Jimenez Carlos E","year":"2023","unstructured":"Carlos E Jimenez, John Yang, Alexander Wettig, Shunyu Yao, Kexin Pei, Ofir Press, and Karthik Narasimhan. 2023. SWE-bench: Can Language Models Resolve Real-World GitHub Issues? arXiv preprint arXiv:2310.06770 (2023)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377811.3380342"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377325.3377522"},{"key":"e_1_3_2_1_61_1","volume-title":"Olga Russakovsky, Ruth Fong, and Andr\u00e9s Monroy-Hern\u00e1ndez.","author":"Kim Sunnie S. Y.","year":"2023","unstructured":"Sunnie S. Y. Kim, Elizabeth Anne Watkins, Olga Russakovsky, Ruth Fong, and Andr\u00e9s Monroy-Hern\u00e1ndez. 2023. \"Help Me Help the AI\": Understanding How Explainability Can Support Human-AI Interaction (CHI '23)."},{"key":"e_1_3_2_1_62_1","volume-title":"Jia Li, Chenghao Mou, Carlos Mu\u00f1oz Ferrandis, Yacine Jernite, Margaret Mitchell, et al.","author":"Kocetkov Denis","year":"2022","unstructured":"Denis Kocetkov, Raymond Li, Loubna Ben Allal, Jia Li, Chenghao Mou, Carlos Mu\u00f1oz Ferrandis, Yacine Jernite, Margaret Mitchell, et al. 2022. The stack: 3 tb of permissively licensed source code. arXiv preprint arXiv:2211.15533 (2022)."},{"key":"e_1_3_2_1_63_1","volume-title":"Yuwei Ren, Markus Nagel, Jorn Peters, and Tijmen Blankevoort.","author":"Kuzmin Andrey","year":"2022","unstructured":"Andrey Kuzmin, Mart Van Baalen, Yuwei Ren, Markus Nagel, Jorn Peters, and Tijmen Blankevoort. 2022. FP8 Quantization: The Power of the Exponent. arXiv preprint arXiv:2208.09225 (2022)."},{"key":"e_1_3_2_1_64_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Lai Yuhang","year":"2023","unstructured":"Yuhang Lai, Chengxi Li, Yiming Wang, Tianyi Zhang, Ruiqi Zhong, Luke Zettlemoyer, Wen-tau Yih, Daniel Fried, et al. 2023. DS-1000: A natural and reliable benchmark for data science code generation. In International Conference on Machine Learning. PMLR, 18319--18345."},{"key":"e_1_3_2_1_65_1","unstructured":"Hung Le Hailin Chen Amrita Saha Akash Gokul Doyen Sahoo and Shafiq Joty. 2023. CodeChain: Towards Modular Code Generation Through Chain of Self-revisions with Representative Sub-modules. arXiv:2310.08992 [cs.AI]"},{"key":"e_1_3_2_1_66_1","unstructured":"Hung Le Hailin Chen Amrita Saha Akash Gokul Doyen Sahoo and Shafiq Joty. 2024. CodeChain: Towards Modular Code Generation Through Chain of Self-revisions with Representative Sub-modules. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id= vYhglxSj8j"},{"key":"e_1_3_2_1_67_1","first-page":"21314","article-title":"Coderl: Mastering code generation through pretrained models and deep reinforcement learning","volume":"35","author":"Le Hung","year":"2022","unstructured":"Hung Le, Yue Wang, Akhilesh Deepak Gotmare, Silvio Savarese, and Steven Chu Hong Hoi. 2022. Coderl: Mastering code generation through pretrained models and deep reinforcement learning. Advances in Neural Information Processing Systems 35 (2022), 21314--21328.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00085"},{"key":"e_1_3_2_1_69_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Leviathan Yaniv","year":"2023","unstructured":"Yaniv Leviathan, Matan Kalman, and Yossi Matias. 2023. Fast inference from transformers via speculative decoding. In International Conference on Machine Learning. PMLR, 19274--19286."},{"key":"e_1_3_2_1_70_1","volume-title":"Camel: Communicative agents for\" mind\" exploration of large language model society. Advances in Neural Information Processing Systems 36","author":"Li Guohao","year":"2024","unstructured":"Guohao Li, Hasan Hammoud, Hani Itani, Dmitrii Khizbullin, and Bernard Ghanem. 2024. Camel: Communicative agents for\" mind\" exploration of large language model society. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_71_1","volume-title":"Yangtian Zi, Niklas Muennighoff, Denis Kocetkov, Chenghao Mou, Marc Marone, Christopher Akiki, et al.","author":"Li Raymond","year":"2023","unstructured":"Raymond Li, Loubna Ben Allal, Yangtian Zi, Niklas Muennighoff, Denis Kocetkov, Chenghao Mou, Marc Marone, Christopher Akiki, et al. 2023. StarCoder: may the source be with you! arXiv preprint arXiv:2305.06161 (2023)."},{"key":"e_1_3_2_1_72_1","volume-title":"Suriya Gunasekar, and Yin Tat Lee.","author":"Li Yuanzhi","year":"2023","unstructured":"Yuanzhi Li, S\u00e9bastien Bubeck, Ronen Eldan, Allie Del Giorno, Suriya Gunasekar, and Yin Tat Lee. 2023. Textbooks are all you need ii: phi-1.5 technical report. arXiv preprint arXiv:2309.05463 (2023)."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.abq1158"},{"key":"e_1_3_2_1_74_1","unstructured":"Percy Liang Rishi Bommasani Tony Lee Dimitris Tsipras Dilara Soylu Michihiro Yasunaga Yian Zhang Deepak Narayanan et al. 2022. Holistic evaluation of language models. arXiv preprint arXiv:2211.09110 (2022)."},{"key":"e_1_3_2_1_75_1","volume-title":"Yuyao Wang, and Lingming Zhang.","author":"Liu Jiawei","year":"2024","unstructured":"Jiawei Liu, Chunqiu Steven Xia, Yuyao Wang, and Lingming Zhang. 2024. Is your code generated by chatgpt really correct? rigorous evaluation of large language models for code generation. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_76_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, et al. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_77_1","unstructured":"Ziyang Luo Can Xu Pu Zhao Qingfeng Sun Xiubo Geng Wenxiang Hu Chongyang Tao Jing Ma et al. 2023. WizardCoder: Empowering Code Large Language Models with Evol-Instruct. arXiv preprint arXiv:2306.08568 (2023)."},{"key":"e_1_3_2_1_78_1","volume-title":"Proceedings of the 2024 CHI Conference on Human Factors in Computing Systems - Late Breaking Work.","author":"Ishaani","year":"2024","unstructured":"Ishaani M, Behrooz Omidvar-Tehrani, and Anmol Anubhai. 2024. Evaluating Human-AI Partnership for LLM-based Code Migration. In Proceedings of the 2024 CHI Conference on Human Factors in Computing Systems - Late Breaking Work."},{"key":"e_1_3_2_1_79_1","unstructured":"Shuai Ma Ying Lei Xinru Wang Chengbo Zheng Chuhan Shi Ming Yin and Xiaojuan Ma. 2023. Who Should I Trust: AI or Myself? Leveraging Human and AI Correctness Likelihood to Promote Appropriate Trust in AI-Assisted Decision-Making (CHI '23)."},{"key":"e_1_3_2_1_80_1","volume-title":"Self-refine: Iterative refinement with self-feedback. Advances in Neural Information Processing Systems 36","author":"Madaan Aman","year":"2024","unstructured":"Aman Madaan, Niket Tandon, Prakhar Gupta, Skyler Hallinan, Luyu Gao, Sarah Wiegreffe, Uri Alon, Nouha Dziri, et al. 2024. Self-refine: Iterative refinement with self-feedback. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580940"},{"key":"e_1_3_2_1_82_1","volume-title":"Application of guidelines for designing user interface software. Behaviour & information technology 5, 1","author":"Mosier Jane N","year":"1986","unstructured":"Jane N Mosier and Sidney L Smith. 1986. Application of guidelines for designing user interface software. Behaviour & information technology 5, 1 (1986), 39--46."},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00205"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.3390\/s22239492"},{"key":"e_1_3_2_1_85_1","volume-title":"CodeGen2: Lessons for Training LLMs on Programming and Natural Languages. arXiv preprint arXiv:2305.02309","author":"Nijkamp Erik","year":"2023","unstructured":"Erik Nijkamp, Hiroaki Hayashi, Caiming Xiong, Silvio Savarese, and Yingbo Zhou. 2023. CodeGen2: Lessons for Training LLMs on Programming and Natural Languages. arXiv preprint arXiv:2305.02309 (2023)."},{"key":"e_1_3_2_1_86_1","volume-title":"Codegen: An open large language model for code with multi-turn program synthesis. arXiv preprint arXiv:2203.13474","author":"Nijkamp Erik","year":"2022","unstructured":"Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, HuanWang, Yingbo Zhou, Silvio Savarese, and Caiming Xiong. 2022. Codegen: An open large language model for code with multi-turn program synthesis. arXiv preprint arXiv:2203.13474 (2022)."},{"key":"e_1_3_2_1_87_1","volume-title":"Guy Gur-Ari, Henryk Michalewski, Jacob Austin, David Bieber, David Dohan, Aitor Lewkowycz, et al.","author":"Nye Maxwell","year":"2021","unstructured":"Maxwell Nye, Anders Johan Andreassen, Guy Gur-Ari, Henryk Michalewski, Jacob Austin, David Bieber, David Dohan, Aitor Lewkowycz, et al. 2021. Show your work: Scratchpads for intermediate computation with language models. arXiv preprint arXiv:2112.00114 (2021)."},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174223"},{"key":"e_1_3_2_1_89_1","volume-title":"Chenglong Wang, Jianfeng Gao, and Armando Solar-Lezama.","author":"Olausson Theo X","year":"2023","unstructured":"Theo X Olausson, Jeevana Priya Inala, Chenglong Wang, Jianfeng Gao, and Armando Solar-Lezama. 2023. Demystifying GPT Self-Repair for Code Generation. arXiv preprint arXiv:2306.09896 (2023)."},{"key":"e_1_3_2_1_90_1","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, JeffreyWu, Xu Jiang, Diogo Almeida, CarrollWainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, et al. 2022. Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems 35 (2022), 27730--27744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_91_1","volume-title":"Kwaiagents: Generalized information-seeking agent system with large language models. arXiv preprint arXiv:2312.04889","author":"Pan Haojie","year":"2023","unstructured":"Haojie Pan, Zepeng Zhai, Hao Yuan, Yaojia Lv, Ruiji Fu, Ming Liu, Zhongyuan Wang, and Bing Qin. 2023. Kwaiagents: Generalized information-seeking agent system with large language models. arXiv preprint arXiv:2312.04889 (2023)."},{"key":"e_1_3_2_1_92_1","volume-title":"SteloCoder: a Decoder-Only LLM for Multi-Language to Python Code Translation. arXiv preprint arXiv:2310.15539","author":"Pan Jialing","year":"2023","unstructured":"Jialing Pan, Adrien Sad\u00e9, Jin Kim, Eric Soriano, Guillem Sole, and Sylvain Flamant. 2023. SteloCoder: a Decoder-Only LLM for Multi-Language to Python Code Translation. arXiv preprint arXiv:2310.15539 (2023)."},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606763"},{"key":"e_1_3_2_1_94_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545616"},{"key":"e_1_3_2_1_95_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580794"},{"key":"e_1_3_2_1_96_1","volume-title":"Chenguang Zhu, and Michael Zeng.","author":"Pryzant Reid","year":"2023","unstructured":"Reid Pryzant, Dan Iter, Jerry Li, Yin Tat Lee, Chenguang Zhu, and Michael Zeng. 2023. Automatic prompt optimization with\" gradient descent\" and beam search. arXiv preprint arXiv:2305.03495 (2023)."},{"key":"e_1_3_2_1_97_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_98_1","volume-title":"Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering. arXiv preprint arXiv:2401.08500","author":"Ridnik Tal","year":"2024","unstructured":"Tal Ridnik, Dedy Kredo, and Itamar Friedman. 2024. Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering. arXiv preprint arXiv:2401.08500 (2024)."},{"key":"e_1_3_2_1_99_1","doi-asserted-by":"crossref","unstructured":"Stephen Robertson Hugo Zaragoza et al. 2009. The probabilistic relevance framework: BM25 and beyond. Foundations and Trends\u00ae in Information Retrieval 3 4 (2009) 333--389.","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_100_1","volume-title":"Yossi Adi, Jingyu Liu, et al.","author":"Roziere Baptiste","year":"2023","unstructured":"Baptiste Roziere, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, et al. 2023. Code llama: Open foundation models for code. arXiv preprint arXiv:2308.12950 (2023)."},{"key":"e_1_3_2_1_101_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICoDSE59534.2023.10292037"},{"key":"e_1_3_2_1_102_1","volume-title":"An empirical evaluation of using large language models for automated unit test generation","author":"Sch\u00e4fer Max","year":"2023","unstructured":"Max Sch\u00e4fer, Sarah Nadi, Aryaz Eghbali, and Frank Tip. 2023. An empirical evaluation of using large language models for automated unit test generation. IEEE Transactions on Software Engineering (2023)."},{"key":"e_1_3_2_1_103_1","unstructured":"Xinyu She Yue Liu Yanjie Zhao Yiling He Li Li Chakkrit Tantithamthavorn Zhan Qin and Haoyu Wang. 2023. Pitfalls in Language Models for Code Intelligence: A Taxonomy and Survey. arXiv:2310.17903 [cs.SE]"},{"key":"e_1_3_2_1_104_1","volume-title":"Small llms are weak tool learners: A multillm agent. arXiv preprint arXiv:2401.07324","author":"Shen Weizhou","year":"2024","unstructured":"Weizhou Shen, Chenliang Li, Hongzhan Chen, Ming Yan, Xiaojun Quan, Hehong Chen, Ji Zhang, and Fei Huang. 2024. Small llms are weak tool learners: A multillm agent. arXiv preprint arXiv:2401.07324 (2024)."},{"key":"e_1_3_2_1_105_1","volume-title":"Wang","author":"Shi Freda","year":"2022","unstructured":"Freda Shi, Daniel Fried, Marjan Ghazvininejad, Luke Zettlemoyer, and Sida I. Wang. 2022. Natural Language to Code Translation with Execution. arXiv:2204.11454 [cs.CL]"},{"key":"e_1_3_2_1_106_1","volume-title":"Reflexion: an autonomous agent with dynamic memory and self-reflection. arXiv preprint arXiv:2303.11366","author":"Shinn Noah","year":"2023","unstructured":"Noah Shinn, Beck Labash, and Ashwin Gopinath. 2023. Reflexion: an autonomous agent with dynamic memory and self-reflection. arXiv preprint arXiv:2303.11366 (2023)."},{"key":"e_1_3_2_1_107_1","volume-title":"Llasm: Large language and speech model. arXiv preprint arXiv:2308.15930","author":"Shu Yu","year":"2023","unstructured":"Yu Shu, Siwei Dong, Guangyao Chen, Wenhao Huang, Ruihua Zhang, Daochen Shi, Qiqi Xiang, and Yemin Shi. 2023. Llasm: Large language and speech model. arXiv preprint arXiv:2308.15930 (2023)."},{"key":"e_1_3_2_1_108_1","volume-title":"Reinforcement Learning from Automatic Feedback for High- Quality Unit Test Generation. arXiv preprint arXiv:2310.02368","author":"Steenhoek Benjamin","year":"2023","unstructured":"Benjamin Steenhoek, Michele Tufano, Neel Sundaresan, and Alexey Svyatkovskiy. 2023. Reinforcement Learning from Automatic Feedback for High- Quality Unit Test Generation. arXiv preprint arXiv:2310.02368 (2023)."},{"key":"e_1_3_2_1_109_1","volume-title":"Cognitive architectures for language agents. arXiv preprint arXiv:2309.02427","author":"Sumers Theodore R","year":"2023","unstructured":"Theodore R Sumers, Shunyu Yao, Karthik Narasimhan, and Thomas L Griffiths. 2023. Cognitive architectures for language agents. arXiv preprint arXiv:2309.02427 (2023)."},{"key":"e_1_3_2_1_110_1","unstructured":"Lingyun Sun Zhuoshu Li Yuyang Zhang Yanzhen Liu Shanghua Lou and Zhibin Zhou. 2021. Capturing the Trends Applications Issues and Potential Strategies of Designing Transparent AI Agents (CHI EA '21)."},{"key":"e_1_3_2_1_111_1","unstructured":"Weisong Sun Chunrong Fang Yudu You Yuchen Chen Yi Liu Chong Wang Jian Zhang Quanjun Zhang et al. 2023. A Prompt Learning Framework for Source Code Summarization. arXiv preprint arXiv:2312.16066 (2023)."},{"key":"e_1_3_2_1_112_1","unstructured":"Weisong Sun Chunrong Fang Yudu You Yun Miao Yi Liu Yuekang Li Gelei Deng Shenghan Huang et al. 2023. Automatic Code Summarization via Chat- GPT: How Far Are We? arXiv preprint arXiv:2305.12865 (2023)."},{"key":"e_1_3_2_1_113_1","doi-asserted-by":"crossref","unstructured":"Mirac Suzgun Nathan Scales Nathanael Sch\u00e4rli Sebastian Gehrmann Yi Tay HyungWon Chung Aakanksha Chowdhery Quoc V Le et al. 2022. Challenging big-bench tasks and whether chain-of-thought can solve them. arXiv preprint arXiv:2210.09261 (2022).","DOI":"10.18653\/v1\/2023.findings-acl.824"},{"key":"e_1_3_2_1_114_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417058"},{"key":"e_1_3_2_1_115_1","doi-asserted-by":"publisher","DOI":"10.1145\/108844.108936"},{"key":"e_1_3_2_1_116_1","volume-title":"Roshanak Zilouchian Moghaddam, and Neel Sundaresan","author":"Tufano Michele","year":"2024","unstructured":"Michele Tufano, Anisha Agarwal, Jinu Jang, Roshanak Zilouchian Moghaddam, and Neel Sundaresan. 2024. AutoDev: Automated AI-Driven Development. arXiv preprint arXiv:2403.08299 (2024)."},{"key":"e_1_3_2_1_117_1","volume-title":"A Transformer-based Approach for Source Code Summarization. arXiv e-prints","author":"Ahmad Wasi Uddin","year":"2020","unstructured":"Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, and Kai-Wei Chang. 2020. A Transformer-based Approach for Source Code Summarization. arXiv e-prints (2020), arXiv--2005."},{"key":"e_1_3_2_1_118_1","doi-asserted-by":"crossref","unstructured":"Tu Vu Mohit Iyyer Xuezhi Wang Noah Constant Jerry Wei Jason Wei Chris Tar Yun-Hsuan Sung et al. 2023. FreshLLMs: Refreshing Large Language Models with Search Engine Augmentation. arXiv:2310.03214 [cs.CL]","DOI":"10.18653\/v1\/2024.findings-acl.813"},{"key":"e_1_3_2_1_119_1","unstructured":"Ben Wang and Aran Komatsuzaki. 2021. GPT-J-6B: A 6 billion parameter autoregressive language model."},{"key":"e_1_3_2_1_120_1","doi-asserted-by":"crossref","unstructured":"Fengjie Wang Xuye Liu Oujing Liu Ali Neshati Tengfei Ma Min Zhu and Jian Zhao. 2023. Slide4N: Creating Presentation Slides from Computational Notebooks with Human-AI Collaboration (CHI '23).","DOI":"10.1145\/3544548.3580753"},{"key":"e_1_3_2_1_121_1","volume-title":"Software testing with large language models: Survey, landscape, and vision","author":"Wang Junjie","year":"2024","unstructured":"Junjie Wang, Yuchao Huang, Chunyang Chen, Zhe Liu, Song Wang, and Qing Wang. 2024. Software testing with large language models: Survey, landscape, and vision. IEEE Transactions on Software Engineering (2024)."},{"key":"e_1_3_2_1_122_1","doi-asserted-by":"crossref","unstructured":"Shiqi Wang Zheng Li Haifeng Qian Chenghao Yang Zijian Wang Mingyue Shang Varun Kumar Samson Tan et al. 2022. ReCode: Robustness Evaluation of Code Generation Models. arXiv preprint arXiv:2212.10264 (2022).","DOI":"10.18653\/v1\/2023.acl-long.773"},{"key":"e_1_3_2_1_123_1","volume-title":"Nghi DQ Bui, Junnan Li, and Steven CH Hoi.","author":"Wang Yue","year":"2023","unstructured":"Yue Wang, Hung Le, Akhilesh Deepak Gotmare, Nghi DQ Bui, Junnan Li, and Steven CH Hoi. 2023. Codet5: Open code large language models for code understanding and generation. arXiv preprint arXiv:2305.07922 (2023)."},{"key":"e_1_3_2_1_124_1","volume-title":"Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859","author":"Wang Yue","year":"2021","unstructured":"Yue Wang, Weishi Wang, Shafiq Joty, and Steven CH Hoi. 2021. Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859 (2021)."},{"key":"e_1_3_2_1_125_1","volume-title":"Unleashing the emergent cognitive synergy in large language models: A task-solving agent through multi-persona self-collaboration. arXiv preprint arXiv:2307.05300","author":"Mao Shaoguang","year":"2023","unstructured":"ZhenhailongWang, Shaoguang Mao,WenshanWu, Tao Ge, FuruWei, and Heng Ji. 2023. Unleashing the emergent cognitive synergy in large language models: A task-solving agent through multi-persona self-collaboration. arXiv preprint arXiv:2307.05300 (2023)."},{"key":"e_1_3_2_1_126_1","volume-title":"Execution-based evaluation for open-domain code generation. arXiv preprint arXiv:2212.10481","author":"Wang Zhiruo","year":"2022","unstructured":"Zhiruo Wang, Shuyan Zhou, Daniel Fried, and Graham Neubig. 2022. Execution-based evaluation for open-domain code generation. arXiv preprint arXiv:2212.10481 (2022)."},{"key":"e_1_3_2_1_127_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377325.3377509"},{"key":"e_1_3_2_1_128_1","volume-title":"Chi, Quoc Le, et al","author":"Wei Jason","year":"2023","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia, Ed Chi, Quoc Le, et al. 2023. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. arXiv:2201.11903 [cs.CL]"},{"key":"e_1_3_2_1_129_1","volume-title":"Powerful: Taming Large Code Generation Models with Quantization. arXiv preprint arXiv:2303.05378","author":"Wei Xiaokai","year":"2023","unstructured":"Xiaokai Wei, Sujan Gonugondla, Wasi Ahmad, Shiqi Wang, Baishakhi Ray, Haifeng Qian, Xiaopeng Li, Varun Kumar, et al. 2023. Greener yet Powerful: Taming Large Code Generation Models with Quantization. arXiv preprint arXiv:2303.05378 (2023)."},{"key":"e_1_3_2_1_130_1","volume-title":"Magicoder: Source code is all you need. arXiv preprint arXiv:2312.02120","author":"Wei Yuxiang","year":"2023","unstructured":"Yuxiang Wei, Zhe Wang, Jiawei Liu, Yifeng Ding, and Lingming Zhang. 2023. Magicoder: Source code is all you need. arXiv preprint arXiv:2312.02120 (2023)."},{"key":"e_1_3_2_1_131_1","volume-title":"Toward general design principles for generative AI applications. arXiv preprint arXiv:2301.05578","author":"Weisz Justin D","year":"2023","unstructured":"Justin D Weisz, Michael Muller, Jessica He, and Stephanie Houde. 2023. Toward general design principles for generative AI applications. arXiv preprint arXiv:2301.05578 (2023)."},{"key":"e_1_3_2_1_132_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397481.3450656"},{"key":"e_1_3_2_1_133_1","volume-title":"The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=hH36JeQZDaO","author":"Welleck Sean","year":"2023","unstructured":"Sean Welleck, Ximing Lu, Peter West, Faeze Brahman, Tianxiao Shen, Daniel Khashabi, and Yejin Choi. 2023. Generating Sequences by Learning to Self- Correct. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=hH36JeQZDaO"},{"key":"e_1_3_2_1_134_1","unstructured":"Jules White Quchen Fu Sam Hays Michael Sandborn Carlos Olea Henry Gilbert Ashraf Elnashar Jesse Spencer-Smith et al. 2023. A prompt pattern catalog to enhance prompt engineering with chatgpt. arXiv preprint arXiv:2302.11382 (2023)."},{"key":"e_1_3_2_1_135_1","volume-title":"Autogen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155","author":"Wu Qingyun","year":"2023","unstructured":"Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Shaokun Zhang, Erkang Zhu, Beibin Li, Li Jiang, et al. 2023. Autogen: Enabling next-gen llm applications via multi-agent conversation framework. arXiv preprint arXiv:2308.08155 (2023)."},{"key":"e_1_3_2_1_136_1","volume-title":"Junning Zhao, et al.","author":"Xie Tianbao","year":"2023","unstructured":"Tianbao Xie, Fan Zhou, Zhoujun Cheng, Peng Shi, Luoxuan Weng, Yitao Liu, Toh Jing Hua, Junning Zhao, et al. 2023. Openagents: An open platform for language agents in the wild. arXiv preprint arXiv:2310.10634 (2023)."},{"key":"e_1_3_2_1_137_1","volume-title":"ExpertPrompting: Instructing Large Language Models to be Distinguished Experts. arXiv preprint arXiv:2305.14688","author":"Xu Benfeng","year":"2023","unstructured":"Benfeng Xu, An Yang, Junyang Lin, QuanWang, Chang Zhou, Yongdong Zhang, and Zhendong Mao. 2023. ExpertPrompting: Instructing Large Language Models to be Distinguished Experts. arXiv preprint arXiv:2305.14688 (2023)."},{"key":"e_1_3_2_1_138_1","unstructured":"Yichen Xu and Yanqiao Zhu. 2022. A Survey on Pretrained Language Models for Neural Code Intelligence. arXiv:2212.10079 [cs.SE]"},{"key":"e_1_3_2_1_139_1","volume-title":"Large Language Models Meet NL2Code: A Survey. arXiv:2212","author":"Zan Daoguang","year":"2023","unstructured":"Daoguang Zan, Bei Chen, Fengji Zhang, Dianjie Lu, Bingchao Wu, Bei Guan, YongjiWang, and Jian-Guang Lou. 2023. Large Language Models Meet NL2Code: A Survey. arXiv:2212.09420 [cs.SE]"},{"key":"e_1_3_2_1_140_1","volume-title":"EcoAssistant: Using LLM assistant more affordably and accurately. arXiv preprint arXiv:2310.03046","author":"Zhang Jieyu","year":"2023","unstructured":"Jieyu Zhang, Ranjay Krishna, Ahmed H Awadallah, and ChiWang. 2023. EcoAssistant: Using LLM assistant more affordably and accurately. arXiv preprint arXiv:2310.03046 (2023)."},{"key":"e_1_3_2_1_141_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_142_1","volume-title":"Self-Edit: Fault-Aware Code Editor","author":"Zhang Kechi","year":"2023","unstructured":"Kechi Zhang, Zhuo Li, Jia Li, Ge Li, and Zhi Jin. 2023. Self-Edit: Fault-Aware Code Editor for Code Generation. arXiv preprint arXiv:2305.04087 (2023)."},{"key":"e_1_3_2_1_143_1","volume-title":"You Complete Me: Human-AI Teams and Complementary Expertise (CHI '22)","author":"Zhang Qiaoning","year":"2022","unstructured":"Qiaoning Zhang, Matthew L Lee, and Scott Carter. 2022. You Complete Me: Human-AI Teams and Complementary Expertise (CHI '22)."},{"key":"e_1_3_2_1_144_1","volume-title":"Self-Contrast: Better Reflection Through Inconsistent Solving Perspectives. arXiv preprint arXiv:2401.02009","author":"Zhang Wenqi","year":"2024","unstructured":"Wenqi Zhang, Yongliang Shen, Linjuan Wu, Qiuying Peng, Jun Wang, Yueting Zhuang, and Weiming Lu. 2024. Self-Contrast: Better Reflection Through Inconsistent Solving Perspectives. arXiv preprint arXiv:2401.02009 (2024)."},{"key":"e_1_3_2_1_145_1","unstructured":"Ziyin Zhang Chaoyu Chen Bingchang Liu Cong Liao Zi Gong Hang Yu Jianguo Li and Rui Wang. 2024. Unifying the Perspectives of NLP and Software Engineering: A Survey on Language Models for Code. arXiv:2311.07989 [cs.CL]"},{"key":"e_1_3_2_1_146_1","unstructured":"Zibin Zheng Kaiwen Ning Yanlin Wang Jingwen Zhang Dewu Zheng Mingxi Ye and Jiachi Chen. 2024. A Survey of Large Language Models for Code: Evolution Benchmarking and Future Trends. arXiv:2311.10372 [cs.SE]"}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671452","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671452","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:25Z","timestamp":1750291405000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671452"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":146,"alternative-id":["10.1145\/3637528.3671452","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3671452","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}