{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T01:03:55Z","timestamp":1781485435773,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":94,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62322208"],"award-info":[{"award-number":["62322208"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62232001"],"award-info":[{"award-number":["62232001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62202040"],"award-info":[{"award-number":["62202040"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["12411530122"],"award-info":[{"award-number":["12411530122"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"CCF-Huawei Populus Grove Fund"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1145\/3691620.3695529","type":"proceedings-article","created":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T15:39:19Z","timestamp":1729265959000},"page":"1607-1619","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":44,"title":["On the Evaluation of Large Language Models in Unit Test Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4475-0925","authenticated-orcid":false,"given":"Lin","family":"Yang","sequence":"first","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0759-940X","authenticated-orcid":false,"given":"Chen","family":"Yang","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4441-7971","authenticated-orcid":false,"given":"Shutao","family":"Gao","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9250-573X","authenticated-orcid":false,"given":"Weijing","family":"Wang","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7944-9182","authenticated-orcid":false,"given":"Bo","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Jiaotong University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8036-2623","authenticated-orcid":false,"given":"Qihao","family":"Zhu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9041-7020","authenticated-orcid":false,"given":"Xiao","family":"Chu","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Co. Ltd., Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4867-5416","authenticated-orcid":false,"given":"Jianyi","family":"Zhou","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Co. Ltd., Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2454-1706","authenticated-orcid":false,"given":"Guangtai","family":"Liang","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Co. Ltd., Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6598-0041","authenticated-orcid":false,"given":"Qianxiang","family":"Wang","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Co. Ltd., Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3056-9962","authenticated-orcid":false,"given":"Junjie","family":"Chen","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Accessed: 2023. PyTorch. https:\/\/pytorch.org\/."},{"key":"e_1_3_2_1_2_1","unstructured":"Accessed: 2023. Transformers. https:\/\/github.com\/huggingface\/transformers\/."},{"key":"e_1_3_2_1_3_1","unstructured":"Accessed: 2023. Tree-sitter. https:\/\/tree-sitter.github.io\/tree-sitter\/."},{"key":"e_1_3_2_1_4_1","unstructured":"Accessed: 2024. Big Code Models Leaderboard. https:\/\/huggingface.co\/spaces\/bigcode\/bigcode-models-leaderboard."},{"key":"e_1_3_2_1_5_1","unstructured":"Accessed: 2024. HomePage. https:\/\/github.com\/LeonYang95\/LLM4UT."},{"key":"e_1_3_2_1_6_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"Rajamani","author":"Agrawal Lakshya A.","year":"2023","unstructured":"Lakshya A. Agrawal, Aditya Kanade, Navin Goyal, Shuvendu K. Lahiri, and Sriram K. Rajamani. 2023. Monitor-Guided Decoding of Code LMs with Static Analysis of Repository Context. In NeurIPS."},{"key":"e_1_3_2_1_8_1","volume-title":"A3Test: Assertion-Augmented Automated Test Case Generation. CoRR abs\/2302.10352","author":"Alagarsamy Saranya","year":"2023","unstructured":"Saranya Alagarsamy, Chakkrit Tantithamthavorn, and Aldeida Aleti. 2023. A3Test: Assertion-Augmented Automated Test Case Generation. CoRR abs\/2302.10352 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"Prompt Design and Engineering: Introduction and Advanced Methods. CoRR abs\/2401.14423","author":"Amatriain Xavier","year":"2024","unstructured":"Xavier Amatriain. 2024. Prompt Design and Engineering: Introduction and Advanced Methods. CoRR abs\/2401.14423 (2024)."},{"key":"e_1_3_2_1_10_1","volume-title":"Unit Test Generation using Generative AI : A Comparative Performance Analysis of Autogeneration Tools. CoRR abs\/2312.10622","author":"Bhatia Shreya","year":"2023","unstructured":"Shreya Bhatia, Tarushi Gandhi, Dhruv Kumar, and Pankaj Jalote. 2023. Unit Test Generation using Generative AI : A Comparative Performance Analysis of Autogeneration Tools. CoRR abs\/2312.10622 (2023)."},{"key":"e_1_3_2_1_11_1","unstructured":"Mark Chen Jerry Tworek Heewoo Jun et al. 2021. Evaluating Large Language Models Trained on Code. CoRR abs\/2107.03374 (2021)."},{"key":"e_1_3_2_1_12_1","unstructured":"Xiangping Chen Xing Hu Yuan Huang He Jiang Weixing Ji Yanjie Jiang Yanyan Jiang Bo Liu Hui Liu Xiaochen Li Xiaoli Lian Guozhu Meng Xin Peng Hailong Sun Lin Shi Bo Wang Chong Wang Jiayi Wang Tiantian Wang Jifeng Xuan Xin Xia Yibiao Yang Yixin Yang Li Zhang Yuming Zhou and Lu Zhang. [n. d.]. Deep Learning-based Software Engineering: Progress Challenges and Opportunities. SCIENCE CHINA Information Sciences ([n. d.]) -."},{"key":"e_1_3_2_1_13_1","volume-title":"Statistical power analysis for the behavioral sciences","author":"Cohen Jacob","unstructured":"Jacob Cohen. 2013. Statistical power analysis for the behavioral sciences. Routledge."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2024.107468"},{"key":"e_1_3_2_1_15_1","unstructured":"DeepSeek. 2023. DeepSeek Coder: Let the Code Write Itself. https:\/\/github.com\/deepseek-ai\/DeepSeek-Coder."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510141"},{"key":"e_1_3_2_1_17_1","first-page":"1","article-title":"Evaluating Large Language Models in Class-Level Code Generation","volume":"81","author":"Du Xueying","year":"2024","unstructured":"Xueying Du, Mingwei Liu, Kaixin Wang, et al. 2024. Evaluating Large Language Models in Class-Level Code Generation. In ICSE. ACM, 81:1--81:13.","journal-title":"ICSE. ACM"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10009-014-0355-9"},{"key":"e_1_3_2_1_19_1","volume-title":"Zhang","author":"Fan Angela","year":"2023","unstructured":"Angela Fan, Beliz Gokkaya, Mark Harman, Mitya Lyubarskiy, Shubho Sengupta, Shin Yoo, and Jie M. Zhang. 2023. Large Language Models for Software Engineering: Survey and Open Problems. In ICSE-FoSE. IEEE, 31--53."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Gordon Fraser and Andrea Arcuri. 2011. EvoSuite: automatic test suite generation for object-oriented software. In SIGSOFT FSE. ACM 416--419.","DOI":"10.1145\/2025113.2025179"},{"key":"e_1_3_2_1_21_1","volume-title":"Heitmeyer","author":"Gargantini Angelo","year":"1999","unstructured":"Angelo Gargantini and Constance L. Heitmeyer. 1999. Using Model Checking to Generate Tests from Requirements Specifications. In ESEC \/ SIGSOFT FSE (Lecture Notes in Computer Science), Vol. 1687. Springer, 146--162."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3623306"},{"key":"e_1_3_2_1_23_1","volume-title":"Word Importance Explains How Prompts Affect Language Model Outputs. CoRR abs\/2403.03028","author":"Hackmann Stefan","year":"2024","unstructured":"Stefan Hackmann, Haniyeh Mahmoudian, Mark Steadman, and Michael Schmidt. 2024. Word Importance Explains How Prompts Affect Language Model Outputs. CoRR abs\/2403.03028 (2024)."},{"key":"e_1_3_2_1_24_1","unstructured":"Marc Hoffmann and Michael Ernst. 2021. JaCoCo: Java Code Coverage Library. https:\/\/www.jacoco.org. Accessed: 2024-05-19."},{"key":"e_1_3_2_1_25_1","unstructured":"Ari Holtzman Jan Buys Li Du Maxwell Forbes and Yejin Choi. 2020. The Curious Case of Neural Text Degeneration. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_26_1","volume-title":"Large Language Models for Software Engineering: A Systematic Literature Review. CoRR abs\/2308.10620","author":"Hou Xinyi","year":"2023","unstructured":"Xinyi Hou, Yanjie Zhao, Yue Liu, Zhou Yang, Kailong Wang, Li Li, Xiapu Luo, David Lo, John C. Grundy, and Haoyu Wang. 2023. Large Language Models for Software Engineering: A Systematic Literature Review. CoRR abs\/2308.10620 (2023)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2019.00054"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3624741","article-title":"Variable-Based Fault Localization via Enhanced Decision Tree","volume":"33","author":"Jiang Jiajun","year":"2023","unstructured":"Jiajun Jiang, Yumeng Wang, Junjie Chen, Delin Lv, and Mengjiao Liu. 2023. Variable-Based Fault Localization via Enhanced Decision Tree. ACM Transactions on Software Engineering and Methodology 33, 2 (2023), 1--32.","journal-title":"ACM Transactions on Software Engineering and Methodology"},{"key":"e_1_3_2_1_29_1","volume-title":"A manual inspection of defects4j bugs and its implications for automatic program repair. Science china information sciences 62","author":"Jiang Jiajun","year":"2019","unstructured":"Jiajun Jiang, Yingfei Xiong, and Xin Xia. 2019. A manual inspection of defects4j bugs and its implications for automatic program repair. Science china information sciences 62 (2019), 1--16."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3213846.3213871"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3643795.3648390"},{"key":"e_1_3_2_1_32_1","volume-title":"Ernst","author":"Just Ren\u00e9","year":"2014","unstructured":"Ren\u00e9 Just, Darioush Jalali, and Michael D. Ernst. 2014. Defects4J: a database of existing faults to enable controlled testing studies for Java programs. In ISSTA. ACM, 437--440."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00194"},{"key":"e_1_3_2_1_34_1","volume-title":"EMNLP (1)","author":"Karpukhin Vladimir","unstructured":"Vladimir Karpukhin, Barlas Oguz, Sewon Min, Patrick S. H. Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih. 2020. Dense Passage Retrieval for Open-Domain Question Answering. In EMNLP (1). Association for Computational Linguistics, 6769--6781."},{"key":"e_1_3_2_1_35_1","volume-title":"CTRL: A Conditional Transformer Language Model for Controllable Generation. CoRR abs\/1909.05858","author":"Keskar Nitish Shirish","year":"2019","unstructured":"Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, Caiming Xiong, and Richard Socher. 2019. CTRL: A Conditional Transformer Language Model for Controllable Generation. CoRR abs\/1909.05858 (2019)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.03.003"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00085"},{"key":"e_1_3_2_1_39_1","volume-title":"EMNLP (1)","author":"Lester Brian","unstructured":"Brian Lester, Rami Al-Rfou, and Noah Constant. 2021. The Power of Scale for Parameter-Efficient Prompt Tuning. In EMNLP (1). Association for Computational Linguistics, 3045--3059."},{"key":"e_1_3_2_1_40_1","volume-title":"Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461","author":"Lewis Mike","year":"2019","unstructured":"Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov, and Luke Zettlemoyer. 2019. Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461 (2019)."},{"key":"e_1_3_2_1_41_1","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, et al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems 33 (2020), 9459--9474.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_42_1","volume-title":"Structured Chain-of-Thought Prompting for Code Generation. CoRR","author":"Li Jia","year":"2023","unstructured":"Jia Li, Yongmin Li, Ge Li, and Zhi Jin. 2023. Structured Chain-of-Thought Prompting for Code Generation. CoRR (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"Large Language Models as Test Case Generators: Performance Evaluation and Enhancement. CoRR abs\/2404.13340","author":"Li Kefan","year":"2024","unstructured":"Kefan Li and Yuan Yuan. 2024. Large Language Models as Test Case Generators: Performance Evaluation and Enhancement. CoRR abs\/2404.13340 (2024)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE56229.2023.00089"},{"key":"e_1_3_2_1_45_1","volume-title":"Exploring the effectiveness of llms in automated logging generation: An empirical study. arXiv preprint arXiv:2307.05950","author":"Li Yichen","year":"2023","unstructured":"Yichen Li, Yintong Huo, Zhihan Jiang, Renyi Zhong, Pinjia He, Yuxin Su, and Michael R Lyu. 2023. Exploring the effectiveness of llms in automated logging generation: An empirical study. arXiv preprint arXiv:2307.05950 (2023)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639091"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME52107.2021.00028"},{"key":"e_1_3_2_1_48_1","volume-title":"Exploring and Evaluating Hallucinations in LLM-Powered Code Generation. arXiv preprint arXiv:2404.00971","author":"Liu Fang","year":"2024","unstructured":"Fang Liu, Yang Liu, Lin Shi, Houkun Huang, Ruifeng Wang, Zhen Yang, and Li Zhang. 2024. Exploring and Evaluating Hallucinations in LLM-Powered Code Generation. arXiv preprint arXiv:2404.00971 (2024)."},{"key":"e_1_3_2_1_49_1","volume-title":"Logicot: Logical chain-of-thought instruction-tuning data collection with gpt-4. arXiv preprint arXiv:2305.12147","author":"Liu Hanmeng","year":"2023","unstructured":"Hanmeng Liu, Zhiyang Teng, Leyang Cui, Chaoli Zhang, Qiji Zhou, and Yue Zhang. 2023. Logicot: Logical chain-of-thought instruction-tuning data collection with gpt-4. arXiv preprint arXiv:2305.12147 (2023)."},{"key":"e_1_3_2_1_50_1","volume-title":"Yuyao Wang, and Lingming Zhang.","author":"Liu Jiawei","year":"2023","unstructured":"Jiawei Liu, Chunqiu Steven Xia, Yuyao Wang, and Lingming Zhang. 2023. Is Your Code Generated by ChatGPT Really Correct? Rigorous Evaluation of Large Language Models for Code Generation. arXiv preprint arXiv:2305.01210 (2023)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3293882.3330577"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00181"},{"key":"e_1_3_2_1_53_1","volume-title":"Test Oracle Automation in the era of LLMs. arXiv preprint arXiv:2405.12766","author":"Molina Facundo","year":"2024","unstructured":"Facundo Molina and Alessandra Gorla. 2024. Test Oracle Automation in the era of LLMs. arXiv preprint arXiv:2405.12766 (2024)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639187"},{"key":"e_1_3_2_1_55_1","volume-title":"An Empirical Comparison of Pre-Trained Models of Source Code","author":"Niu Changan","unstructured":"Changan Niu, Chuanyi Li, Vincent Ng, Dongxiao Chen, Jidong Ge, and Bin Luo. 2023. An Empirical Comparison of Pre-Trained Models of Source Code. In ICSE. IEEE, 2136--2148."},{"key":"e_1_3_2_1_56_1","volume-title":"Andrew Habib, Jacques Klein, David Lo, and Tegawend\u00e9 F. Bissyand\u00e9.","author":"Ou\u00e9draogo Wendk\u00fbuni C.","year":"2023","unstructured":"Wendk\u00fbuni C. Ou\u00e9draogo, Laura Plein, Abdoul Kader Kabor\u00e9, Andrew Habib, Jacques Klein, David Lo, and Tegawend\u00e9 F. Bissyand\u00e9. 2023. Enriching Automatic Test Case Generation by Extracting Relevant Test Inputs from Bug Reports. CoRR abs\/2312.14898 (2023)."},{"key":"e_1_3_2_1_57_1","volume-title":"LLM is Like a Box of Chocolates: the Non-determinism of ChatGPT in Code Generation. CoRR abs\/2308.02828","author":"Ouyang Shuyin","year":"2023","unstructured":"Shuyin Ouyang, Jie M. Zhang, Mark Harman, and Meng Wang. 2023. LLM is Like a Box of Chocolates: the Non-determinism of ChatGPT in Code Generation. CoRR abs\/2308.02828 (2023)."},{"key":"e_1_3_2_1_58_1","volume-title":"SPSS survival manual: A step by step guide to data analysis using IBM SPSS","author":"Pallant Julie","unstructured":"Julie Pallant. 2020. SPSS survival manual: A step by step guide to data analysis using IBM SPSS. Routledge."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897010.2897016"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639226"},{"key":"e_1_3_2_1_61_1","volume-title":"Saikat Chakraborty, Baishakhi Ray, and Kai-Wei Chang.","author":"Rizwan Parvez Md.","year":"2021","unstructured":"Md. Rizwan Parvez, Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, and Kai-Wei Chang. 2021. Retrieval Augmented Code Generation and Summarization. In EMNLP (Findings). Association for Computational Linguistics, 2719--2734."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"crossref","unstructured":"Corina S. Pasareanu Peter C. Mehlitz David H. Bushnell Karen Gundy-Burlet Michael R. Lowry Suzette Person and Mark Pape. 2008. Combining unit-level symbolic execution and system-level concrete execution for testing NASA software. In ISSTA. ACM 15--26.","DOI":"10.1145\/1390630.1390635"},{"key":"e_1_3_2_1_63_1","unstructured":"Baptiste Rozi\u00e8re Jonas Gehring Fabian Gloeckle Sten Sootla Itai Gat Xiaoqing Ellen Tan Yossi Adi Jingyu Liu Tal Remez J\u00e9r\u00e9my Rapin Artyom Kozhevnikov Ivan Evtimov Joanna Bitton Manish Bhatt Cristian Canton-Ferrer Aaron Grattafiori Wenhan Xiong Alexandre D\u00e9fossez Jade Copet Faisal Azhar Hugo Touvron Louis Martin Nicolas Usunier Thomas Scialom and Gabriel Synnaeve. 2023. Code Llama: Open Foundation Models for Code. CoRR abs\/2308.12950 (2023)."},{"key":"e_1_3_2_1_64_1","volume-title":"Sriparna Saha, Vinija Jain, Samrat Mondal, and Aman Chadha.","author":"Sahoo Pranab","year":"2024","unstructured":"Pranab Sahoo, Ayush Kumar Singh, Sriparna Saha, Vinija Jain, Samrat Mondal, and Aman Chadha. 2024. A Systematic Survey of Prompt Engineering in Large Language Models: Techniques and Applications. arXiv preprint arXiv:2402.07927 (2024)."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2023.3334955"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR.2019.00028"},{"key":"e_1_3_2_1_67_1","volume-title":"Quality Assessment of Prompts Used in Code Generation. arXiv preprint arXiv:2404.10155","author":"Siddiq Mohammed Latif","year":"2024","unstructured":"Mohammed Latif Siddiq, Simantika Dristi, Joy Saha, and Joanna Santos. 2024. Quality Assessment of Prompts Used in Code Generation. arXiv preprint arXiv:2404.10155 (2024)."},{"key":"e_1_3_2_1_68_1","volume-title":"Noshin Ulfat, Fahmid Al Rifat, and Vinicius Carvalho Lopes.","author":"Siddiq Mohammed Latif","year":"2023","unstructured":"Mohammed Latif Siddiq, Joanna C. S. Santos, Ridwanul Hasan Tanvir, Noshin Ulfat, Fahmid Al Rifat, and Vinicius Carvalho Lopes. 2023. Exploring the Effectiveness of Large Language Models in Generating Unit Tests. CoRR abs\/2305.00418 (2023)."},{"key":"e_1_3_2_1_69_1","volume-title":"GitBug-Java: A Reproducible Benchmark of Recent Java Bugs. CoRR abs\/2402.02961","author":"Silva Andr\u00e9","year":"2024","unstructured":"Andr\u00e9 Silva, Nuno Saavedra, and Martin Monperrus. 2024. GitBug-Java: A Reproducible Benchmark of Recent Java Bugs. CoRR abs\/2402.02961 (2024)."},{"key":"e_1_3_2_1_70_1","volume-title":"Automatic Code Summarization via ChatGPT: How Far Are We? CoRR abs\/2305.12865","author":"Sun Weisong","year":"2023","unstructured":"Weisong Sun, Chunrong Fang, Yudu You, Yun Miao, Yi Liu, Yuekang Li, Gelei Deng, Shenghan Huang, Yuchen Chen, Quanjun Zhang, Hanwei Qian, Yang Liu, and Zhenyu Chen. 2023. Automatic Code Summarization via ChatGPT: How Far Are We? CoRR abs\/2305.12865 (2023)."},{"key":"e_1_3_2_1_71_1","volume-title":"Revisiting and Improving Retrieval-Augmented Deep Assertion Generation","author":"Sun Weifeng","unstructured":"Weifeng Sun, Hongyan Li, Meng Yan, Yan Lei, and Hongyu Zhang. 2023. Revisiting and Improving Retrieval-Augmented Deep Assertion Generation. In ASE. IEEE, 1123--1135."},{"key":"e_1_3_2_1_72_1","volume-title":"ChatGPT vs SBST: A Comparative Assessment of Unit Test Suite Generation. CoRR abs\/2307.00588","author":"Tang Yutian","year":"2023","unstructured":"Yutian Tang, Zhijie Liu, Zhichao Zhou, and Xiapu Luo. 2023. ChatGPT vs SBST: A Comparative Assessment of Unit Test Suite Generation. CoRR abs\/2307.00588 (2023)."},{"key":"e_1_3_2_1_73_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone et al. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. CoRR abs\/2307.09288 (2023)."},{"key":"e_1_3_2_1_74_1","volume-title":"Shao Kun Deng, and Neel Sundaresan","author":"Tufano Michele","year":"2020","unstructured":"Michele Tufano, Dawn Drain, Alexey Svyatkovskiy, Shao Kun Deng, and Neel Sundaresan. 2020. Unit Test Case Generation with Transformers. CoRR abs\/2009.05617 (2020)."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/32.153381"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2024.3368208"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3611643.3616256"},{"key":"e_1_3_2_1_78_1","volume-title":"Sharan Narang, Aakanksha Chowdhery, and Denny Zhou.","author":"Wang Xuezhi","year":"2023","unstructured":"Xuezhi Wang, Jason Wei, Dale Schuurmans, Quoc V. Le, Ed H. Chi, Sharan Narang, Aakanksha Chowdhery, and Denny Zhou. 2023. Self-Consistency Improves Chain of Thought Reasoning in Language Models. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_79_1","volume-title":"Nghi DQ Bui, Junnan Li, and Steven CH Hoi.","author":"Wang Yue","year":"2023","unstructured":"Yue Wang, Hung Le, Akhilesh Deepak Gotmare, Nghi DQ Bui, Junnan Li, and Steven CH Hoi. 2023. Codet5+: Open code large language models for code understanding and generation. arXiv preprint arXiv:2305.07922 (2023)."},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"crossref","unstructured":"Cody Watson Michele Tufano Kevin Moran Gabriele Bavota and Denys Poshyvanyk. 2020. On learning meaningful assert statements for unit test cases. In ICSE. ACM 1398--1409.","DOI":"10.1145\/3377811.3380429"},{"key":"e_1_3_2_1_81_1","volume-title":"Quoc V. Le, and Denny Zhou.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia, Ed H. Chi, Quoc V. Le, and Denny Zhou. 2022. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. In NeurIPS."},{"key":"e_1_3_2_1_82_1","volume-title":"Chunqiu Steven Xia, and Lingming Zhang","author":"Wei Yuxiang","year":"2023","unstructured":"Yuxiang Wei, Chunqiu Steven Xia, and Lingming Zhang. 2023. Copiloting the Copilots: Fusing Large Language Models with Completion Engines for Automated Program Repair. In ESEC\/SIGSOFT FSE. ACM, 172--184."},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180233"},{"key":"e_1_3_2_1_84_1","volume-title":"Breakthroughs in statistics: Methodology and distribution","author":"Wilcoxon Frank","unstructured":"Frank Wilcoxon. 1992. Individual comparisons by ranking methods. In Breakthroughs in statistics: Methodology and distribution. Springer, 196--202."},{"key":"e_1_3_2_1_85_1","volume-title":"Practical Program Repair in the Era of Large Pre-trained Language Models. CoRR abs\/2210.14179","author":"Xia Chunqiu Steven","year":"2022","unstructured":"Chunqiu Steven Xia, Yuxiang Wei, and Lingming Zhang. 2022. Practical Program Repair in the Era of Large Pre-trained Language Models. CoRR abs\/2210.14179 (2022)."},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00129"},{"key":"e_1_3_2_1_87_1","volume-title":"Symstra: A Framework for Generating Object-Oriented Unit Tests Using Symbolic Execution. In TACAS (Lecture Notes in Computer Science)","author":"Xie Tao","year":"2005","unstructured":"Tao Xie, Darko Marinov, Wolfram Schulte, and David Notkin. 2005. Symstra: A Framework for Generating Object-Oriented Unit Tests Using Symbolic Execution. In TACAS (Lecture Notes in Computer Science), Vol. 3440. Springer, 365--381."},{"key":"e_1_3_2_1_88_1","volume-title":"ChatUniTest: a ChatGPT-based automated unit test generation tool. CoRR abs\/2305.04764","author":"Xie Zhuokui","year":"2023","unstructured":"Zhuokui Xie, Yinghao Chen, Chen Zhi, Shuiguang Deng, and Jianwei Yin. 2023. ChatUniTest: a ChatGPT-based automated unit test generation tool. CoRR abs\/2305.04764 (2023)."},{"key":"e_1_3_2_1_89_1","volume-title":"Enhancing LLM-based Test Generation for Hard-to-Cover Branches via Program Analysis. CoRR abs\/2404.04966","author":"Yang Chen","year":"2024","unstructured":"Chen Yang, Junjie Chen, Bin Lin, Jianyi Zhou, and Ziqi Wang. 2024. Enhancing LLM-based Test Generation for Hard-to-Cover Branches via Program Analysis. CoRR abs\/2404.04966 (2024)."},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"crossref","unstructured":"Hao Yu Yiling Lou Ke Sun Dezhi Ran Tao Xie Dan Hao Ying Li Ge Li and Qianxiang Wang. 2022. Automated Assertion Generation via Information Retrieval and Its Integration with Deep learning. In ICSE. ACM 163--174.","DOI":"10.1145\/3510003.3510149"},{"key":"e_1_3_2_1_91_1","volume-title":"Evaluating instruction-tuned large language models on code comprehension and generation. arXiv preprint arXiv:2308.01240","author":"Yuan Zhiqiang","year":"2023","unstructured":"Zhiqiang Yuan, Junwei Liu, Qiancheng Zi, Mingwei Liu, Xin Peng, and Yiling Lou. 2023. Evaluating instruction-tuned large language models on code comprehension and generation. arXiv preprint arXiv:2308.01240 (2023)."},{"key":"e_1_3_2_1_92_1","volume-title":"No More Manual Tests? Evaluating and Improving ChatGPT for Unit Test Generation. CoRR abs\/2305.04207","author":"Yuan Zhiqiang","year":"2023","unstructured":"Zhiqiang Yuan, Yiling Lou, Mingwei Liu, Shiji Ding, Kaixin Wang, Yixuan Chen, and Xin Peng. 2023. No More Manual Tests? Evaluating and Improving ChatGPT for Unit Test Generation. CoRR abs\/2305.04207 (2023)."},{"key":"e_1_3_2_1_93_1","volume-title":"Automatic chain of thought prompting in large language models. arXiv preprint arXiv:2210.03493","author":"Zhang Zhuosheng","year":"2022","unstructured":"Zhuosheng Zhang, Aston Zhang, Mu Li, and Alex Smola. 2022. Automatic chain of thought prompting in large language models. arXiv preprint arXiv:2210.03493 (2022)."},{"key":"e_1_3_2_1_94_1","volume-title":"Fine-Tuning Language Models from Human Preferences. CoRR abs\/1909.08593","author":"Ziegler Daniel M.","year":"2019","unstructured":"Daniel M. Ziegler, Nisan Stiennon, Jeffrey Wu, Tom B. Brown, Alec Radford, Dario Amodei, Paul F. Christiano, and Geoffrey Irving. 2019. Fine-Tuning Language Models from Human Preferences. CoRR abs\/1909.08593 (2019)."}],"event":{"name":"ASE '24: 39th IEEE\/ACM International Conference on Automated Software Engineering","location":"Sacramento CA USA","acronym":"ASE '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS"]},"container-title":["Proceedings of the 39th IEEE\/ACM International Conference on Automated Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3691620.3695529","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3691620.3695529","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:09:39Z","timestamp":1750295379000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3691620.3695529"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":94,"alternative-id":["10.1145\/3691620.3695529","10.1145\/3691620"],"URL":"https:\/\/doi.org\/10.1145\/3691620.3695529","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]},"assertion":[{"value":"2024-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}