{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:55:00Z","timestamp":1766566500001,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,17]]},"DOI":"10.1145\/3756681.3756984","type":"proceedings-article","created":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:30:04Z","timestamp":1766565004000},"page":"1004-1013","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["How Well Do Large Language Models Serve as End-to-End Secure Code Agents for Python?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-3449-759X","authenticated-orcid":false,"given":"Jianian","family":"Gong","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7743-3123","authenticated-orcid":false,"given":"Nachuan","family":"Duan","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8579-5000","authenticated-orcid":false,"given":"Ziheng","family":"Tao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5613-2301","authenticated-orcid":false,"given":"Zhaohui","family":"Gong","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4233-4407","authenticated-orcid":false,"given":"Yuan","family":"Yuan","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China and Zhongguancun Laboratory, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7111-1849","authenticated-orcid":false,"given":"Minlie","family":"Huang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China and Zhongguancun Laboratory, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,24]]},"reference":[{"key":"e_1_3_3_2_2_2","volume-title":"Bandit documentation","year":"2024","unstructured":"2024. Bandit documentation. https:\/\/bandit.readthedocs.io\/en\/latest\/Accessed 26 May 2024."},{"key":"e_1_3_3_2_3_2","volume-title":"CodeQL documentation","year":"2024","unstructured":"2024. CodeQL documentation. https:\/\/codeql.github.com\/docs\/codeql-overview\/about-codeql\/Accessed 20 May 2024."},{"key":"e_1_3_3_2_4_2","volume-title":"GitHub documentation","year":"2024","unstructured":"2024. GitHub documentation. https:\/\/docs.github.com\/en\/get-started\/learning-about-github\/about-github-advanced-securityAccessed 26 May 2024."},{"key":"e_1_3_3_2_5_2","volume-title":"Introduction of CodeGeeX copilot","year":"2024","unstructured":"2024. Introduction of CodeGeeX copilot. https:\/\/codegeex.cn\/Accessed 22 May 2024."},{"key":"e_1_3_3_2_6_2","volume-title":"Introduction of CodeGeeX2 model","year":"2024","unstructured":"2024. Introduction of CodeGeeX2 model. https:\/\/github.com\/THUDM\/CodeGeeX2Accessed 22 May 2024."},{"key":"e_1_3_3_2_7_2","volume-title":"Performance on HumanEval","year":"2024","unstructured":"2024. Performance on HumanEval. https:\/\/paperswithcode.com\/sota\/code-generation-on-humanevalAccessed 22 May 2024."},{"key":"e_1_3_3_2_8_2","volume-title":"Prompt engineering","year":"2024","unstructured":"2024. Prompt engineering. https:\/\/platform.openai.com\/docs\/guides\/prompt-engineeringAccessed 20 May 2024."},{"key":"e_1_3_3_2_9_2","volume-title":"Python bug fixer","year":"2024","unstructured":"2024. Python bug fixer. https:\/\/platform.openai.com\/examples\/default-fix-python-bugsAccessed 20 May 2024."},{"key":"e_1_3_3_2_10_2","volume-title":"Query scripts of CodeQL for security","year":"2024","unstructured":"2024. Query scripts of CodeQL for security. https:\/\/github.com\/github\/codeql\/tree\/main\/python\/ql\/src\/SecurityAccessed 20 May 2024."},{"key":"e_1_3_3_2_11_2","volume-title":"The model card of Code Llama-70B","year":"2024","unstructured":"2024. The model card of Code Llama-70B. https:\/\/build.nvidia.com\/meta\/codellama-70bAccessed 27 May 2024."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","unstructured":"Baleegh Ahmad Shailja Thakur Benjamin Tan Ramesh Karri and Hammond Pearce. 2024. On Hardware Security Bug Code Fixes by Prompting Large Language Models. IEEE Transactions on Information Forensics and Security 19 (2024) 4043\u20134057. 10.1109\/TIFS.2024.3374558","DOI":"10.1109\/TIFS.2024.3374558"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/WIECON-ECE60392.2023.10456393"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Owura Asare Meiyappan Nagappan and N Asokan. 2023. Is github\u2019s copilot as bad as humans at introducing vulnerabilities in code? Empirical Software Engineering 28 6 (2023) 129.","DOI":"10.1007\/s10664-023-10380-1"},{"key":"e_1_3_3_2_15_2","volume-title":"The economic impact of the AI-powered developer lifecycle and lessons from GitHub Copilot","author":"Dohmke Thomas","year":"2023","unstructured":"Thomas Dohmke. 2023. The economic impact of the AI-powered developer lifecycle and lessons from GitHub Copilot. https:\/\/github.blog\/2023-06-27-the-economic-impact-of-the-ai-powered-developer-lifecycle-and-lessons-from-github-copilot\/Accessed 20 May 2024."},{"key":"e_1_3_3_2_16_2","volume-title":"Refactoring: improving the design of existing code","author":"Fowler Martin","year":"2018","unstructured":"Martin Fowler. 2018. Refactoring: improving the design of existing code. Addison-Wesley Professional."},{"key":"e_1_3_3_2_17_2","unstructured":"Michael Fu Chakkrit Tantithamthavorn Van Nguyen and Trung Le. 2023. ChatGPT for Vulnerability Detection Classification and Repair: How Far Are We? arxiv:https:\/\/arXiv.org\/abs\/2310.09810\u00a0[cs.SE]"},{"key":"e_1_3_3_2_18_2","unstructured":"Yujia Fu Peng Liang Amjed Tahir Zengyang Li Mojtaba Shahin Jiaxin Yu and Jinfu Chen. 2023. Security weaknesses of copilot generated code in github. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.02059 (2023)."},{"key":"e_1_3_3_2_19_2","unstructured":"Rapha\u00ebl Khoury Anderson\u00a0R. Avila Jacob Brunelle and Baba\u00a0Mamadou Camara. 2023. How Secure is Code Generated by ChatGPT? arxiv:https:\/\/arXiv.org\/abs\/2304.09655\u00a0[cs.CR]"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3589335.3651463"},{"key":"e_1_3_3_2_21_2","unstructured":"Zhijie Liu Yutian Tang Xiapu Luo Yuming Zhou and Liang\u00a0Feng Zhang. 2024. No need to lift a finger anymore? assessing the quality of code generation by chatgpt. IEEE Transactions on Software Engineering (2024)."},{"key":"e_1_3_3_2_22_2","unstructured":"Madhav Nair Rajat Sadhukhan and Debdeep Mukhopadhyay. 2023. Generating secure hardware using chatgpt resistant to cwes. Cryptology ePrint Archive (2023)."},{"key":"e_1_3_3_2_23_2","volume-title":"ChatGPT is growing faster than TikTok","author":"News CBS","year":"2023","unstructured":"CBS News. 2023. ChatGPT is growing faster than TikTok. https:\/\/www.cbsnews.com\/news\/chatgpt-chatbot-tiktok-ai-artificial-intelligence\/Accessed 7 May 2024."},{"key":"e_1_3_3_2_24_2","volume-title":"Introductions of the Chat Completion API","year":"2023","unstructured":"OpenAI. 2023. Introductions of the Chat Completion API. https:\/\/platform.openai.com\/docs\/api-reference\/chat\/createAccessed 7 May 2024."},{"key":"e_1_3_3_2_25_2","unstructured":"OpenAI. 2024. GPT-4 Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2303.08774\u00a0[cs.CL]"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/SP46214.2022.9833571"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/SP46215.2023.10179420"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSREW60843.2023.00058"},{"key":"e_1_3_3_2_29_2","unstructured":"Baptiste Rozi\u00e8re Jonas Gehring Fabian Gloeckle Sten Sootla Itai Gat Xiaoqing\u00a0Ellen Tan Yossi Adi Jingyu Liu Romain Sauvestre Tal Remez J\u00e9r\u00e9my Rapin Artyom Kozhevnikov Ivan Evtimov Joanna Bitton Manish Bhatt Cristian\u00a0Canton Ferrer Aaron Grattafiori Wenhan Xiong Alexandre D\u00e9fossez Jade Copet Faisal Azhar Hugo Touvron Louis Martin Nicolas Usunier Thomas Scialom and Gabriel Synnaeve. 2024. Code Llama: Open Foundation Models for Code. arxiv:https:\/\/arXiv.org\/abs\/2308.12950\u00a0[cs.CL]"},{"key":"e_1_3_3_2_30_2","first-page":"2205","volume-title":"32nd USENIX Security Symposium (USENIX Security 23)","author":"Sandoval Gustavo","year":"2023","unstructured":"Gustavo Sandoval, Hammond Pearce, Teo Nys, Ramesh Karri, Siddharth Garg, and Brendan Dolan-Gavitt. 2023. Lost at c: A user study on the security implications of large language model code assistants. In 32nd USENIX Security Symposium (USENIX Security 23). 2205\u20132222."},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3549035.3561184"},{"key":"e_1_3_3_2_32_2","unstructured":"Benjamin Steenhoek Md\u00a0Mahbubur Rahman Monoshi\u00a0Kumar Roy Mirza\u00a0Sanjida Alam Earl\u00a0T. Barr and Wei Le. 2024. A Comprehensive Study of the Capabilities of Large Language Models for Vulnerability Detection. arxiv:https:\/\/arXiv.org\/abs\/2403.17218\u00a0[cs.SE]"},{"key":"e_1_3_3_2_33_2","unstructured":"Norbert Tihanyi Tamas Bisztray Mohamed\u00a0Amine Ferrag Ridhi Jain and Lucas\u00a0C. Cordeiro. 2024. Do Neutral Prompts Produce Insecure Code? FormAI-v2 Dataset: Labelling Vulnerabilities in Code Generated by Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2404.18353\u00a0[cs.CR]"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/SP54263.2024.00210"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3597926.3598135"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"crossref","unstructured":"Yanjing Yang Xin Zhou Runfeng Mao Jinwei Xu Lanxin Yang Yu Zhangm Haifeng Shen and He Zhang. 2024. DLAP: A Deep Learning Augmented Large Language Model Prompting Framework for Software Vulnerability Detection. arxiv:https:\/\/arXiv.org\/abs\/2405.01202\u00a0[cs.SE]","DOI":"10.2139\/ssrn.4831591"},{"key":"e_1_3_3_2_37_2","unstructured":"Xin Yin Chao Ni and Shaohua Wang. 2024. Multitask-based Evaluation of Open-Source LLM on Software Vulnerability. arxiv:https:\/\/arXiv.org\/abs\/2404.02056\u00a0[cs.SE]"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Qinkai Zheng Xiao Xia Xu Zou Yuxiao Dong Shan Wang Yufei Xue Zihan Wang Lei Shen Andi Wang Yang Li et\u00a0al. 2023. Codegeex: A pre-trained model for code generation with multilingual evaluations on humaneval-x. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.17568 (2023).","DOI":"10.1145\/3580305.3599790"},{"key":"e_1_3_3_2_39_2","unstructured":"Xin Zhou Sicong Cao Xiaobing Sun and David Lo. 2024. Large Language Model for Vulnerability Detection and Repair: Literature Review and the Road Ahead. arxiv:https:\/\/arXiv.org\/abs\/2404.02525\u00a0[cs.SE]"},{"key":"e_1_3_3_2_40_2","unstructured":"Xin Zhou Ting Zhang and David Lo. 2024. Large Language Model for Vulnerability Detection: Emerging Results and Future Directions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.15468 (2024)."}],"event":{"name":"EASE '25: Evaluation and Assessment in Software Engineering","location":"Istanbul Turkiye","acronym":"EASE '25"},"container-title":["Proceedings of the 29th International Conference on Evaluation and Assessment in Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3756681.3756984","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:42:29Z","timestamp":1766565749000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3756681.3756984"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,17]]},"references-count":39,"alternative-id":["10.1145\/3756681.3756984","10.1145\/3756681"],"URL":"https:\/\/doi.org\/10.1145\/3756681.3756984","relation":{},"subject":[],"published":{"date-parts":[[2025,6,17]]},"assertion":[{"value":"2025-12-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}