{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:38:26Z","timestamp":1777657106355,"version":"3.51.4"},"reference-count":97,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s11432-025-4632-8","type":"journal-article","created":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T10:22:19Z","timestamp":1768299739000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["What is wrong with your code generated by large language models? An extensive study"],"prefix":"10.1007","volume":"69","author":[{"given":"Shihan","family":"Dou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoxiang","family":"Jia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shenxi","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huiyuan","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Muling","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunbo","family":"Tao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingxu","family":"Chai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jessica","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiheng","family":"Xi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rui","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yueming","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming","family":"Wen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Gui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xipeng","family":"Qiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuanjing","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,4]]},"reference":[{"key":"4632_CR1","unstructured":"Le H, Wang Y, Gotmare A D, et al. Coderl: mastering code generation through pretrained models and deep reinforcement learning. 2022. ArXiv:2207.01780"},{"key":"4632_CR2","doi-asserted-by":"publisher","first-page":"1433","DOI":"10.1145\/3368089.3417058","volume-title":"Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"A Svyatkovskiy","year":"2020","unstructured":"Svyatkovskiy A, Deng S K, Fu S, et al. Intellicode compose: code generation using transformer. In: Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering, 2020. 1433\u20131443"},{"key":"4632_CR3","first-page":"537","volume":"17","author":"J Shin","year":"2021","unstructured":"Shin J, Nam J. A survey of automatic code generation from natural language. J Inf Process Syst, 2021, 17: 537\u2013555","journal-title":"J Inf Process Syst"},{"key":"4632_CR4","doi-asserted-by":"publisher","first-page":"82434","DOI":"10.1109\/ACCESS.2022.3196347","volume":"10","author":"E Dehaerne","year":"2022","unstructured":"Dehaerne E, Dey B, Halder S, et al. Code generation using machine learning: a systematic review. IEEE Access, 2022, 10: 82434\u201382455","journal-title":"IEEE Access"},{"key":"4632_CR5","unstructured":"Zan D, Chen B, Zhang F, et al. Large language models meet NL2Code: a survey. 2022. ArXiv:2212.09420"},{"key":"4632_CR6","doi-asserted-by":"crossref","unstructured":"Wang Y, Wang W, Joty S, et al. CodeT5: identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. 2021. ArXiv:2109.00859","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"4632_CR7","doi-asserted-by":"crossref","unstructured":"Feng Z, Guo D, Tang D, et al. Codebert: a pre-trained model for programming and natural languages. 2020. ArXiv:2002.08155","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"4632_CR8","unstructured":"Zhang Z, Chen C, Liu B, et al. Unifying the perspectives of NLP and software engineering: a survey on language models for code. 2023. ArXiv:2311.07989"},{"key":"4632_CR9","unstructured":"Lu S, Guo D, Ren S, et al. Codexglue: a machine learning benchmark dataset for code understanding and generation. 2021. ArXiv:2102.04664"},{"key":"4632_CR10","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Child R, et al. Language models are unsupervised multitask learners. OpenAI blog, 2019, 1: 9","journal-title":"OpenAI blog"},{"key":"4632_CR11","volume-title":"Improving language understanding by generative pre-training","author":"A Radford","year":"2018","unstructured":"Radford A, Narasimhan K, Salimans T, et al. Improving language understanding by generative pre-training. 2018. https:\/\/cdn.openai.com\/research-covers\/language-unsupervised\/language_understanding_paper.pdf"},{"key":"4632_CR12","unstructured":"Brown T, Mann B, Ryder N, et al. Language models are few-shot learners. 2020. ArXiv:2005.14165"},{"key":"4632_CR13","unstructured":"Dou S, Liu Y, Jia H, et al. Stepcoder: improving code generation with reinforcement learning from compiler feedback. 2024. ArXiv:2402.01391"},{"key":"4632_CR14","unstructured":"Roziere B, Gehring J, Gloeckle F, et al. Code llama: open foundation models for code. 2023. ArXiv:2308.12950"},{"key":"4632_CR15","unstructured":"Hendrycks D, Basart S, Kadavath S, et al. Measuring coding challenge competence with APPS. 2021. ArXiv:2105.09938"},{"key":"4632_CR16","unstructured":"Li R, Allal L B, Zi Y, et al. Starcoder: may the source be with you! 2023. ArXiv:2305.06161"},{"key":"4632_CR17","unstructured":"Guo D, Zhu Q, Yang D, et al. DeepSeek-Coder: when the large language model meets programming-the rise of code intelligence. 2024. ArXiv:2401.14196"},{"key":"4632_CR18","unstructured":"Lozhkov A, Li R, Allal L B, et al. Starcoder 2 and the stack v2: the next generation. 2024. ArXiv:2402.19173"},{"key":"4632_CR19","doi-asserted-by":"crossref","unstructured":"Song D, Guo H, Zhou Y, et al. Code needs comments: enhancing code LLMs with comment augmentation. 2024. ArXiv:2402.13013","DOI":"10.18653\/v1\/2024.findings-acl.809"},{"key":"4632_CR20","doi-asserted-by":"crossref","unstructured":"Zheng T, Zhang G, Shen T, et al. OpenCodeInterpreter: integrating code generation with execution and refinement. 2024. ArXiv:2402.14658","DOI":"10.18653\/v1\/2024.findings-acl.762"},{"key":"4632_CR21","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Y Wei","year":"2024","unstructured":"Wei Y, Wang Z, Liu J, et al. Magicoder: empowering code generation with OSS-instruct. In: Proceedings of the 41st International Conference on Machine Learning, 2024"},{"key":"4632_CR22","unstructured":"Wang Z, Zhou S, Fried D, et al. Execution-based evaluation for open-domain code generation. 2022. ArXiv:2212.10481"},{"key":"4632_CR23","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"Z Yu","year":"2023","unstructured":"Yu Z, Tao Y, Chen L, et al. Beta-coder: on value-based deep reinforcement learning for program synthesis. In: Proceedings of the 12th International Conference on Learning Representations, 2023"},{"key":"4632_CR24","unstructured":"Zhou D, Sch\u00e4rli N, Hou L, et al. Least-to-most prompting enables complex reasoning in large language models. 2022. ArXiv:2205.10625"},{"key":"4632_CR25","doi-asserted-by":"publisher","first-page":"486","DOI":"10.1145\/3622815","volume":"7","author":"S Wang","year":"2023","unstructured":"Wang S, Lin B, Sun Z, et al. Two birds with one stone: boosting code generation and code search via a generative adversarial network. Proc ACM Program Lang, 2023, 7: 486\u2013515","journal-title":"Proc ACM Program Lang"},{"key":"4632_CR26","unstructured":"Yuan Z, Lou Y, Liu M, et al. No more manual tests? Evaluating and improving ChatGPT for unit test generation. 2023. ArXiv:2305.04207"},{"key":"4632_CR27","first-page":"743","volume-title":"Proceedings of the 6th International Conference on Information Technology: New Generations","author":"G Fix","year":"2009","unstructured":"Fix G. The design of an automated unit test code generation system. In: Proceedings of the 6th International Conference on Information Technology: New Generations, 2009. 743\u2013747"},{"key":"4632_CR28","unstructured":"Alawad D, Panta M, Zibran M, et al. An empirical study of the relationships between code readability and software complexity. 2019. ArXiv:1909.01760"},{"key":"4632_CR29","doi-asserted-by":"publisher","first-page":"81","DOI":"10.3390\/info14020081","volume":"14","author":"Y Tashtoush","year":"2023","unstructured":"Tashtoush Y, Abu-El-Rub N, Darwish O, et al. A notional understanding of the relationship between code readability and software complexity. Information, 2023, 14: 81","journal-title":"Information"},{"key":"4632_CR30","unstructured":"Austin J, Odena A, Nye M, et al. Program synthesis with large language models. 2021. ArXiv:2108.07732"},{"key":"4632_CR31","unstructured":"Chen M, Tworek J, Jun H, et al. Evaluating large language models trained on code. 2021. ArXiv:2107.03374"},{"key":"4632_CR32","unstructured":"Zan D, Chen B, Yang D, et al. Cert: continual pre-training on sketches for library-oriented code generation. ArXiv:2206.06888"},{"key":"4632_CR33","doi-asserted-by":"crossref","unstructured":"Li J, Li G, Zhao Y, et al. Deveval: a manually-annotated code generation benchmark aligned with real-world code repositories. 2024. ArXiv:2405.19856","DOI":"10.18653\/v1\/2024.findings-acl.214"},{"key":"4632_CR34","unstructured":"Jain N, Han K, Gu A, et al. Livecodebench: holistic and contamination free evaluation of large language models for code. 2024. ArXiv:2403.07974"},{"key":"4632_CR35","unstructured":"Golchin S, Surdeanu M. Time travel in LLMs: tracing data contamination in large language models. 2023. ArXiv:2308.08493"},{"key":"4632_CR36","unstructured":"Weller O, Marone M, Weir N, et al. \u201cAccording to\u2026\u201d: prompting language models improves quoting from pre-training data. 2023. ArXiv:2305.13252"},{"key":"4632_CR37","unstructured":"Riddell M, Ni A, Cohan A. Quantifying contamination in evaluating code generation capabilities of language models. 2024. ArXiv:2403.04811"},{"key":"4632_CR38","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"M Roberts","year":"2023","unstructured":"Roberts M, Thakur H, Herlihy C, et al. To the cutoff\u2026 and beyond? A longitudinal perspective on LLM data contamination. In: Proceedings of the 12th International Conference on Learning Representations, 2023"},{"key":"4632_CR39","unstructured":"OpenAI. GPT-4 technical report. 2023. ArXiv:submit\/4812508"},{"key":"4632_CR40","volume-title":"The claude 3 model family: opus, sonnet, haiku","author":"A Anthropic","year":"2024","unstructured":"Anthropic A. The claude 3 model family: opus, sonnet, haiku. 2024. https:\/\/www-cdn.anthropic.com\/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627\/Model_Card_Claude_3.pdf"},{"key":"4632_CR41","unstructured":"Liu A, Feng B, Xue B, et al. DeepSeek-V3 technical report. 2024. ArXiv:2412.19437"},{"key":"4632_CR42","unstructured":"Guo D, Yang D, Zhang H, et al. DeepSeek-R1: incentivizing reasoning capability in LLMs via reinforcement learning. 2025. ArXiv:2501.12948"},{"key":"4632_CR43","unstructured":"Abdin M, Jacobs S A, Awan A A, et al. Phi-3 technical report: a highly capable language model locally on your phone. 2024. ArXiv:2404.14219"},{"key":"4632_CR44","volume-title":"Proceedings of the Conference on Empirical Methods in Natural Language Processing: Industry Track","author":"X Tan","year":"2023","unstructured":"Tan X, Shi S, Qiu X, et al. Self-criticism: aligning large language models with their understanding of helpfulness, honesty, and harmlessness. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing: Industry Track, 2023"},{"key":"4632_CR45","volume-title":"Opencompass: a universal evaluation platform for foundation models","author":"O Contributors","year":"2023","unstructured":"Contributors O. Opencompass: a universal evaluation platform for foundation models. 2023. https:\/\/github.com\/open-compass\/opencompass"},{"key":"4632_CR46","unstructured":"Zhuo T Y, Vu M C, Chim J, et al. Bigcodebench: benchmarking code generation with diverse function calls and complex instructions. 2024. ArXiv:2406.15877"},{"key":"4632_CR47","unstructured":"Liu J, Xia C S, Wang Y, et al. Is your code generated by chatgpt really correct? Rigorous evaluation of large language models for code generation. 2024. ArXiv:2305.01210"},{"key":"4632_CR48","doi-asserted-by":"crossref","unstructured":"Imani S, Du L, Shrivastava H. Mathprompter: mathematical reasoning using large language models. 2023. ArXiv:2303.05398","DOI":"10.18653\/v1\/2023.acl-industry.4"},{"key":"4632_CR49","unstructured":"Wang X, Wei J, Schuurmans D, et al. Self-consistency improves chain of thought reasoning in language models. 2022. ArXiv:2203.11171"},{"key":"4632_CR50","unstructured":"Wang Y, Kordi Y, Mishra S, et al. Self-instruct: aligning language model with self generated instructions. 2022. ArXiv:2212.10560"},{"key":"4632_CR51","unstructured":"Vacareanu R, Pratik A, Spiliopoulou E, et al. General purpose verification for chain of thought prompting. 2024. ArXiv:2405.00204"},{"key":"4632_CR52","unstructured":"Wei J, Wang X, Schuurmans D, et al. Chain-of-thought prompting elicits reasoning in large language models. 2022. ArXiv:2201.11903"},{"key":"4632_CR53","unstructured":"Sharan S, Pittaluga F, Chandraker M, et al. LLM-assist: enhancing closed-loop planning with language-based reasoning. 2023. ArXiv:2401.00125"},{"key":"4632_CR54","unstructured":"Muennighoff N, Liu Q, Zebaze A R, et al. Octopack: instruction tuning code large language models. 2023. ArXiv:2308.07124"},{"key":"4632_CR55","unstructured":"Liu J, Zhu Y, Xiao K, et al. Rltf: reinforcement learning from unit test feedback. 2023. ArXiv:2307.04349"},{"key":"4632_CR56","unstructured":"Gero Z, Singh C, Cheng H, et al. Self-verification improves few-shot clinical information extraction. 2023. ArXiv:2306.00024"},{"key":"4632_CR57","unstructured":"Kocetkov D, Li R, Jia L, et al. The stack: 3 TB of permissively licensed source code. 2022. ArXiv:2211.15533"},{"key":"4632_CR58","unstructured":"Penedo G, Malartic Q, Hesslow D, et al. The refinedWeb dataset for falcon LLM: outperforming curated corpora with web data, and web data only. 2023. ArXiv:2306.01116"},{"key":"4632_CR59","unstructured":"Gao L, Biderman S, Black S, et al. The pile: an 800GB dataset of diverse text for language modeling. 2020. ArXiv:2101.00027"},{"key":"4632_CR60","first-page":"21","volume-title":"Proceedings of Compression and Complexity of SEQUENCES 1997","author":"A Z Broder","year":"1997","unstructured":"Broder A Z. On the resemblance and containment of documents. In: Proceedings of Compression and Complexity of SEQUENCES 1997, 1997. 21\u201329"},{"key":"4632_CR61","unstructured":"Dong Q, Li L, Dai D, et al. A survey on in-context learning. 2022. ArXiv:2301.00234"},{"key":"4632_CR62","unstructured":"Wies N, Levine Y, Shashua A. The learnability of in-context learning. 2023. ArXiv:2303.07895"},{"key":"4632_CR63","doi-asserted-by":"crossref","unstructured":"Ahn J, Verma R, Lou R, et al. Large language models for mathematical reasoning: progresses and challenges. 2024. ArXiv:2402.00157","DOI":"10.18653\/v1\/2024.eacl-srw.17"},{"key":"4632_CR64","unstructured":"Saunders W, Yeh C, Wu J, et al. Self-critiquing models for assisting human evaluators. 2022. ArXiv:2206.05802"},{"key":"4632_CR65","unstructured":"Chen X, Lin M, Sch\u00e4rli N, et al. Teaching large language models to self-debug. 2023. ArXiv:2304.05128"},{"key":"4632_CR66","doi-asserted-by":"crossref","unstructured":"Wang H, Liu Z, Wang S, et al. Intervenor: prompting the coding ability of large language models with the interactive chain of repair. 2024. ArXiv:2311.09868","DOI":"10.18653\/v1\/2024.findings-acl.124"},{"key":"4632_CR67","unstructured":"Bai J, Bai S, Chu Y, et al. Qwen technical report. 2023. ArXiv:2309.16609"},{"key":"4632_CR68","unstructured":"Shi W, Ajith A, Xia M, et al. Detecting pretraining data from large language models. 2023. ArXiv:2310.16789"},{"key":"4632_CR69","unstructured":"Zhou K, Zhu Y, Chen Z, et al. Don\u2019t make your LLM an evaluation benchmark cheater. 2023. ArXiv:2311.01964"},{"key":"4632_CR70","unstructured":"Jiang A Q, Sablayrolles A, Mensch A, et al. Mistral 7B. 2023. ArXiv:2310.06825"},{"key":"4632_CR71","unstructured":"DeepSeek-AI. DeepSeek LLM: scaling open-source language models with longtermism. 2024. ArXiv:2401.02954"},{"key":"4632_CR72","unstructured":"Allal L B, Li R, Kocetkov D, et al. Santacoder: don\u2019t reach for the stars! 2023. ArXiv:2301.03988"},{"key":"4632_CR73","doi-asserted-by":"crossref","unstructured":"Zheng Q, Xia X, Zou X, et al. Codegeex: a pre-trained model for code generation with multilingual evaluations on Humaneval-X. 2023. ArXiv:2303.17568","DOI":"10.1145\/3580305.3599790"},{"key":"4632_CR74","unstructured":"Luo Z, Xu C, Zhao P, et al. Wizardcoder: empowering code large language models with evol-instruct. 2023. ArXiv:2306.08568"},{"key":"4632_CR75","unstructured":"Le H, Chen H, Saha A, et al. Codechain: towards modular code generation through chain of self-revisions with representative sub-modules. 2023. ArXiv:2310.08992"},{"key":"4632_CR76","unstructured":"Shin J, Tang C, Mohati T, et al. Prompt engineering or fine tuning: an empirical assessment of large language models in automated software engineering tasks. 2023. ArXiv:2310.10508"},{"key":"4632_CR77","doi-asserted-by":"crossref","unstructured":"Denny P, Kumar V, Giacaman N. Conversing with copilot: exploring prompt engineering for solving CS1 problems using natural language. 2023. ArXiv:2210.15157","DOI":"10.1145\/3545945.3569823"},{"key":"4632_CR78","unstructured":"Jain N, Zhang T, Chiang W L, et al. LLM-assisted code cleaning for training accurate code generators. 2023. ArXiv:2311.14904"},{"key":"4632_CR79","unstructured":"Bairi R, Sonwane A, Kanade A, et al. CodePlan: repository-level coding using LLMs and planning. 2023. ArXiv:2309.12499"},{"key":"4632_CR80","unstructured":"Weyssow M, Zhou X, Kim K, et al. Exploring parameter-efficient fine-tuning techniques for code generation with large language models. 2023. ArXiv:2308.10462"},{"key":"4632_CR81","unstructured":"Shojaee P, Jain A, Tipirneni S, et al. Execution-based code generation using deep reinforcement learning. 2023. ArXiv:2301.13816"},{"key":"4632_CR82","unstructured":"Zheng R, Dou S, Gao S, et al. Secrets of RLHF in large language models part I: PPO. 2023. ArXiv:2307.04964"},{"key":"4632_CR83","first-page":"510","volume-title":"Proceedings of the 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"M J Islam","year":"2019","unstructured":"Islam M J, Nguyen G, Pan R, et al. A comprehensive study on deep learning bug characteristics. In: Proceedings of the 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering, 2019. 510\u2013520"},{"key":"4632_CR84","first-page":"1","volume-title":"Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering","author":"G Wang","year":"2022","unstructured":"Wang G, Wang Z, Chen J, et al. An empirical study on numerical bugs in deep learning programs. In: Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering, 2022. 1\u20135"},{"key":"4632_CR85","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1145\/3213846.3213866","volume-title":"Proceedings of the 27th ACM SIGSOFT International Symposium on Software Testing and Analysis","author":"Y Zhang","year":"2018","unstructured":"Zhang Y, Chen Y, Cheung S C, et al. An empirical study on tensorflow program bugs. In: Proceedings of the 27th ACM SIGSOFT International Symposium on Software Testing and Analysis, 2018. 129\u2013140"},{"key":"4632_CR86","volume-title":"Proceedings of the 42nd International Conference on Software Engineering (ICSE)","author":"J Garcia","year":"2020","unstructured":"Garcia J, Feng Y, Shen J, et al. A comprehensive study of autonomous vehicle bugs. In: Proceedings of the 42nd International Conference on Software Engineering (ICSE), 2020"},{"key":"4632_CR87","first-page":"1","volume-title":"Proceedings of the 46th International Conference on Software Engineering","author":"R Pan","year":"2024","unstructured":"Pan R, Ibrahimzada A R, Krishna R, et al. Lost in translation: a study of bugs introduced by large language models while translating code. In: Proceedings of the 46th International Conference on Software Engineering, 2024. 1\u201313"},{"key":"4632_CR88","doi-asserted-by":"publisher","first-page":"959","DOI":"10.1145\/3540250.3549101","volume-title":"Proceedings of the 30th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","author":"C S Xia","year":"2022","unstructured":"Xia C S, Zhang L. Less training, more repairing please: revisiting automated program repair via zero-shot learning. In: Proceedings of the 30th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering, 2022. 959\u2013971"},{"key":"4632_CR89","unstructured":"Yang Z, Liu F, Yu Z, et al. Exploring and unleashing the power of large language models in automated code translation. 2024. ArXiv:2404.14646"},{"key":"4632_CR90","unstructured":"Liu F, Liu Y, Shi L, et al. Exploring and evaluating hallucinations in LLM-powered code generation. 2024. ArXiv:2404.00971"},{"key":"4632_CR91","doi-asserted-by":"crossref","unstructured":"Mousavi Z, Islam C, Moore K, et al. An investigation into misuse of Java security APIs by large language models. 2024. ArXiv:2404.03823","DOI":"10.1145\/3634737.3661134"},{"key":"4632_CR92","doi-asserted-by":"crossref","unstructured":"Tambon F, Dakhel A M, Nikanjam A, et al. Bugs in large language models generated code. 2024. ArXiv:2403.08937","DOI":"10.1007\/s10664-025-10614-4"},{"key":"4632_CR93","unstructured":"Dai J, Lu J, Feng Y, et al. MHPP: exploring the capabilities and limitations of language models beyond basic code generation. 2024. ArXiv:2405.11430"},{"key":"4632_CR94","doi-asserted-by":"crossref","unstructured":"Iyer S, Konstas I, Cheung A, et al. Mapping language to code in programmatic context. 2018. ArXiv:1808.09588","DOI":"10.18653\/v1\/D18-1192"},{"key":"4632_CR95","first-page":"1","volume-title":"Proceedings of the 46th IEEE\/ACM International Conference on Software Engineering","author":"H Yu","year":"2024","unstructured":"Yu H, Shen B, Ran D, et al. CoderEval: a benchmark of pragmatic code generation with generative pre-trained models. In: Proceedings of the 46th IEEE\/ACM International Conference on Software Engineering, 2024. 1\u201312"},{"key":"4632_CR96","unstructured":"Liu T, Xu C, McAuley J. Repobench: benchmarking repository-level code auto-completion systems. 2023. ArXiv:2306.03091"},{"key":"4632_CR97","unstructured":"Jimenez C E, Yang J, Wettig A, et al. SWE-bench: can language models resolve real-world GitHub issues? 2023. ArXiv:2310.06770"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-025-4632-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-025-4632-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-025-4632-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T10:22:32Z","timestamp":1768299752000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-025-4632-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":97,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["4632"],"URL":"https:\/\/doi.org\/10.1007\/s11432-025-4632-8","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]},"assertion":[{"value":"21 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 August 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 October 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 January 2026","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"112107"}}