{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T19:24:40Z","timestamp":1764617080135,"version":"3.46.0"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T00:00:00Z","timestamp":1750809600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T00:00:00Z","timestamp":1750809600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,25]]},"DOI":"10.1109\/snpd65828.2025.11252591","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:29Z","timestamp":1764355229000},"page":"842-847","source":"Crossref","is-referenced-by-count":0,"title":["Making the Case for LLM-Generated Automated Program Repair Benchmarks"],"prefix":"10.1109","author":[{"given":"Yasser","family":"Ebrahim","sequence":"first","affiliation":[{"name":"Algoma University,Computer Science Department,Brampton,Canada"}]}],"member":"263","reference":[{"article-title":"Evaluating Large Language Models Trained on Code","year":"2021","author":"Chen","key":"ref1"},{"article-title":"Program Synthesis with Large Language Models","year":"2021","author":"Austin","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3105906"},{"article-title":"SWE Bench+: Enhanced Coding Benchmark for LLMs","year":"2024","author":"Aleithan","key":"ref4"},{"article-title":"Benchmarking Educational Program Repair","year":"2024","author":"Koutcheme","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3597926.3598135"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3449106"},{"article-title":"Impact of Dataset Size on Deep Learning Model Skill and Performance Estimates","year":"2020","author":"Brownlee","key":"ref8"},{"article-title":"What is Overfitting?","year":"2024","author":"Services","key":"ref9"},{"key":"ref10","first-page":"110396","article-title":"A Comprehensive Study of Automatic Program Repair on the QuixBugs Benchmark","volume":"157","author":"Durieux","year":"2019","journal-title":"Journal of Systems and Software"},{"article-title":"Automated Patch Correctness Assessment: How Far Are We?","volume-title":"2019 34th IEEE\/ACM International Conference on Automated Software Engineering (ASE)","author":"Liu","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2020.110825"},{"article-title":"Efficacy of Synthetic Data as a Benchmark","year":"2024","author":"Whitehouse","key":"ref13"},{"article-title":"Benchmarking Educational Program Repair","year":"2024","author":"Kohn","key":"ref14"},{"article-title":"23- An Evaluation Framework for Synthetic Data Generation Models","year":"2024","author":"Ferdowsi","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3650212.3652140"},{"key":"ref17","first-page":"1","article-title":"Advancements in Automated Program Repair: A Comprehensive Review","author":"Bhatia","year":"2024","journal-title":"Knowledge and Information Systems"},{"issue":"6","key":"ref18","first-page":"4853","article-title":"Automated Patch Assessment for Program Repair at Scale","volume":"25","author":"Koyuncu","year":"2020","journal-title":"Empirical Software Engineering"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2017.8115674"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-Companion.2019.00035"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3631974"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2015.81"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2019.00116"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/2786805.2786825"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3533767.3534219"},{"article-title":"FrugalGPT: How to Use Large Language Models While Reducing Cost and Improving Performance","year":"2024","author":"Chen","key":"ref26"},{"article-title":"Is Your Code Generated by ChatGPT Really Correct? Rigorous Evaluation of Large Language Models for Code Generation","year":"2023","author":"Liu","key":"ref27"},{"article-title":"CodeGen: An Open Large Language Model for Code with Multi-Turn Program Synthesis","year":"2022","author":"Nijkamp","key":"ref28"},{"article-title":"Multi-lingual Evaluation of Code Generation Models","year":"2022","author":"Athiwaratkun","key":"ref29"}],"event":{"name":"2025 IEEE\/ACIS 29th International Conference on Software Engineering, Artificial Intelligence, Networking and Parallel\/Distributed Computing (SNPD)","start":{"date-parts":[[2025,6,25]]},"location":"Busan, Korea, Republic of","end":{"date-parts":[[2025,6,27]]}},"container-title":["2025 IEEE\/ACIS 29th International Conference on Software Engineering, Artificial Intelligence, Networking and Parallel\/Distributed Computing (SNPD)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11251382\/11252538\/11252591.pdf?arnumber=11252591","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T18:23:15Z","timestamp":1764613395000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11252591\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,25]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/snpd65828.2025.11252591","relation":{},"subject":[],"published":{"date-parts":[[2025,6,25]]}}}