{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,6]],"date-time":"2026-07-06T11:25:41Z","timestamp":1783337141366,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T00:00:00Z","timestamp":1739318400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Research Council of Finland","award":["356114"],"award-info":[{"award-number":["356114"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,12]]},"DOI":"10.1145\/3716640.3716647","type":"proceedings-article","created":{"date-parts":[[2025,4,7]],"date-time":"2025-04-07T11:06:01Z","timestamp":1744023961000},"page":"56-63","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["LLM-itation is the Sincerest Form of Data: Generating Synthetic Buggy Code Submissions for Computing Education"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6829-9449","authenticated-orcid":false,"given":"Juho","family":"Leinonen","sequence":"first","affiliation":[{"name":"Aalto University, Espoo, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5150-9806","authenticated-orcid":false,"given":"Paul","family":"Denny","sequence":"additional","affiliation":[{"name":"The University of Auckland, Auckland, New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1347-0699","authenticated-orcid":false,"given":"Olli","family":"Kiljunen","sequence":"additional","affiliation":[{"name":"Aalto University, Espoo, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2781-6619","authenticated-orcid":false,"given":"Stephen","family":"MacNeil","sequence":"additional","affiliation":[{"name":"Temple University, Philadelphia, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7277-9282","authenticated-orcid":false,"given":"Sami","family":"Sarsa","sequence":"additional","affiliation":[{"name":"University of Jyv\u00e4skyl\u00e4, Jyv\u00e4skyl\u00e4, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6502-209X","authenticated-orcid":false,"given":"Arto","family":"Hellas","sequence":"additional","affiliation":[{"name":"Aalto University, Espoo, Finland"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,4,7]]},"reference":[{"key":"e_1_3_3_2_2_2","first-page":"522","volume-title":"Proc. of the 46th ACM Technical Symp. on Computer Science Education","author":"Altadmri Amjad","year":"2015","unstructured":"Amjad Altadmri and Neil\u00a0CC Brown. 2015. 37 million compilations: Investigating novice programming mistakes in large-scale student data. In Proc. of the 46th ACM Technical Symp. on Computer Science Education. 522\u2013527."},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3383455.3422554"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3627217.3627233"},{"key":"e_1_3_3_2_5_2","volume-title":"Proc. of the 54th ACM Technical Symp. on Computer Science Education V. 1","author":"Becker Brett\u00a0A","year":"2023","unstructured":"Brett\u00a0A Becker, Paul Denny, James Finnie-Ansley, Andrew Luxton-Reilly, James Prather, and Eddie\u00a0Antonio Santos. 2023. Programming Is Hard \u2013 Or at Least It Used to Be: Educational Opportunities And Challenges of AI Code Generation. In Proc. of the 54th ACM Technical Symp. on Computer Science Education V. 1."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Alan\u00a0Mark Berg Stefan\u00a0T Mol G\u00e1bor Kismih\u00f3k and Niall Sclater. 2016. The role of a reference synthetic data generator within the field of learning analytics. Journal of Learning Analytics 3 1 (2016) 107\u2013128.","DOI":"10.18608\/jla.2016.31.7"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3649217.3653533"},{"key":"e_1_3_3_2_8_2","volume-title":"Proc. of the 2024 on Innovation and Technology in Computer Science Education V. 2","author":"Bernstein Seth","year":"2024","unstructured":"Seth Bernstein, Paul Denny, Juho Leinonen, Matt Littlefield, Arto Hellas, and Stephen MacNeil. 2024. Analyzing Students\u2019 Preferences for LLM-Generated Analogies. In Proc. of the 2024 on Innovation and Technology in Computer Science Education V. 2."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Alessio Botta Alberto Dainotti and Antonio Pescap\u00e9. 2012. A tool for the generation of realistic network workload for emerging networking scenarios. Computer Networks 56 15 (2012) 3531\u20133547.","DOI":"10.1016\/j.comnet.2012.02.019"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","first-page":"575","DOI":"10.18653\/v1\/2023.acl-long.34","volume-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","author":"Chung John","year":"2023","unstructured":"John Chung, Ece Kamar, and Saleema Amershi. 2023. Increasing Diversity While Maintaining Accuracy: Text Data Generation with Large Language Models and Human Interventions. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 575\u2013593."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3408877.3432411"},{"key":"e_1_3_3_2_12_2","unstructured":"Paul Denny Hassan Khosravi Arto Hellas Juho Leinonen and Sami Sarsa. 2023. Can we trust AI-generated educational content? comparative analysis of human and AI-generated learning resources. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.10509 (2023)."},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3626252.3630909"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Paul Denny James Prather Brett\u00a0A. Becker James Finnie-Ansley Arto Hellas Juho Leinonen Andrew Luxton-Reilly Brent\u00a0N. Reeves Eddie\u00a0Antonio Santos and Sami Sarsa. 2024. Computing Education in the Era of Generative AI. Commun. ACM 67 2 (2024) 56\u201367.","DOI":"10.1145\/3624720"},{"key":"e_1_3_3_2_15_2","first-page":"4672","volume-title":"2019 IEEE Int. Conf. on Big Data","author":"Dorodchi Mohsen","year":"2019","unstructured":"Mohsen Dorodchi, Erfan Al-Hossami, Aileen Benedict, and Elise Demeter. 2019. Using synthetic data generators to promote open science in higher education learning analytics. In 2019 IEEE Int. Conf. on Big Data. IEEE, 4672\u20134675."},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3636243.3636256"},{"key":"e_1_3_3_2_17_2","unstructured":"John Edwards Kaden Hart Raj Shrestha et\u00a0al. 2023. Review of CSEDM Data and Introduction of Two Public CS1 Keystroke Datasets. J. of Educational Data Mining 15 1 (2023) 1\u201331."},{"key":"e_1_3_3_2_18_2","first-page":"83","volume-title":"Proc. of the 20th Australasian Computing Education Conf.","author":"Ettles Andrew","year":"2018","unstructured":"Andrew Ettles, Andrew Luxton-Reilly, and Paul Denny. 2018. Common Logic Errors Made by Novice Programmers. In Proc. of the 20th Australasian Computing Education Conf.ACM, New York, NY, USA, 83\u201389."},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1145\/3511861.3511863","volume-title":"Australasian Computing Education Conf.","author":"Finnie-Ansley James","year":"2022","unstructured":"James Finnie-Ansley, Paul Denny, Brett\u00a0A. Becker, Andrew Luxton-Reilly, and James Prather. 2022. The Robots Are Coming: Exploring the Implications of OpenAI Codex on Introductory Programming. In Australasian Computing Education Conf.ACM, New York, NY, USA, 10\u201319."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1145\/3511861.3511863","volume-title":"Australasian Computing Education Conf.","author":"Finnie-Ansley James","year":"2022","unstructured":"James Finnie-Ansley, Paul Denny, Brett\u00a0A Becker, Andrew Luxton-Reilly, and James Prather. 2022. The Robots Are Coming: Exploring the Implications of OpenAI Codex on Introductory Programming. In Australasian Computing Education Conf.10\u201319."},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3576123.3576134"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Brendan Flanagan Rwitajit Majumdar and Hiroaki Ogata. 2022. Fine Grain Synthetic Educational Data: Challenges and Limitations of Collaborative Learning Analytics. IEEE Access 10 (2022) 26230\u201326241.","DOI":"10.1109\/ACCESS.2022.3156073"},{"key":"e_1_3_3_2_23_2","first-page":"82","volume-title":"Machine Learning for Health (ML4H)","author":"Goel Akshay","year":"2023","unstructured":"Akshay Goel, Almog Gueta, Omry Gilon, Chang Liu, Sofia Erell, Lan\u00a0Huong Nguyen, Xiaohong Hao, Bolous Jaber, Shashir Reddy, Rupesh Kartha, et\u00a0al. 2023. Llms accelerate annotation for medical information extraction. In Machine Learning for Health (ML4H). PMLR, 82\u2013100."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3351287.3351289"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3626252.3630826"},{"key":"e_1_3_3_2_26_2","volume-title":"Australasian Computing Education Conference","author":"Hou Irene","year":"2024","unstructured":"Irene Hou, Sophia Mettille, Owen Man, Zhuo Li, Cynthia Zastudil, and Stephen MacNeil. 2024. The Effects of Generative AI on Introductory Students\u2019 Help-Seeking Preferences. In Australasian Computing Education Conference."},{"key":"e_1_3_3_2_27_2","unstructured":"James Jordon Lukasz Szpruch Florimond Houssiau Mirko Bottarelli Giovanni Cherubin Carsten Maple Samuel\u00a0N Cohen and Adrian Weller. 2022. Synthetic Data\u2013what why and how? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.03257 (2022)."},{"key":"e_1_3_3_2_28_2","first-page":"1","volume-title":"Proceedings of the CHI Conference on Human Factors in Computing Systems","author":"Kazemitabaar Majeed","year":"2024","unstructured":"Majeed Kazemitabaar, Runlong Ye, Xiaoning Wang, Austin\u00a0Zachary Henley, Paul Denny, Michelle Craig, and Tovi Grossman. 2024. Codeaid: Evaluating a classroom deployment of an llm-based programming assistant that balances student and educator needs. In Proceedings of the CHI Conference on Human Factors in Computing Systems. 1\u201320."},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3568813.3600138"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3587102.3588785"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3051457.3051458"},{"key":"e_1_3_3_2_32_2","unstructured":"Arun-Balajiee Lekshmi-Narayanan Priti Oli Jeevan Chapagain Mohammad Hassany Rabin Banjade Peter Brusilovsky and Vasile Rus. 2024. Explaining Code Examples in Introductory Programming Courses: LLM vs Humans. arxiv:https:\/\/arXiv.org\/abs\/2403.05538\u00a0[cs.CY] https:\/\/arxiv.org\/abs\/2403.05538"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3286960.3286970"},{"key":"e_1_3_3_2_34_2","unstructured":"Zhuoyan Li Hangxiao Zhu Zhuoran Lu and Ming Yin. 2023. Synthetic data generation with large language models for text classification: Potential and limitations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.07849 (2023)."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3631802.3631830"},{"key":"e_1_3_3_2_36_2","unstructured":"Jiawei Liu Chunqiu\u00a0Steven Xia Yuyao Wang and Lingming Zhang. 2024. Is Your Code Generated by ChatGPT Really Correct? Rigorous Evaluation of Large Language Models for Code Generation. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"crossref","unstructured":"Lin Long Rui Wang Ruixuan Xiao Junbo Zhao Xiao Ding Gang Chen and Haobo Wang. 2024. On LLMs-Driven Synthetic Data Generation Curation and Evaluation: A Survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.15126 (2024).","DOI":"10.18653\/v1\/2024.findings-acl.658"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3649165.3690100"},{"key":"e_1_3_3_2_39_2","volume-title":"Proc. of the ACM Technical Symp. on Computing Science Education","author":"MacNeil Stephen","year":"2023","unstructured":"Stephen MacNeil, Andrew Tran, Arto Hellas, Joanne Kim, Sami Sarsa, Paul Denny, Seth Bernstein, and Juho Leinonen. 2023. Experiences from Using Code Explanations Generated by Large Language Models in a Web Software Development E-Book. In Proc. of the ACM Technical Symp. on Computing Science Education. ACM, 6\u00a0pages."},{"key":"e_1_3_3_2_40_2","first-page":"37","volume-title":"Proc. of the 2022 ACM Conf. on Int. Computing Education Research - Volume 2","author":"MacNeil Stephen","year":"2022","unstructured":"Stephen MacNeil, Andrew Tran, Dan Mogil, Seth Bernstein, Erin Ross, and Ziheng Huang. 2022. Generating Diverse Code Explanations Using the GPT-3 Large Language Model. In Proc. of the 2022 ACM Conf. on Int. Computing Education Research - Volume 2. ACM, 37\u201339."},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3573051.3593393"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Ren\u00e9e McCauley Sue Fitzgerald Gary Lewandowski Laurie Murphy Beth Simon Lynda Thomas and Carol Zander. 2008. Debugging: a review of the literature from an educational perspective. Computer Science Education 18 2 (2008) 67\u201392.","DOI":"10.1080\/08993400802114581"},{"key":"e_1_3_3_2_43_2","first-page":"179","volume-title":"Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)","author":"M\u00f8ller Anders\u00a0Giovanni","year":"2024","unstructured":"Anders\u00a0Giovanni M\u00f8ller, Arianna Pera, Jacob Dalsgaard, and Luca Aiello. 2024. The Parrot Dilemma: Human-Labeled vs. LLM-augmented Data in Classification Tasks. In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers). 179\u2013192."},{"key":"e_1_3_3_2_44_2","first-page":"431","volume-title":"Int. Conf. in Information Technology and Education","author":"Moreno Yaneth","year":"2023","unstructured":"Yaneth Moreno, Anthony Montero, Francisco Hidrobo, and Saba Infante. 2023. Synthetic Data Generator for an E-Learning Platform in a Big Data Environment. In Int. Conf. in Information Technology and Education. Springer, 431\u2013440."},{"key":"e_1_3_3_2_45_2","unstructured":"Jeiyoon Park Chanjun Park and Heuiseok Lim. 2024. ChatLang-8: An LLM-Based Synthetic Data Generation Framework for Grammatical Error Correction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.03202 (2024)."},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3533767.3534217"},{"key":"e_1_3_3_2_47_2","unstructured":"Chris Piech Jonathan Bassen Jonathan Huang Surya Ganguli Mehran Sahami Leonidas\u00a0J Guibas and Jascha Sohl-Dickstein. 2015. Deep knowledge tracing. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.1145\/3623762.3633499"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"crossref","unstructured":"James Prather Brent Reeves Juho Leinonen Stephen MacNeil Arisoa\u00a0S Randrianasolo Brett Becker Bailey Kimmel Jared Wright and Ben Briggs. 2024. The Widening Gap: The Benefits and Harms of Generative AI for Novice Programmers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.17739 (2024).","DOI":"10.1145\/3632620.3671116"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3501385.3543957"},{"key":"e_1_3_3_2_51_2","unstructured":"Sami Sarsa Juho Leinonen Arto Hellas et\u00a0al. 2022. Empirical Evaluation of Deep Learning Models for Knowledge Tracing: Of Hyperparameters and Metrics on Performance and Replicability. Journal of Educational Data Mining 14 2 (2022)."},{"key":"e_1_3_3_2_52_2","unstructured":"Ruixiang Tang Xiaotian Han Xiaoqian Jiang and Xia Hu. 2023. Does synthetic data generation of llms help clinical text mining? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.04360 (2023)."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/FIE58773.2023.10342898"},{"key":"e_1_3_3_2_54_2","unstructured":"Stefan\u00a0Sylvius Wagner Maike Behrendt Marc Ziegele and Stefan Harmeling. 2024. The Power of LLM-Generated Synthetic Data for Stance Detection in Online Political Discussions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.12480 (2024)."},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/3408877.3432374"},{"key":"e_1_3_3_2_56_2","first-page":"1","volume-title":"IEEE Frontiers in Education Conference","author":"Zastudil Cynthia","year":"2023","unstructured":"Cynthia Zastudil, Magdalena Rogalska, Christine Kapp, Jennifer Vaughn, and Stephen MacNeil. 2023. Generative ai in computing education: Perspectives of students and instructors. In IEEE Frontiers in Education Conference. IEEE, 1\u20139."},{"key":"e_1_3_3_2_57_2","unstructured":"Chen Zhan Oscar\u00a0Blessed Deho Xuwei Zhang Srecko Joksimovic and Maarten de Laat. 2023. Synthetic data generator for student data serving learning analytics: A comparative study. Learning Letters (2023)."}],"event":{"name":"ACE 2025: 27th Australasian Computing Education Conference","location":"Brisbane Australia","acronym":"ACE 2025"},"container-title":["Proceedings of the 27th Australasian Computing Education Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3716640.3716647","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3716640.3716647","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:52Z","timestamp":1750295932000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3716640.3716647"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,12]]},"references-count":56,"alternative-id":["10.1145\/3716640.3716647","10.1145\/3716640"],"URL":"https:\/\/doi.org\/10.1145\/3716640.3716647","relation":{},"subject":[],"published":{"date-parts":[[2025,2,12]]},"assertion":[{"value":"2025-04-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}