{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,27]],"date-time":"2026-06-27T00:03:42Z","timestamp":1782518622005,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T00:00:00Z","timestamp":1709769600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,7]]},"DOI":"10.1145\/3626252.3630826","type":"proceedings-article","created":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T18:17:20Z","timestamp":1709835440000},"page":"526-532","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":45,"title":["Detecting ChatGPT-Generated Code Submissions in a CS1 Course Using Machine Learning Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2591-0476","authenticated-orcid":false,"given":"Muntasir","family":"Hoq","sequence":"first","affiliation":[{"name":"North Carolina State University, Raleigh, NC, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6486-4340","authenticated-orcid":false,"given":"Yang","family":"Shi","sequence":"additional","affiliation":[{"name":"North Carolina State University, Raleigh, NC, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6829-9449","authenticated-orcid":false,"given":"Juho","family":"Leinonen","sequence":"additional","affiliation":[{"name":"The University of Auckland, Auckland, New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4160-9030","authenticated-orcid":false,"given":"Damilola","family":"Babalola","sequence":"additional","affiliation":[{"name":"North Carolina State University, Raleigh, NC, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6958-9368","authenticated-orcid":false,"given":"Collin","family":"Lynch","sequence":"additional","affiliation":[{"name":"North Carolina State University, Raleigh, NC, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9375-2292","authenticated-orcid":false,"given":"Thomas","family":"Price","sequence":"additional","affiliation":[{"name":"North Carolina State University, Raleigh, NC, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5195-5841","authenticated-orcid":false,"given":"Bita","family":"Akram","sequence":"additional","affiliation":[{"name":"North Carolina State University, Raleigh, NC, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,3,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Kristy Elizabeth Boyer, and James Lester","author":"Akram Bita","year":"2018","unstructured":"Bita Akram, Wookhee Min, Eric Wiebe, Bradford Mott, Kristy Elizabeth Boyer, and James Lester. 2018. Improving stealth assessment in game-based learning with LSTM-based analytics. In EDM. 208--218."},{"key":"e_1_3_2_1_2_1","volume-title":"James Lester, et al.","author":"Akram Bita","year":"2020","unstructured":"Bita Akram, Wookhe Min, Eric Wiebe, Anam Navied, Bradford Mott, Kristy Elizabeth Boyer, James Lester, et al. 2020. Automated assessment of computer science competencies from student programs with gaussian process regression. In EDM."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3371156"},{"key":"e_1_3_2_1_4_1","first-page":"1","article-title":"code2vec: Learning distributed representations of code","volume":"3","author":"Alon Uri","year":"2019","unstructured":"Uri Alon, Meital Zilberstein, Omer Levy, and Eran Yahav. 2019. code2vec: Learning distributed representations of code. POPL , Vol. 3 (2019), 1--29.","journal-title":"POPL"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Brett A Becker Paul Denny James Finnie-Ansley Andrew Luxton-Reilly James Prather and Eddie Antonio Santos. 2023. Programming Is Hard-Or at Least It Used to Be: Educational Opportunities and Challenges of AI Code Generation. In SIGCSE. 500--506.","DOI":"10.1145\/3545945.3569759"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TE.2007.906776"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.chbr.2020.100033"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Paul Denny Viraj Kumar and Nasser Giacaman. 2023a. Conversing with Copilot: Exploring prompt engineering for solving CS1 problems using natural language. In SIGCSE. 1136--1142.","DOI":"10.1145\/3545945.3569823"},{"key":"e_1_3_2_1_9_1","volume-title":"Eddie Antonio Santos, and Sami Sarsa","author":"Denny Paul","year":"2023","unstructured":"Paul Denny, James Prather, Brett A Becker, James Finnie-Ansley, Arto Hellas, Juho Leinonen, Andrew Luxton-Reilly, Brent N Reeves, Eddie Antonio Santos, and Sami Sarsa. 2023b. Computing Education in the Era of Generative AI. arXiv preprint arXiv:2306.02608 (2023)."},{"key":"e_1_3_2_1_10_1","volume-title":"2022 Robosourcing Educational Resources--Leveraging Large Language Models for Learnersourcing. arXiv preprint arXiv:2211.04715","author":"Denny Paul","year":"2022","unstructured":"Paul Denny, Sami Sarsa, Arto Hellas, and Juho Leinonen. 2022 Robosourcing Educational Resources--Leveraging Large Language Models for Learnersourcing. arXiv preprint arXiv:2211.04715 (2022)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/782941.783000"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Steve Engels Vivek Lakshmanan and Michelle Craig. 2007. Plagiarism detection using feature-based neural networks. In SIGCSE. 34--38.","DOI":"10.1145\/1227310.1227324"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Akhil Eppa and Anirudh Murali. 2022. Source Code Plagiarism Detection: A Machine Intelligence Approach. In ICAECC. 1--7.","DOI":"10.1109\/ICAECC54045.2022.9716671"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Chunrong Fang Zixi Liu Yangyang Shi Jeff Huang and Qingkai Shi. 2020. Functional code clone detection with syntax and semantics fusion learning. In SIGSOFT. 516--527.","DOI":"10.1145\/3395363.3397362"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"James Finnie-Ansley Paul Denny Brett A Becker Andrew Luxton-Reilly and James Prather. 2022. The robots are coming: Exploring the implications of openai codex on introductory programming. In ACE. 10--19.","DOI":"10.1145\/3511861.3511863"},{"key":"e_1_3_2_1_16_1","volume-title":"James Prather, and Brett A Becker.","author":"Finnie-Ansley James","year":"2023","unstructured":"James Finnie-Ansley, Paul Denny, Andrew Luxton-Reilly, Eddie Antonio Santos, James Prather, and Brett A Becker. 2023. My AI Wants to Know if This Will Be on the Exam: Testing OpenAI's Codex on CS2 Programming Exercises. In ACEC. 97--104."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Manuel A Fokam and Ritesh Ajoodha. 2021. Influence of Contrastive Learning on Source Code Plagiarism Detection through Recursive Neural Networks. In IMITEC. 1--6.","DOI":"10.1109\/IMITEC52926.2021.9714688"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Arto Hellas Juho Leinonen and Petri Ihantola. 2017. Plagiarism in take-home exams: help-seeking collaboration and systematic cheating. In ITiCSE. 238--243.","DOI":"10.1145\/3059009.3059065"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Arto Hellas Juho Leinonen Sami Sarsa Charles Koutcheme Lilja Kujanp\"a\"a and Juha Sorva. 2023. Exploring the Responses of Large Language Models to Beginner Programmers' Help Requests. In ICER.","DOI":"10.1145\/3568813.3600139"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Kenneth Holstein Bruce M McLaren and Vincent Aleven. 2018. Student learning benefits of a mixed-reality teacher awareness tool in AI-enhanced classrooms. In AIED. 154--168.","DOI":"10.1007\/978-3-319-93843-1_12"},{"key":"e_1_3_2_1_21_1","volume-title":"SANN: A Subtree-based Attention Neural Network Model for Student Success Prediction Through Source Code Analysis. In 6th CSEDM Workshop.","author":"Hoq Muntasir","year":"2022","unstructured":"Muntasir Hoq, Peter Brusilovsky, and Bita Akram. 2022. SANN: A Subtree-based Attention Neural Network Model for Student Success Prediction Through Source Code Analysis. In 6th CSEDM Workshop."},{"key":"e_1_3_2_1_22_1","unstructured":"Muntasir Hoq Peter Brusilovsky and Bita Akram. 2023 a. Analysis of an Explainable Student Performance Prediction Model in an Introductory Programming Course. In EDM. 79--90."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Muntasir Hoq Sushanth Reddy Chilla Melika Ahmadi Ranjbar Peter Brusilovsky and Bita Akram. 2023 b. SANN: Programming Code Representation Using Attention Neural Network with Optimized Subtree Extraction. In CIKM. 783--792.","DOI":"10.1145\/3583780.3615047"},{"key":"e_1_3_2_1_24_1","volume-title":"AIED LLM Workshop.","author":"Hoq Muntasir","year":"2023","unstructured":"Muntasir Hoq, Yang Shi, Juho Leinonen, Damilola Babalola, Collin Lynch, and Bita Akram. 2023 c. Detecting ChatGPT-Generated Code in a CS1 Course. In AIED LLM Workshop."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Qiubo Huang Guozheng Fang and Keyuan Jiang. 2019. An Approach of Suspected Code Plagiarism Detection Based on XGBoost Incremental Learning. In CNCI.","DOI":"10.2991\/cnci-19.2019.40"},{"key":"e_1_3_2_1_26_1","volume-title":"Regina Berretta, Ayse Aysin Bilgin, Lakmali Jayarathna, and Judy Sheard.","author":"Jha Meena","year":"2022","unstructured":"Meena Jha, Sander JJ Leemans, Regina Berretta, Ayse Aysin Bilgin, Lakmali Jayarathna, and Judy Sheard. 2022. Online Assessment and COVID: Opportunities and Challenges. In ACEC. 27--35."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TE.2010.2046664"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.21913\/IJEI.v9i1.844"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3506717"},{"key":"e_1_3_2_1_30_1","volume-title":"Barbara J Ericson, David Weintrop, and Tovi Grossman.","author":"Kazemitabaar Majeed","year":"2023","unstructured":"Majeed Kazemitabaar, Justin Chow, Carl Ka To Ma, Barbara J Ericson, David Weintrop, and Tovi Grossman. 2023. Studying the effect of AI Code Generators on Supporting Novice Learners in Introductory Programming. In CHI. 1--23."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCSNT.2012.6526164"},{"key":"e_1_3_2_1_32_1","volume-title":"Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa.","author":"Kojima Takeshi","year":"2022","unstructured":"Takeshi Kojima, Shixiang Shane Gu, Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa. 2022. Large language models are zero-shot reasoners. In NeurIPS."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"SeolHwa Lee Andrew Matteson Danial Hooshyar SongHyun Kim JaeBum Jung GiChun Nam and HeuiSeok Lim. 2016. Comparing programming language comprehension between novice and expert programmers using eeg analysis. In BIBE. 350--355.","DOI":"10.1109\/BIBE.2016.30"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Juho Leinonen Paul Denny Stephen MacNeil Sami Sarsa Seth Bernstein Joanne Kim Andrew Tran and Arto Hellas. 2023 a. Comparing code explanations created by students and large language models. In ITiCSE.","DOI":"10.1145\/3587102.3588785"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Juho Leinonen Arto Hellas Sami Sarsa Brent Reeves Paul Denny James Prather and Brett A Becker. 2023 b. Using large language models to enhance programming error messages. In SIGCSE. 563--569.","DOI":"10.1145\/3545945.3569770"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Stephen MacNeil Andrew Tran Arto Hellas Joanne Kim Sami Sarsa Paul Denny Seth Bernstein and Juho Leinonen. 2023. Experiences from using code explanations generated by large language models in a web software development e-book. In SIGCSE. 931--937.","DOI":"10.1145\/3545945.3569785"},{"key":"e_1_3_2_1_37_1","unstructured":"Ye Mao Yang Shi Samiha Marwan Thomas W Price Tiffany Barnes and Min Chi. 2021. Knowing both when and where: Temporal-ASTNN for Early Prediction of Student Success in Novice Programming Tasks. In EDM."},{"key":"e_1_3_2_1_38_1","volume-title":"Joseph Jay Williams, and Thomas Price","author":"Marwan Samiha","year":"2019","unstructured":"Samiha Marwan, Joseph Jay Williams, and Thomas Price. 2019. An evaluation of the impact of automated programming hints on performance and learning. In ICER. 61--70."},{"key":"e_1_3_2_1_39_1","volume-title":"DetectGPT: Zero-Shot Machine-Generated Text Detection using Probability Curvature. arXiv preprint arXiv:2301.11305","author":"Mitchell Eric","year":"2023","unstructured":"Eric Mitchell, Yoonho Lee, Alexander Khazatsky, Christopher D Manning, and Chelsea Finn. 2023. DetectGPT: Zero-Shot Machine-Generated Text Detection using Probability Curvature. arXiv preprint arXiv:2301.11305 (2023)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/0273-2297(81)90019-8"},{"key":"e_1_3_2_1_41_1","volume-title":"How self-regulated learners cope with academic difficulty: The role of adaptive help seeking. Theory into practice","author":"Newman Richard S","year":"2002","unstructured":"Richard S Newman. 2002. How self-regulated learners cope with academic difficulty: The role of adaptive help seeking. Theory into practice, Vol. 41, 2 (2002), 132--138."},{"key":"e_1_3_2_1_42_1","volume-title":"Carlos Anibal Suarez, and Michael Liut","author":"Orenstrakh Michael Sheinman","year":"2023","unstructured":"Michael Sheinman Orenstrakh, Oscar Karnalim, Carlos Anibal Suarez, and Michael Liut. 2023. Detecting LLM-Generated Text in Computing Education: A Comparative Study for ChatGPT Cases. arXiv preprint arXiv:2307.07411 (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"2023 a. The Robots Are Here: The Generative AI Revolution in Computing Education","author":"Prather James","unstructured":"James Prather, Paul Denny, Juho Leinonen, Brett A Becker, Ibrahim Albluwi, Michelle Craig, Hieke Keuning, Natalie Kiesler, Tobias Kohn, Andrew Luxton-Reilly, Stephen MacNeil, Andrew Petersen, Raymond Pettit, Brent N Reeves, and Jaromir Savelka. 2023 a. The Robots Are Here: The Generative AI Revolution in Computing Education. Working Group Reports on Innovation and Technology in Computer Science Education (2023)."},{"key":"e_1_3_2_1_44_1","volume-title":"2023 b. \"It's Weird That it Knows What I Want\": Usability and Interactions with Copilot for Novice Programmers. TOCHI","author":"Prather James","year":"2023","unstructured":"James Prather, Brent N Reeves, Paul Denny, Brett A Becker, Juho Leinonen, Andrew Luxton-Reilly, Garrett Powell, James Finnie-Ansley, and Eddie Antonio Santos. 2023 b. \"It's Weird That it Knows What I Want\": Usability and Interactions with Copilot for Novice Programmers. TOCHI (2023)."},{"key":"e_1_3_2_1_45_1","first-page":"1016","article-title":"Finding plagiarisms among a set of programs with JPlag","volume":"8","author":"Prechelt Lutz","year":"2002","unstructured":"Lutz Prechelt, Guido Malpohl, Michael Philippsen, et al. 2002. Finding plagiarisms among a set of programs with JPlag. J. Univ. Comput. Sci. , Vol. 8, 11 (2002), 1016.","journal-title":"J. Univ. Comput. Sci."},{"key":"e_1_3_2_1_46_1","volume-title":"NPR","volume":"18","author":"Rosalsky Greg","year":"2023","unstructured":"Greg Rosalsky and Emma Peaslee. 2023. This 22-year-old is trying to save us from ChatGPT before it changes writing forever. NPR , Vol. 18 (2023)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1037\/a0027696"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Sami Sarsa Paul Denny Arto Hellas and Juho Leinonen. 2022a. Automatic generation of programming exercises and code explanations using large language models. In ICER. 27--43.","DOI":"10.1145\/3501385.3543957"},{"key":"e_1_3_2_1_49_1","article-title":"Empirical Evaluation of Deep Learning Models for Knowledge Tracing: Of Hyperparameters and Metrics on Performance and Replicability","volume":"14","author":"Sarsa Sami","year":"2022","unstructured":"Sami Sarsa, Juho Leinonen, and Arto Hellas. 2022b. Empirical Evaluation of Deep Learning Models for Knowledge Tracing: Of Hyperparameters and Metrics on Performance and Replicability. J. of EDM, Vol. 14, 2 (2022).","journal-title":"J. of EDM"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Judy Sheard Martin Dick Selby Markham Ian Macdonald and Meaghan Walsh. 2002. Cheating and plagiarism: Perceptions and practices of first year IT students. In ITiCSE. 183--187.","DOI":"10.1145\/544414.544468"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1080\/0729436032000056526"},{"key":"e_1_3_2_1_52_1","unstructured":"Yang Shi. 2023. Interpretable Code-Informed Learning Analytics for CS Education. In LAK. 180--187."},{"key":"e_1_3_2_1_53_1","unstructured":"Yang Shi Min Chi Tiffany Barnes and Thomas Price. 2022. Code-DKT: A Code-based Knowledge Tracing Model for Programming Tasks. In EDM. 50--61."},{"key":"e_1_3_2_1_54_1","unstructured":"Yang Shi Ye Mao Tiffany Barnes Min Chi and Thomas W Price. 2021a. More with less: Exploring how to use deep learning effectively through semi-supervised learning for automatic bug detection in student code.. In EDM. 446--453."},{"key":"e_1_3_2_1_55_1","unstructured":"Yang Shi Robin Schmucker Min Chi Tiffany Barnes and Thomas Price. 2023. KC-Finder: Automated Knowledge Component Discovery for Programming Problems.. In EDM."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Yang Shi Krupal Shah Wengran Wang Samiha Marwan Poorvaja Penmetsa and Thomas Price. 2021b. Toward semi-automatic misconception discovery using code embeddings. In LAK. 606--612.","DOI":"10.1145\/3448139.3448205"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Wenhan Wang Ge Li Bo Ma Xin Xia and Zhi Jin. 2020. Detecting code clones with graph neural network and flow-augmented abstract syntax tree. In SANER.","DOI":"10.1109\/SANER48275.2020.9054857"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1006\/imms.1993.1084"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"crossref","unstructured":"Jian Zhang Xu Wang Hongyu Zhang Hailong Sun Kaixuan Wang and Xudong Liu. 2019. A novel neural source code representation based on abstract syntax tree. In ICSE. 783--794. io","DOI":"10.1109\/ICSE.2019.00086"}],"event":{"name":"SIGCSE 2024: The 55th ACM Technical Symposium on Computer Science Education","location":"Portland OR USA","acronym":"SIGCSE 2024","sponsor":["SIGCSE ACM Special Interest Group on Computer Science Education"]},"container-title":["Proceedings of the 55th ACM Technical Symposium on Computer Science Education V. 1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626252.3630826","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626252.3630826","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:34:43Z","timestamp":1755750883000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626252.3630826"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,7]]},"references-count":59,"alternative-id":["10.1145\/3626252.3630826","10.1145\/3626252"],"URL":"https:\/\/doi.org\/10.1145\/3626252.3630826","relation":{},"subject":[],"published":{"date-parts":[[2024,3,7]]},"assertion":[{"value":"2024-03-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}