{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T17:11:56Z","timestamp":1777050716326,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100006464","name":"Birla Institute of Technology and Science, Pilani","doi-asserted-by":"publisher","award":["N4\/24\/1004"],"award-info":[{"award-number":["N4\/24\/1004"]}],"id":[{"id":"10.13039\/501100006464","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3702652.3744220","type":"proceedings-article","created":{"date-parts":[[2025,7,31]],"date-time":"2025-07-31T09:39:27Z","timestamp":1753954767000},"page":"181-195","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Rubric Is All You Need: Improving LLM-Based Code Evaluation With Question-Specific Rubrics"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4712-1790","authenticated-orcid":false,"given":"Aditya","family":"Pathak","sequence":"first","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1594-6052","authenticated-orcid":false,"given":"Rachit","family":"Gandhi","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6660-6374","authenticated-orcid":false,"given":"Vaibhav","family":"Uttam","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5577-9138","authenticated-orcid":false,"given":"Arnav","family":"Ramamoorthy","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2313-2949","authenticated-orcid":false,"given":"Pratyush","family":"Ghosh","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7509-9389","authenticated-orcid":false,"given":"Aaryan Raj","family":"Jindal","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7239-2986","authenticated-orcid":false,"given":"Shreyash","family":"Verma","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6497-7142","authenticated-orcid":false,"given":"Aditya","family":"Mittal","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1027-3873","authenticated-orcid":false,"given":"Aashna","family":"Ased","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8346-4966","authenticated-orcid":false,"given":"Chirag","family":"Khatri","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1509-8644","authenticated-orcid":false,"given":"Yashwanth","family":"Nakka","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6882-2379","authenticated-orcid":false,"family":"Devansh","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9794-0087","authenticated-orcid":false,"given":"Jagat Sesh","family":"Challa","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4831-1847","authenticated-orcid":false,"given":"Dhruv","family":"Kumar","sequence":"additional","affiliation":[{"name":"BITS Pilani, Pilani, India"}]}],"member":"320","published-online":{"date-parts":[[2025,8,2]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"[n. d.]. Claude 3.7 Sonnet and Claude Code. https:\/\/www.anthropic.com\/news\/claude-3-7-sonnet"},{"key":"e_1_3_3_2_3_2","volume-title":"Pingouin","year":"2024","unstructured":"2024. Pingouin. https:\/\/github.com\/raphaelvallat\/pingouin\/releases\/tag\/0.5.5 Release 0.5.5 from September 2024."},{"key":"e_1_3_3_2_4_2","volume-title":"scikit-learn","year":"2024","unstructured":"2024. scikit-learn. https:\/\/github.com\/scikit-learn\/scikit-learn\/releases\/tag\/1.6.1 Release 1.6.1."},{"key":"e_1_3_3_2_5_2","volume-title":"SciPy","year":"2025","unstructured":"2025. SciPy. https:\/\/github.com\/scipy\/scipy\/releases\/tag\/v1.15.2 Release 1.15.2 from Feb 2025."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","unstructured":"Umar Alkafaween Ibrahim Albluwi and Paul Denny. 2024. Automating Autograding: Large Language Models as Test Suite Generators for Introductory Programming. Journal of Computer Assisted Learning 41 1 (Dec. 2024). 10.1111\/jcal.13100","DOI":"10.1111\/jcal.13100"},{"key":"e_1_3_3_2_7_2","unstructured":"Anishka Atharva Mehta Nipun Gupta Aarav Balachandran Dhruv Kumar and Pankaj Jalote. 2024. Can ChatGPT Play the Role of a Teaching Assistant in an Introductory Programming Course? arxiv:https:\/\/arXiv.org\/abs\/2312.07343\u00a0[cs.HC] https:\/\/arxiv.org\/abs\/2312.07343"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3649217.3653594"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Faieza Chowdhury. 2018. Application of Rubrics in the Classroom: A Vital Tool for Improvement in Assessment Feedback and Learning. International Education Studies (2018). https:\/\/api.semanticscholar.org\/CorpusID:59295385","DOI":"10.5539\/ies.v12n1p61"},{"key":"e_1_3_3_2_10_2","unstructured":"Jacob Devlin Ming-Wei Chang Kenton Lee and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arxiv:https:\/\/arXiv.org\/abs\/1810.04805\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1810.04805"},{"key":"e_1_3_3_2_11_2","unstructured":"Zhiyuan Fan Weinong Wang Xing Wu and Debing Zhang. 2025. SedarEval: Automated Evaluation using Self-Adaptive Rubrics. arxiv:https:\/\/arXiv.org\/abs\/2501.15595\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2501.15595"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/FIE43999.2019.9028686"},{"key":"e_1_3_3_2_13_2","unstructured":"Arto Hellas Juho Leinonen and Leo Lepp\u00e4nen. 2024. Experiences from Integrating Large Language Model Chatbots into the Classroom. arxiv:https:\/\/arXiv.org\/abs\/2406.04817\u00a0[cs.CY] https:\/\/arxiv.org\/abs\/2406.04817"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3568813.3600139"},{"key":"e_1_3_3_2_15_2","unstructured":"Yann Hicke Anmol Agarwal Qianou Ma and Paul Denny. 2023. AI-TA: Towards an Intelligent Question-Answer Teaching Assistant using Open-Source LLMs. arxiv:https:\/\/arXiv.org\/abs\/2311.02775\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2311.02775"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/1930464.1930480"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/educon60312.2024.10578838"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3626252.3630803"},{"key":"e_1_3_3_2_19_2","unstructured":"Ishika Joshi Ritvik Budhiraja Pranav\u00a0Deepak Tanna Lovenya Jain Mihika Deshpande Arjun Srivastava Srinivas Rallapalli Harshal\u00a0D Akolekar Jagat\u00a0Sesh Challa and Dhruv Kumar. 2023. \"With Great Power Comes Great Responsibility!\": Student and Instructor Perspectives on the influence of LLMs on Undergraduate Engineering Education. arxiv:https:\/\/arXiv.org\/abs\/2309.10694\u00a0[cs.HC] https:\/\/arxiv.org\/abs\/2309.10694"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/2899415.2899422"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","unstructured":"Hieke Keuning Johan Jeuring and Bastiaan Heeren. 2018. A Systematic Literature Review of Automated Feedback Generation for Programming Exercises. ACM Trans. Comput. Educ. 19 1 Article 3 (Sept. 2018) 43\u00a0pages. 10.1145\/3231711","DOI":"10.1145\/3231711"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Natalie Kiesler Dominic Lohr and Hieke Keuning. 2023. Exploring the Potential of Large Language Models to Generate Formative Programming Feedback. arxiv:https:\/\/arXiv.org\/abs\/2309.00029\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2309.00029","DOI":"10.1109\/FIE58773.2023.10343457"},{"key":"e_1_3_3_2_23_2","unstructured":"Nachiket Kotalwar Alkis Gotovos and Adish Singla. 2025. Hints-In-Browser: Benchmarking Language Models for Programming Feedback Generation. arxiv:https:\/\/arXiv.org\/abs\/2406.05053\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2406.05053"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3641554.3701791"},{"key":"e_1_3_3_2_25_2","unstructured":"Mark Liffiton Brad Sheese Jaromir Savelka and Paul Denny. 2023. CodeHelp: Using Large Language Models with Guardrails for Scalable Support in Programming Classes. arxiv:https:\/\/arXiv.org\/abs\/2308.06921\u00a0[cs.CY] https:\/\/arxiv.org\/abs\/2308.06921"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEET.2019.00022"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","unstructured":"Richard Lobb and Jenny Harlow. 2016. Coderunner: a tool for assessing computer programming skills. ACM Inroads 7 1 (Feb. 2016) 47\u201351. 10.1145\/2810041","DOI":"10.1145\/2810041"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","unstructured":"B. Mandernach Swinton Hudson and Shanna Wise. 2013. Where Has The Time Gone? Faculty Activities and Time Commitments in the Online Classroom. Journal of Educators Online 10 (07 2013). 10.9743\/JEO.2013.2.2","DOI":"10.9743\/JEO.2013.2.2"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","unstructured":"Marcus Messer Neil C.\u00a0C. Brown Michael K\u00f6lling and Miaojing Shi. 2024. Automated Grading and Feedback Tools for Programming Education: A Systematic Review. ACM Transactions on Computing Education 24 1 (Feb. 2024) 1\u201343. 10.1145\/3636515","DOI":"10.1145\/3636515"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","unstructured":"Dan-Anders Normann Lise\u00a0Vikan Sandvik and Henning Fj\u00f8rtoft. 2023. Reduced grading in assessment: A scoping review. Teaching and Teacher Education 135 (2023) 104336. 10.1016\/j.tate.2023.104336","DOI":"10.1016\/j.tate.2023.104336"},{"key":"e_1_3_3_2_31_2","unstructured":"OpenAI. 2023. GPT-4 Technical Report. https:\/\/arxiv.org\/abs\/2303.08774v2"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICALT.2019.00089"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"crossref","unstructured":"Maciej Pankiewicz and Ryan\u00a0S. Baker. 2023. Large Language Models (GPT) for automating feedback on programming assignments. arxiv:https:\/\/arXiv.org\/abs\/2307.00150\u00a0[cs.HC] https:\/\/arxiv.org\/abs\/2307.00150","DOI":"10.58459\/icce.2023.950"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Tung Phung Victor-Alexandru P\u0103durean Jos\u00e9 Cambronero Sumit Gulwani Tobias Kohn Rupak Majumdar Adish Singla and Gustavo Soares. 2023. Generative AI for Programming Education: Benchmarking ChatGPT GPT-4 and Human Tutors. arxiv:https:\/\/arXiv.org\/abs\/2306.17156\u00a0[cs.CY] https:\/\/arxiv.org\/abs\/2306.17156","DOI":"10.1145\/3568812.3603476"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.5555\/2541917.2541921"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3105726.3106169"},{"key":"e_1_3_3_2_38_2","unstructured":"Nishat Raihan Mohammed\u00a0Latif Siddiq Joanna C.\u00a0S. Santos and Marcos Zampieri. 2024. Large Language Models in Computer Science Education: A Systematic Literature Review. arxiv:https:\/\/arXiv.org\/abs\/2410.16349\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2410.16349"},{"key":"e_1_3_3_2_39_2","unstructured":"Shuo Ren Daya Guo Shuai Lu Long Zhou Shujie Liu Duyu Tang Neel Sundaresan Ming Zhou Ambrosio Blanco and Shuai Ma. 2020. CodeBLEU: a Method for Automatic Evaluation of Code Synthesis. arxiv:https:\/\/arXiv.org\/abs\/2009.10297\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2009.10297"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3501385.3543957"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","unstructured":"Hyein Seo Taewook Hwang Jeesu Jung Hyeonseok Kang Hyuk Namgoong Yohan Lee and Sangkeun Jung. 2025. Large Language Models as Evaluators in Education: Verification of Feedback Consistency and Accuracy. Applied Sciences 15 2 (2025). 10.3390\/app15020671","DOI":"10.3390\/app15020671"},{"key":"e_1_3_3_2_42_2","unstructured":"Weixi Tong and Tianyi Zhang. 2024. CodeJudge: Evaluating Code Generation with Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2410.02184\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2410.02184"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/bigdata62323.2024.10825949"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491101.3519665"},{"key":"e_1_3_3_2_45_2","unstructured":"Wenjing Xie Juxin Niu Chun\u00a0Jason Xue and Nan Guan. 2024. Grade Like a Human: Rethinking Automated Assessment with Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2405.19694\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2405.19694"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","unstructured":"Mina Yousef Kareem Mohamed Walaa Medhat Ensaf\u00a0Hussein Mohamed Ghada Khoriba and Tamer Arafa. 2024. BeGrading: large language models for enhanced feedback in programming education. Neural Computing and Applications 37 2 (Oct. 2024) 1027\u20131040. 10.1007\/s00521-024-10449-y","DOI":"10.1007\/s00521-024-10449-y"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"crossref","unstructured":"Shuyan Zhou Uri Alon Sumit Agarwal and Graham Neubig. 2023. CodeBERTScore: Evaluating Code Generation with Pretrained Models of Code. arxiv:https:\/\/arXiv.org\/abs\/2302.05527\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2302.05527","DOI":"10.18653\/v1\/2023.emnlp-main.859"},{"key":"e_1_3_3_2_48_2","unstructured":"Terry Yue Zhuo. 2024. ICE-Score: Instructing Large Language Models to Evaluate Code. arxiv:https:\/\/arXiv.org\/abs\/2304.14317\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2304.14317"}],"event":{"name":"ICER 2025: ACM Conference on International Computing Education Research","location":"Charlottesville USA","acronym":"ICER '25","sponsor":["SIGCSE ACM Special Interest Group on Computer Science Education"]},"container-title":["Proceedings of the 2025 ACM Conference on International Computing Education Research V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3702652.3744220","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,31]],"date-time":"2025-07-31T11:36:17Z","timestamp":1753961777000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3702652.3744220"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,2]]},"references-count":47,"alternative-id":["10.1145\/3702652.3744220","10.1145\/3702652"],"URL":"https:\/\/doi.org\/10.1145\/3702652.3744220","relation":{},"subject":[],"published":{"date-parts":[[2025,8,2]]},"assertion":[{"value":"2025-08-02","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}