{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T17:06:39Z","timestamp":1777914399102,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,3]],"date-time":"2025-03-03T00:00:00Z","timestamp":1740960000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,3]]},"DOI":"10.1145\/3706468.3706530","type":"proceedings-article","created":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T14:04:11Z","timestamp":1740146651000},"page":"494-504","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Does Multiple Choice Have a Future in the Age of Generative AI? A Posttest-only RCT"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8196-3252","authenticated-orcid":false,"given":"Danielle R","family":"Thomas","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3437-8979","authenticated-orcid":false,"given":"Conrad","family":"Borchers","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6508-8647","authenticated-orcid":false,"given":"Sanjit","family":"Kakarla","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3320-3907","authenticated-orcid":false,"given":"Jionghao","family":"Lin","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3695-2334","authenticated-orcid":false,"given":"Shambhavi","family":"Bhushan","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8303-4649","authenticated-orcid":false,"given":"Boyuan","family":"Guo","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6880-5740","authenticated-orcid":false,"given":"Erin","family":"Gatz","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5850-4768","authenticated-orcid":false,"given":"Kenneth R","family":"Koedinger","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,3,3]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Vincent\u00a0A Aleven and Kenneth\u00a0R Koedinger. 2002. An effective metacognitive strategy: Learning by doing and explaining with a computer-based cognitive tutor. Cognitive science 26 2 (2002) 147\u2013179.","DOI":"10.1207\/s15516709cog2602_1"},{"key":"e_1_3_3_1_3_2","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared\u00a0D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et\u00a0al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Andrew\u00a0C Butler. 2018. Multiple-choice testing in education: Are the best practices for assessment also good for learning? Journal of Applied Research in Memory and Cognition 7 3 (2018) 323\u2013331.","DOI":"10.1016\/j.jarmac.2018.07.002"},{"key":"e_1_3_3_1_5_2","first-page":"403","volume-title":"Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024)","author":"Carpenter Dan","year":"2024","unstructured":"Dan Carpenter, Wookhee Min, Seung Lee, Gamze Ozogul, Xiaoying Zheng, and James Lester. 2024. Assessing Student Explanations with Large Language Models Using Fine-Tuning and Few-Shot Learning. In Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024), Ekaterina Kochmar, Marie Bexte, Jill Burstein, Andrea Horbach, Ronja Laarmann-Quante, Ana\u00efs Tack, Victoria Yaneva, and Zheng Yuan (Eds.). Association for Computational Linguistics, Mexico City, Mexico, 403\u2013413."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Loredana Caruccio Stefano Cirillo Giuseppe Polese Giandomenico Solimando Shanmugam Sundaramurthy and Genoveffa Tortora. 2024. Claude 2.0 Large Language Model: tackling a real-world classification problem with a new Iterative Prompt Engineering approach. Intelligent Systems with Applications 21 (2024).","DOI":"10.1016\/j.iswa.2024.200336"},{"key":"e_1_3_3_1_7_2","first-page":"1812","volume-title":"Society for Information Technology & Teacher Education International Conference","author":"Chhabra Pallavi","year":"2022","unstructured":"Pallavi Chhabra, Danielle Chine, Adetunji Adeniran, Shivang Gupta, and Kenneth Koedinger. 2022. An evaluation of perceptions regarding mentor competencies for technology-based personalized learning. In Society for Information Technology & Teacher Education International Conference. Association for the Advancement of Computing in Education (AACE), San Diego, CA, USA, 1812\u20131817."},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Michelene\u00a0TH Chi and Ruth Wylie. 2014. The ICAP framework: Linking cognitive engagement to active learning outcomes. Educational psychologist 49 4 (2014) 219\u2013243.","DOI":"10.1080\/00461520.2014.965823"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491140.3528262"},{"key":"e_1_3_3_1_10_2","first-page":"581","volume-title":"Proceedings of The Learning Ideas Conference 2022","author":"Chine Danielle\u00a0R.","year":"2022","unstructured":"Danielle\u00a0R. Chine, Pallavi Chhabra, Adetunji Adeniran, Joseph Kopko, Cindy Tipper, Shivang Gupta, and Kenneth\u00a0R. Koedinger. 2022. Scenario-based training and on-the-job support for equitable mentoring. In Proceedings of The Learning Ideas Conference 2022. Springer, Cham, Switzerland, 581\u2013592."},{"key":"e_1_3_3_1_11_2","unstructured":"Aubrey Condor Max Litster and Zachary Pardos. 2021. Automatic Short Answer Grading with SBERT on Out-of-Sample Questions. International Educational Data Mining Society (2021)."},{"key":"e_1_3_3_1_12_2","volume-title":"Experimental and quasi-experimental designs for generalized causal inference","author":"Cook Thomas\u00a0D","year":"2007","unstructured":"Thomas\u00a0D Cook and Donald\u00a0T Campbell. 2007. Experimental and quasi-experimental designs for generalized causal inference. Figures."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Alison Cook-Sather. 2020. Student voice across contexts: Fostering student agency in today\u2019s schools. Theory into practice 59 2 (2020) 182\u2013191.","DOI":"10.1080\/00405841.2019.1705091"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-64299-9_18"},{"key":"e_1_3_3_1_15_2","unstructured":"Isabel\u00a0O Gallegos Ryan\u00a0A Rossi Joe Barrow Md\u00a0Mehrab Tanjim Sungchul Kim Franck Dernoncourt Tong Yu Ruiyi Zhang and Nesreen\u00a0K Ahmed. 2024. Bias and fairness in large language models: A survey. Computational Linguistics (2024) 1\u201379."},{"key":"e_1_3_3_1_16_2","unstructured":"Graham Gibbs. 1988. Learning by doing: A guide to teaching and learning methods. Further Education Unit (1988)."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3636555.3636908"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Jonathan Guryan Jens Ludwig Monica\u00a0P Bhatt Philip\u00a0J Cook Jonathan Davis Kenneth Dodge George Farkas Roland\u00a0G Fryer\u00a0Jr Susan Mayer Harold Pollack et\u00a0al. 2021. Not Too Late: Improving Academic Outcomes among Adolescents (Working Paper 28531). National Bureau of Economic Research (2021).","DOI":"10.3386\/w28531"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.4324\/9780203825945"},{"key":"e_1_3_3_1_20_2","volume-title":"AI for Education: Bridging Innovation and Responsibility at the 38th AAAI Annual Conference on AI","author":"Han Zifei\u00a0FeiFei","year":"2024","unstructured":"Zifei\u00a0FeiFei Han, Jionghao Lin, Ashish Gurung, Danielle Thomas, Eason Chen, Conrad Borchers, Shivang Gupta, and Ken Koedinger. 2024. Improving Assessment of Tutoring Practices using Retrieval-Augmented Generation. In AI for Education: Bridging Innovation and Responsibility at the 38th AAAI Annual Conference on AI."},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3657604.3664693"},{"key":"e_1_3_3_1_22_2","first-page":"37","volume-title":"Human-AI Math Tutoring@ AIED","author":"Hirunyasiri Dollaya","year":"2023","unstructured":"Dollaya Hirunyasiri, Danielle\u00a0R Thomas, Jionghao Lin, Kenneth\u00a0R Koedinger, and Vincent Aleven. 2023. Comparative Analysis of GPT-4 and Human Graders in Evaluating Human Tutors Giving Praise to Students.. In Human-AI Math Tutoring@ AIED. 37\u201348."},{"key":"e_1_3_3_1_23_2","unstructured":"Sanjit Kakarla Danielle Thomas Jionghao Lin Shivang Gupta and Kenneth\u00a0R Koedinger. 2024. Using large language models to assess tutors\u2019 performance in reacting to students making math errors. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.03238 (2024)."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/2724660.2724681"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"MA Kraft and G Falken. 2021. A blueprint for scaling tutoring across public schools (EdWorkingPaper No. 21\u2013335). Annenberg Institute at Brown University.","DOI":"10.1177\/23328584211042858"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Gyeong-Geon Lee Ehsan Latif Xuansheng Wu Ninghao Liu and Xiaoming Zhai. 2024. Applying large language models and chain-of-thought for automatic scoring. Computers and Education: Artificial Intelligence 6 (2024) 100213.","DOI":"10.1016\/j.caeai.2024.100213"},{"key":"e_1_3_3_1_27_2","volume-title":"emmeans: Estimated Marginal Means, aka Least-Squares Means","author":"Lenth Russell\u00a0V.","year":"2024","unstructured":"Russell\u00a0V. Lenth. 2024. emmeans: Estimated Marginal Means, aka Least-Squares Means. https:\/\/CRAN.R-project.org\/package=emmeans R package version 1.10.2."},{"key":"e_1_3_3_1_28_2","first-page":"236","volume-title":"Proceedings of the 17th International Conference on Educational Data Mining","author":"Lin Jionghao","year":"2024","unstructured":"Jionghao Lin, Eason Chen, Zifei Han, Ashish Gurung, Danielle\u00a0R. Thomas, Wei Tan, Ngoc\u00a0Dang Nguyen, and Kenneth\u00a0R. Koedinger. 2024. How Can I Improve? Using GPT to Highlight the Desired and Undesired Parts of Open-ended Responses. In Proceedings of the 17th International Conference on Educational Data Mining. Atlanta, Georgia, USA, 236\u2013250."},{"key":"e_1_3_3_1_29_2","unstructured":"Jionghao Lin Zifei Han Danielle\u00a0R Thomas Ashish Gurung Shivang Gupta Vincent Aleven and Kenneth\u00a0R Koedinger. 2024. How Can I Get It Right? Using GPT to Rephrase Incorrect Trainee Responses. International Journal of Artificial Intelligence in Education (2024) 1\u201327."},{"key":"e_1_3_3_1_30_2","first-page":"12","volume-title":"Human-AI Math Tutoring@ AIED","author":"Lin Jionghao","year":"2023","unstructured":"Jionghao Lin, Danielle\u00a0R Thomas, Feifei Han, Shivang Gupta, Wei Tan, Ngoc\u00a0Dang Nguyen, and Kenneth\u00a0R Koedinger. 2023. Using Large Language Models to Provide Explanatory Feedback to Human Tutors. In Human-AI Math Tutoring@ AIED. 12\u201332."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Andre Nickow Philip Oreopoulos and Vincent Quan. 2020. The impressive effects of tutoring on prek-12 learning: A systematic review and meta-analysis of the experimental evidence. (2020).","DOI":"10.3386\/w27476"},{"key":"e_1_3_3_1_32_2","unstructured":"OpenAI Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2024. GPT-4 Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2303.08774\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"crossref","unstructured":"John\u00a0F Pane Beth\u00a0Ann Griffin Daniel\u00a0F McCaffrey and Rita Karam. 2014. Effectiveness of cognitive tutor algebra I at scale. Educational Evaluation and Policy Analysis 36 2 (2014) 127\u2013144.","DOI":"10.3102\/0162373713507480"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-36272-9_5"},{"key":"e_1_3_3_1_35_2","unstructured":"Carly\u00a0D Robinson and Susanna Loeb. 2021. High-impact tutoring: State of the research and priorities for future learning. National Student Support Accelerator 21 284 (2021) 1\u201353."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3576050.3576089"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3657604.3664700"},{"key":"e_1_3_3_1_38_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_1_39_2","volume-title":"Research methods knowledge base","author":"Trochim William\u00a0MK","year":"2001","unstructured":"William\u00a0MK Trochim and James\u00a0P Donnelly. 2001. Research methods knowledge base. Vol.\u00a02. Atomic dog publishing Cincinnati, OH."},{"key":"e_1_3_3_1_40_2","volume-title":"The Eleventh International Conference on Learning Representations","author":"Wang Xuezhi","year":"2022","unstructured":"Xuezhi Wang, Jason Wei, Dale Schuurmans, Quoc\u00a0V Le, Ed\u00a0H Chi, Sharan Narang, Aakanksha Chowdhery, and Denny Zhou. 2022. Self-Consistency Improves Chain of Thought Reasoning in Language Models. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_3_1_41_2","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in Neural Information Processing Systems 35 (2022) 24824\u201324837."},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3657604.3664681"},{"key":"e_1_3_3_1_43_2","unstructured":"Yue Zhang Yafu Li Leyang Cui Deng Cai Lemao Liu Tingchen Fu Xinting Huang Enbo Zhao Yu Zhang Yulong Chen et\u00a0al. 2023. Siren\u2019s song in the AI ocean: a survey on hallucination in large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.01219 (2023)."}],"event":{"name":"LAK '25: The 15th International Learning Analytics and Knowledge Conference","location":"Dublin Ireland","acronym":"LAK 2025"},"container-title":["Proceedings of the 15th International Learning Analytics and Knowledge Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706468.3706530","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706468.3706530","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:56:51Z","timestamp":1750298211000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706468.3706530"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,3]]},"references-count":42,"alternative-id":["10.1145\/3706468.3706530","10.1145\/3706468"],"URL":"https:\/\/doi.org\/10.1145\/3706468.3706530","relation":{},"subject":[],"published":{"date-parts":[[2025,3,3]]},"assertion":[{"value":"2025-03-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}