{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T17:22:28Z","timestamp":1778692948377,"version":"3.51.4"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031643019","type":"print"},{"value":"9783031643026","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-64302-6_20","type":"book-chapter","created":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T06:04:39Z","timestamp":1719813879000},"page":"280-294","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["Improving the\u00a0Validity of\u00a0Automatically Generated Feedback via\u00a0Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Alexander","family":"Scarlatos","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Digory","family":"Smith","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Simon","family":"Woodhead","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrew","family":"Lan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,7,2]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Al-Hossami, E., Bunescu, R., Teehan, R., Powell, L., Mahajan, K., Dorodchi, M.: Socratic questioning of novice debuggers: a benchmark dataset and preliminary evaluations. In: Proceedings of the Workshop on Innovative Use of NLP for Building Educational Applications (BEA2023@ACL), pp. 709\u2013726 (2023)","DOI":"10.18653\/v1\/2023.bea-1.57"},{"key":"20_CR2","doi-asserted-by":"publisher","first-page":"143","DOI":"10.2304\/forum.2013.55.1.143","volume":"55","author":"J Boaler","year":"2013","unstructured":"Boaler, J.: Ability and mathematics: the mindset revolution that is reshaping education. Forum 55, 143\u2013152 (2013)","journal-title":"Forum"},{"issue":"3","key":"20_CR3","doi-asserted-by":"publisher","first-page":"823","DOI":"10.1111\/jcal.12793","volume":"39","author":"A Botelho","year":"2023","unstructured":"Botelho, A., Baral, S., Erickson, J.A., Benachamardi, P., Heffernan, N.T.: Leveraging natural language processing to support automated assessment and feedback for student open responses in mathematics. J. Comput. Assist. Learn. 39(3), 823\u2013840 (2023)","journal-title":"J. Comput. Assist. Learn."},{"key":"20_CR4","unstructured":"Chen, M., et al.: Evaluating large language models trained on code (2021)"},{"key":"20_CR5","unstructured":"Chen, W., Ma, X., Wang, X., Cohen, W.W.: Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks. arXiv preprint arXiv:2211.12588 (2022)"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Chiang, C.H., Lee, H.V.: Can large language models be an alternative to human evaluations? arXiv preprint arXiv:2305.01937 (2023)","DOI":"10.18653\/v1\/2023.acl-long.870"},{"key":"20_CR7","unstructured":"Dettmers, T., Lewis, M., Belkada, Y., Zettlemoyer, L.: Llm.int8(): 8-bit matrix multiplication for transformers at scale (2022)"},{"key":"20_CR8","unstructured":"Hu, E.J., et al.: Lora: Low-rank adaptation of large language models (2021)"},{"key":"20_CR9","unstructured":"Jia, Q., Cui, J., Xiao, Y., Liu, C., Rashid, P., Gehringer, E.F.: All-in-one: multi-task learning BERT models for evaluating peer assessments. arXiv preprint arXiv:2110.03895 (2021)"},{"key":"20_CR10","unstructured":"Jia, Q., et al.: Insta-reviewer: a data-driven approach for generating instant feedback on students\u2019 project reports. International Educational Data Mining Society (2022)"},{"key":"20_CR11","unstructured":"Kakarla, S., Thomas, D., Lin, J., Gupta, S., Koedinger, K.R.: Using large language models to assess tutors\u2019 performance in reacting to students making math errors. arXiv preprint arXiv:2401.03238 (2024)"},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Kochmar, E., Vu, D.D., Belfer, R., Gupta, V., Serban, I.V., Pineau, J.: Automated personalized feedback improves learning gains in an intelligent tutoring system. In: International Conference on Artificial Intelligence in Education. pp. 140\u2013146 (2020)","DOI":"10.1007\/978-3-030-52240-7_26"},{"key":"20_CR13","unstructured":"Kocmi, T., Federmann, C.: Large language models are state-of-the-art evaluators of translation quality. arXiv preprint arXiv:2302.14520 (2023)"},{"key":"20_CR14","first-page":"22199","volume":"35","author":"T Kojima","year":"2022","unstructured":"Kojima, T., Gu, S.S., Reid, M., Matsuo, Y., Iwasawa, Y.: Large language models are zero-shot reasoners. Adv. Neural. Inf. Process. Syst. 35, 22199\u201322213 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Lan, A.S., Vats, D., Waters, A.E., Baraniuk, R.G.: Mathematical language processing: automatic grading and feedback for open response mathematical questions. In: Proceedings of the ACM Conference on learning@scale, pp. 167\u2013176 (2015)","DOI":"10.1145\/2724660.2724664"},{"key":"20_CR16","unstructured":"Lee, H., et al.: Rlaif: Scaling reinforcement learning from human feedback with AI feedback. arXiv preprint arXiv:2309.00267 (2023)"},{"key":"20_CR17","unstructured":"Lin, C.Y.: ROUGE: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, Barcelona, Spain, pp. 74\u201381. Association for Computational Linguistics (2004)"},{"key":"20_CR18","unstructured":"Liu, N., Sonkar, S., Wang, Z., Woodhead, S., Baraniuk, R.G.: Novice learner and expert tutor: evaluating math reasoning abilities of large language models with misconceptions. arXiv preprint arXiv:2310.02439 (2023)"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Liu, N., Wang, Z., Baraniuk, R., Lan, A.: Open-ended knowledge tracing for computer science education. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 3849\u20133862 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.254"},{"key":"20_CR20","unstructured":"McNichols, H., et al.: Automated distractor and feedback generation for math multiple-choice questions via in-context learning. In: NeurIPS\u201923 Workshop on Generative AI for Education (2023)"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"McNichols, H., Zhang, M., Lan, A.: Algebra error classification with large language models. In: International Conference on Artificial Intelligence in Education, pp. 365\u2013376 (2023)","DOI":"10.1007\/978-3-031-36272-9_30"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Naismith, B., Mulcaire, P., Burstein, J.: Automated evaluation of written discourse coherence using GPT-4. In: Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023), Toronto, Canada, pp. 394\u2013403. Association for Computational Linguistics (2023)","DOI":"10.18653\/v1\/2023.bea-1.32"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Nguyen, H.A., Stec, H., Hou, X., Di, S., McLaren, B.M.: Evaluating chatgpt\u2019s decimal skills and feedback generation in a digital learning game. In: Responsive and Sustainable Educational Futures, pp. 278\u2013293 (2023)","DOI":"10.1007\/978-3-031-42682-7_19"},{"key":"20_CR24","unstructured":"Rafailov, R., Sharma, A., Mitchell, E., Ermon, S., Manning, C.D., Finn, C.: Direct preference optimization: your language model is secretly a reward model (2023)"},{"key":"20_CR25","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1007\/978-3-030-52240-7_48","volume-title":"Artificial Intelligence in Education","author":"R Razzaq","year":"2020","unstructured":"Razzaq, R., Ostrow, K.S., Heffernan, N.T.: Effect of immediate feedback on math achievement at the high school level. In: Bittencourt, I.I., Cukurova, M., Muldner, K., Luckin, R., Mill\u00e1n, E. (eds.) AIED 2020. LNCS (LNAI), vol. 12164, pp. 263\u2013267. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-52240-7_48"},{"key":"20_CR26","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using Siamese BERT-networks. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"20_CR27","unstructured":"Robinson, J.D., Chuang, C.Y., Sra, S., Jegelka, S.: Contrastive learning with hard negative samples. In: International Conference on Learning Representations (2021)"},{"key":"20_CR28","unstructured":"Shinn, N., Cassano, F., Labash, B., Gopinath, A., Narasimhan, K., Yao, S.: Reflexion: language agents with verbal reinforcement learning. arXiv preprint arXiv:2303.1136614 (2023)"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Singh, R., Gulwani, S., Solar-Lezama, A.: Automated feedback generation for introductory programming assignments. In: Proceedings of the 34th ACM SIGPLAN Conference on Programming Language Design and Implementation, pp. 15\u201326 (2013)","DOI":"10.1145\/2491956.2462195"},{"key":"20_CR30","doi-asserted-by":"crossref","unstructured":"Song, D., Lee, W., Oh, H.: Context-aware and data-driven feedback generation for programming assignments. In: Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering, pp. 328\u2013340 (2021)","DOI":"10.1145\/3468264.3468598"},{"key":"20_CR31","doi-asserted-by":"crossref","unstructured":"Sonkar, S., Le, M., Chen, X., Liu, N., Mallick, D.B., Baraniuk, R.G.: Code soliloquies for accurate calculations in large language models. arXiv preprint arXiv:2309.12161 (2023)","DOI":"10.1145\/3636555.3636889"},{"key":"20_CR32","doi-asserted-by":"crossref","unstructured":"Steiss, J., et\u00a0al.: Comparing the quality of human and ChatGPT feedback on students\u2019 writing (2023)","DOI":"10.35542\/osf.io\/ty3em"},{"issue":"3","key":"20_CR33","doi-asserted-by":"publisher","first-page":"330","DOI":"10.5951\/jresematheduc.49.3.0330","volume":"49","author":"KL Sun","year":"2018","unstructured":"Sun, K.L.: Brief report: the role of mathematics teaching in fostering student growth mindset. J. Res. Math. Educ. 49(3), 330\u2013335 (2018)","journal-title":"J. Res. Math. Educ."},{"key":"20_CR34","unstructured":"Touvron, H., et al.: Llama 2: open foundation and fine-tuned chat models (2023)"},{"key":"20_CR35","unstructured":"Wolf, T., et\u00a0al.: Huggingface\u2019s transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019)"},{"key":"20_CR36","unstructured":"Zhang, M., Baral, S., Heffernan, N., Lan, A.: Automatic short math answer grading via in-context meta-learning. International Educational Data Mining Society (2022)"},{"key":"20_CR37","unstructured":"Zhang, M., Wang, Z., Baraniuk, R., Lan, A.: Math operation embeddings for open-ended solution analysis and feedback. International Educational Data Mining Society (2021)"},{"key":"20_CR38","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: BERTScore: evaluating text generation with BERT. In: International Conference on Learning Representations (2020)"},{"key":"20_CR39","unstructured":"Ziegler, D.M., et al.: Fine-tuning language models from human preferences. arXiv preprint arXiv:1909.08593 (2019)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-64302-6_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T23:15:21Z","timestamp":1757459721000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-64302-6_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031643019","9783031643026"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-64302-6_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"2 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIED","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence in Education","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Recife","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazil","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aied2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aied2024.cesar.school\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}