{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T16:24:33Z","timestamp":1783095873520,"version":"3.54.6"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031984198","type":"print"},{"value":"9783031984204","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-98420-4_17","type":"book-chapter","created":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T20:14:09Z","timestamp":1752783249000},"page":"231-245","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Benchmarking Generative AI for\u00a0Scoring Medical Student Interviews in\u00a0Objective Structured Clinical Examinations (OSCEs)"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-2851-2457","authenticated-orcid":false,"given":"Jadon","family":"Geathers","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7234-7001","authenticated-orcid":false,"given":"Yann","family":"Hicke","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6323-5673","authenticated-orcid":false,"given":"Colleen","family":"Chan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1205-6979","authenticated-orcid":false,"given":"Niroop","family":"Rajashekar","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6203-3986","authenticated-orcid":false,"given":"Sarah","family":"Young","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4049-2874","authenticated-orcid":false,"given":"Justin","family":"Sewell","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3946-2531","authenticated-orcid":false,"given":"Susannah","family":"Cornes","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6283-5546","authenticated-orcid":false,"given":"Rene F.","family":"Kizilcec","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8226-1842","authenticated-orcid":false,"given":"Dennis","family":"Shung","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,7,15]]},"reference":[{"issue":"1","key":"17_CR1","doi-asserted-by":"publisher","DOI":"10.2196\/48291","volume":"9","author":"A Abd-Alrazaq","year":"2023","unstructured":"Abd-Alrazaq, A., et al.: Large language models in medical education: opportunities, challenges, and future directions. JMIR Med. Educ. 9(1), e48291 (2023)","journal-title":"JMIR Med. Educ."},{"key":"17_CR2","unstructured":"AI, M.: Llama-v3p1-405b-instruct (2024), model release date: 2024"},{"key":"17_CR3","unstructured":"Anthropic: Claude-3.5-sonnet-20240620 (2024), model release date: June 20, 2024"},{"key":"17_CR4","unstructured":"Attali, Y., Burstein, J.: Automated essay scoring with e-rater\u00ae v. 2. J. Technol. Learn. Assess. 4(3) (2006)"},{"issue":"5","key":"17_CR5","doi-asserted-by":"publisher","first-page":"645","DOI":"10.4300\/JGME-D-17-00016.1","volume":"9","author":"JD Baldwin","year":"2017","unstructured":"Baldwin, J.D., Cox, J., Wu, Z.H., Kenny, A., Angus, S.: Delivery and measurement of high-value care in standardized patient encounters. J. Grad. Med. Educ. 9(5), 645\u2013649 (2017)","journal-title":"J. Grad. Med. Educ."},{"key":"17_CR6","doi-asserted-by":"crossref","unstructured":"Bartlett, G., Blais, R., Tamblyn, R., Clermont, R.J., MacGibbon, B.: Impact of patient communication problems on the risk of preventable adverse events in acute care settings. CMAJ: Canadian Med. Assoc. J. 178(12), 1555\u20131562 (2008)","DOI":"10.1503\/cmaj.070690"},{"issue":"1","key":"17_CR7","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1097\/ACM.0000000000000529","volume":"90","author":"K Berg","year":"2015","unstructured":"Berg, K., et al.: Standardized patient assessment of medical student empathy: ethnicity and gender effects in a multi-institutional study. Acad. Med. 90(1), 105\u2013111 (2015). https:\/\/doi.org\/10.1097\/ACM.0000000000000529","journal-title":"Acad. Med."},{"issue":"2","key":"17_CR8","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1002\/hrm.21678","volume":"55","author":"HJ Bernardin","year":"2016","unstructured":"Bernardin, H.J., Thomason, S., Buckley, M.R., Kane, J.S.: Rater rating-level bias and accuracy in performance appraisals: The impact of rater personality, performance management competence, and rater accountability. Hum. Resour. Manage. 55(2), 321\u2013340 (2016)","journal-title":"Hum. Resour. Manage."},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Borchers, C., Yang, K., Lin, J., Rummel, N., Koedinger, K.R., Aleven, V.: Combining dialog acts and skill modeling: What chat interactions enhance learning rates during AI-supported peer tutoring? In: Proceedings of the 17th International Conference on Educational Data Mining (2024)","DOI":"10.35542\/osf.io\/3tmhy"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Chong, L., Taylor, S., Haywood, M., Adelstein, B.A., Shulruf, B.: The sights and insights of examiners in objective structured clinical examinations. J. Educ. Eval. Health Professions 14 (2017)","DOI":"10.3352\/jeehp.2017.14.34"},{"key":"17_CR11","unstructured":"DeepMind, G.: Gemini-1.5-pro-exp-0801 (2024), model release date: August 1, 2024"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Demszky, D., Liu, J.: M-powering teachers: natural language processing powered feedback improves 1: 1 instruction and student outcomes. In: Proceedings of the Tenth ACM Conference on Learning@ Scale, pp. 59\u201369 (2023)","DOI":"10.1145\/3573051.3593379"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Dimari, A., Tyagi, N., Davanageri, M., Kukreti, R., Yadav, R., Dimari, H.: Ai-based automated grading systems for open book examination system: implications for assessment in higher education. In: 2024 International Conference on Knowledge Engineering and Communication Systems (ICKECS), vol.\u00a01, pp.\u00a01\u20137. IEEE (2024)","DOI":"10.1109\/ICKECS61492.2024.10616490"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Duran, M., Hendrix, J.M.: The role of videotaped clinical skills aggregated peer evaluations in the enhancement of evaluation skills of individual medical school faculty members. Cureus 15(4) (2023)","DOI":"10.7759\/cureus.37871"},{"issue":"8","key":"17_CR15","doi-asserted-by":"publisher","first-page":"1155","DOI":"10.1007\/s11606-007-0250-0","volume":"22","author":"A Fernandez","year":"2007","unstructured":"Fernandez, A., Wang, F., Braveman, P., Finkas, L.K., Hauer, K.E.: Impact of student ethnicity and primary childhood language on communication skill assessment in a clinical performance examination. J. Gen. Intern. Med. 22(8), 1155\u20131160 (2007). https:\/\/doi.org\/10.1007\/s11606-007-0250-0. epub 2007 Jun 9","journal-title":"J. Gen. Intern. Med."},{"key":"17_CR16","doi-asserted-by":"publisher","unstructured":"Firdaus, M., Singh, G., Ekbal, A., Bhattacharyya, P.: Multi-step prompting for few-shot emotion-grounded conversations. In: Proceedings of the 32nd ACM International Conference on Information and Knowledge Management, pp. 3886\u20133891. CIKM \u201923, Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3583780.3615265","DOI":"10.1145\/3583780.3615265"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Flod\u00e9n, J.: Grading exams using large language models: a comparison between human and AI grading of exams in higher education using ChatGPT. British Educ. Res. J. (2024)","DOI":"10.1002\/berj.4069"},{"issue":"11S","key":"17_CR18","doi-asserted-by":"publisher","first-page":"S29","DOI":"10.1097\/ACM.0000000000004925","volume":"97","author":"A Fluet","year":"2022","unstructured":"Fluet, A., Essakow, J., Ju, M.: Standardized patients\u2019 perspectives on bias in student encounters. Acad. Med. 97(11S), S29\u2013S34 (2022). https:\/\/doi.org\/10.1097\/ACM.0000000000004925. epub 2022 Aug 9","journal-title":"Acad. Med."},{"key":"17_CR19","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/s10459-014-9524-6","volume":"20","author":"CJ Harrison","year":"2015","unstructured":"Harrison, C.J., K\u00f6nings, K.D., Schuwirth, L., Wass, V., Van der Vleuten, C.: Barriers to the uptake and use of feedback in the context of summative assessment. Adv. Health Sci. Educ. 20, 229\u2013245 (2015)","journal-title":"Adv. Health Sci. Educ."},{"issue":"1","key":"17_CR20","doi-asserted-by":"publisher","first-page":"81","DOI":"10.3109\/0142159X.2014.947934","volume":"37","author":"D Hope","year":"2015","unstructured":"Hope, D., Cameron, H.: Examiners are most lenient at the start of a two-day osce. Med. Teach. 37(1), 81\u201385 (2015)","journal-title":"Med. Teach."},{"key":"17_CR21","doi-asserted-by":"publisher","first-page":"1265024","DOI":"10.3389\/fonc.2023.1265024","volume":"13","author":"Y Huang","year":"2023","unstructured":"Huang, Y., et al.: Benchmarking chatgpt-4 on a radiation oncology in-training exam and red journal gray zone cases: potentials and challenges for ai-assisted medical education and decision making in radiation oncology. Front. Oncol. 13, 1265024 (2023)","journal-title":"Front. Oncol."},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Jain, R., Saha, T., Lalwani, J., Saha, S.: Can you summarize my learnings? towards perspective-based educational dialogue summarization. In: Findings of the Association for Computational Linguistics: EMNLP 2023, pp. 3158\u20133173 (2023)","DOI":"10.18653\/v1\/2023.findings-emnlp.208"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Jamieson, A.R., et\u00a0al.: Rubrics to prompts: Assessing medical student post-encounter notes with AI. NEJM AI 1(12), AIcs2400631 (2024)","DOI":"10.1056\/AIcs2400631"},{"issue":"2","key":"17_CR24","doi-asserted-by":"publisher","first-page":"165","DOI":"10.4300\/JGME-D-10-00024.1","volume":"2","author":"BL Joyce","year":"2010","unstructured":"Joyce, B.L., Steenbergh, T., Scher, E.: Use of the Kalamazoo essential elements communication checklist (adapted) in an institutional interpersonal and communication skills curriculum. J. Grad. Med. Educ. 2(2), 165\u2013169 (2010)","journal-title":"J. Grad. Med. Educ."},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Khan, K.Z., Ramachandran, S., Gaunt, K., Pushkar, P.: The objective structured clinical examination (osce): Amee guide no. 81. part i: an historical and theoretical perspective. Med. Teacher 35(9), e1437\u2013e1446 (2013)","DOI":"10.3109\/0142159X.2013.818634"},{"key":"17_CR26","doi-asserted-by":"crossref","unstructured":"Luo, M., Warren, C.J., Cheng, L., Abdul-Muhsin, H.M., Banerjee, I.: Assessing empathy in large language models with real-world physician-patient interactions. arXiv preprint arXiv:2405.16402 (2024)","DOI":"10.1109\/BigData62323.2024.10825307"},{"issue":"10","key":"17_CR27","doi-asserted-by":"publisher","first-page":"989","DOI":"10.1111\/j.1365-2923.2009.03438.x","volume":"43","author":"K McLaughlin","year":"2009","unstructured":"McLaughlin, K., Ainslie, M., Coderre, S., Wright, B., Violato, C.: The effect of differential rater function over time (drift) on objective structured clinical examination ratings. Med. Educ. 43(10), 989\u2013992 (2009)","journal-title":"Med. Educ."},{"key":"17_CR28","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1016\/j.asw.2014.09.002","volume":"23","author":"DS McNamara","year":"2015","unstructured":"McNamara, D.S., Crossley, S.A., Roscoe, R.D., Allen, L.K., Dai, J.: A hierarchical classification approach to automated essay scoring. Assess. Writ. 23, 35\u201359 (2015)","journal-title":"Assess. Writ."},{"key":"17_CR29","unstructured":"OpenAI: Gpt-4o-2024-08-06 (2024), model release date: August 6, 2024"},{"key":"17_CR30","unstructured":"OpenAI: Whisper: Automatic speech recognition system (2024). https:\/\/github.com\/openai\/whisper. Accessed 07 Oct 2024"},{"issue":"9","key":"17_CR31","doi-asserted-by":"publisher","first-page":"1730","DOI":"10.1111\/j.1532-5415.2008.01860.x","volume":"56","author":"P O\u2019Sullivan","year":"2008","unstructured":"O\u2019Sullivan, P., Chao, S., Russell, M., Levine, S., Fabiny, A.: Development and implementation of an objective structured clinical examination to provide formative feedback on communication and interpersonal skills in geriatric training. J. Am. Geriatr. Soc. 56(9), 1730\u20131735 (2008). https:\/\/doi.org\/10.1111\/j.1532-5415.2008.01860.x","journal-title":"J. Am. Geriatr. Soc."},{"key":"17_CR32","unstructured":"Pfeiffer, C.A., et al.: Master interview rating scale (mirs). https:\/\/health.uconn.edu\/principles-clinical-medicine-clinical-skills-assessment\/master-interview-rating-scale-mirs\/. Accessed 07 Oct 2024"},{"key":"17_CR33","first-page":"1","volume":"23","author":"H Ratna","year":"2019","unstructured":"Ratna, H.: The importance of effective communication in healthcare practice. Harvard Public Health Rev. 23, 1\u20136 (2019)","journal-title":"Harvard Public Health Rev."},{"key":"17_CR34","doi-asserted-by":"crossref","unstructured":"Safranek, C.W., Sidamon-Eristoff, A.E., Gilson, A., Chartash, D.: The role of large language models in medical education: applications and implications (2023)","DOI":"10.2196\/preprints.50945"},{"key":"17_CR35","unstructured":"Shakur, A.H., et al.: Large language models for medical OSCE assessment: A novel approach to transcript analysis. arXiv preprint arXiv:2410.12858 (2024)"},{"key":"17_CR36","unstructured":"The University of Tennessee Health Science Center: Master interview rating scale (2024). https:\/\/www.uthsc.edu\/simulation\/resources\/documents\/sp-full-communication-rating-scale-mirs.docx, accessed: 2024-02-07"},{"key":"17_CR37","unstructured":"Uchida, T., Cornes, S.: Standardized Patients. Elsevier, 3rd edn. (2023)"},{"issue":"2","key":"17_CR38","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1111\/j.1365-2923.1992.tb00133.x","volume":"26","author":"NV Vu","year":"1992","unstructured":"Vu, N.V., Marcy, M.M., Colliver, J.A., Verhulst, S.J., Travis, T.A., Barrows, H.S.: Standardized (simulated) patients\u2019 accuracy in recording clinical performance check-list items. Med. Educ. 26(2), 99\u2013104 (1992)","journal-title":"Med. Educ."},{"key":"17_CR39","doi-asserted-by":"crossref","unstructured":"Wang, R.E., Ribeiro, A.T., Robinson, C.D., Loeb, S., Demszky, D.: Tutor copilot: A human-ai approach for scaling real-time expertise. arXiv preprint arXiv:2410.03017 (2024)","DOI":"10.21203\/rs.3.rs-5363154\/v1"},{"issue":"3","key":"17_CR40","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3386252","volume":"53","author":"Y Wang","year":"2020","unstructured":"Wang, Y., Yao, Q., Kwok, J.T., Ni, L.M.: Generalizing from a few examples: a survey on few-shot learning. ACM Comput. Surv. (csur) 53(3), 1\u201334 (2020)","journal-title":"ACM Comput. Surv. (csur)"},{"key":"17_CR41","doi-asserted-by":"publisher","first-page":"24824","DOI":"10.52202\/068431-1800","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural. Inf. Process. Syst. 35, 24824\u201324837 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"17_CR42","doi-asserted-by":"crossref","unstructured":"Wetzler, E.L., et al.: Grading the graders: Comparing generative AI and human assessment in essay evaluation. Teaching of Psychology, p. 00986283241282696 (2024)","DOI":"10.1177\/00986283241282696"},{"issue":"1","key":"17_CR43","doi-asserted-by":"publisher","DOI":"10.2196\/57674","volume":"12","author":"J Xu","year":"2024","unstructured":"Xu, J., et al.: Data set and benchmark (medgpteval) to evaluate responses from large language models in medicine: evaluation development and validation. JMIR Med. Inform. 12(1), e57674 (2024)","journal-title":"JMIR Med. Inform."},{"key":"17_CR44","doi-asserted-by":"crossref","unstructured":"Yang, B., Nam, S., Huang, Y.: \u201cwhy my essay received a 4?\u201d: A natural language processing based argumentative essay structure analysis. In: International Conference on Artificial Intelligence in Education, pp. 279\u2013290. Springer (2023)","DOI":"10.1007\/978-3-031-36272-9_23"},{"issue":"1","key":"17_CR45","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1111\/bjet.13494","volume":"56","author":"F Yavuz","year":"2025","unstructured":"Yavuz, F., \u00c7elik, \u00d6., Yava\u015f \u00c7elik, G.: Utilizing large language models for EFL essay grading: an examination of reliability and validity in rubric-based assessments. Br. J. Edu. Technol. 56(1), 150\u2013166 (2025)","journal-title":"Br. J. Edu. Technol."},{"key":"17_CR46","unstructured":"Zimmermann, P., Kadmon, M.: Standardized examinees: development of a new tool to evaluate factors influencing OSCE scores and to train examiners. GMS J. Med. Educ. 37(4) (2020)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-98420-4_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T16:03:25Z","timestamp":1783094605000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-98420-4_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031984198","9783031984204"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-98420-4_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"15 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIED","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence in Education","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Palermo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aied2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aied2025.itd.cnr.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}