{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T19:30:51Z","timestamp":1772652651599,"version":"3.50.1"},"reference-count":41,"publisher":"Elsevier BV","issue":"4","license":[{"start":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T00:00:00Z","timestamp":1743724800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T00:00:00Z","timestamp":1743724800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Artif Intell Educ"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s40593-025-00467-9","type":"journal-article","created":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T17:09:10Z","timestamp":1743872950000},"page":"2202-2214","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Large Language Models Take on the AAMC Situational Judgment Test: Evaluating Dilemma-Based Scenarios"],"prefix":"10.1016","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9933-2804","authenticated-orcid":false,"given":"Angelo","family":"Cadiente","sequence":"first","affiliation":[]},{"given":"Jamie","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Lora J.","family":"Kasselman","sequence":"additional","affiliation":[]},{"given":"Bryan","family":"Pilkington","sequence":"additional","affiliation":[]}],"member":"78","published-online":{"date-parts":[[2025,4,4]]},"reference":[{"issue":"10","key":"467_CR1","doi-asserted-by":"publisher","first-page":"1457","DOI":"10.3390\/jpm13101457","volume":"13","author":"N Aiumtrakul","year":"2023","unstructured":"Aiumtrakul, N., Thongprayoon, C., Suppadungsuk, S., et al. (2023). Navigating the Landscape of Personalized Medicine: The Relevance of ChatGPT, BingChat, and Bard AI in Nephrology Literature Searches. Journal of Personalized Medicine., 13(10), 1457. https:\/\/doi.org\/10.3390\/jpm13101457","journal-title":"Journal of Personalized Medicine."},{"issue":"6","key":"467_CR2","doi-asserted-by":"publisher","first-page":"1353","DOI":"10.1227\/neu.0000000000002632","volume":"93","author":"R Ali","year":"2023","unstructured":"Ali, R., Tang, O. Y., Connolly, I. D., et al. (2023). Performance of ChatGPT and GPT-4 on Neurosurgery Written Board Examinations. Neurosurgery, 93(6), 1353\u20131365. https:\/\/doi.org\/10.1227\/neu.0000000000002632","journal-title":"Neurosurgery"},{"key":"467_CR3","doi-asserted-by":"publisher","unstructured":"Avidov, D., Ezra, O., Cohen, A., & Bronshtein, A. (2024). Generative AI in the service of situational judgment tests (SJT) for self-regulation learning during problem solving (SRL-PS). Ubiquity Proceedings,\u00a023\u201323. https:\/\/doi.org\/10.5334\/uproc.145","DOI":"10.5334\/uproc.145"},{"issue":"6","key":"467_CR4","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1001\/jamainternmed.2023.1838","volume":"183","author":"JW Ayers","year":"2023","unstructured":"Ayers, J. W., Poliak, A., Dredze, M., et al. (2023). Comparing Physician and Artificial Intelligence Chatbot Responses to Patient Questions Posted to a Public Social Media Forum. JAMA Internal Medicine., 183(6), 589\u2013596. https:\/\/doi.org\/10.1001\/jamainternmed.2023.1838","journal-title":"JAMA Internal Medicine."},{"issue":"6","key":"467_CR5","doi-asserted-by":"publisher","first-page":"1562","DOI":"10.1111\/opo.13207","volume":"43","author":"S Biswas","year":"2023","unstructured":"Biswas, S., Logan, N. S., Davies, L. N., Sheppard, A. L., & Wolffsohn, J. S. (2023). Assessing the Utility of Chatgpt as an Artificial Intelligence-Based Large Language Model for Information to Answer Questions on Myopia. Ophthalmic and Physiological Optics., 43(6), 1562\u20131570. https:\/\/doi.org\/10.1111\/opo.13207","journal-title":"Ophthalmic and Physiological Optics."},{"key":"467_CR6","doi-asserted-by":"publisher","unstructured":"Bommineni VL, Bhagwagar S, Balcarcel D, Davazitkos C, Boyer D. Performance of ChatGPT on the MCAT: The Road to Personalized and Equitable Premedical Learning. medRxiv. Published online March 7, 2023. https:\/\/doi.org\/10.1101\/2023.03.05.23286533","DOI":"10.1101\/2023.03.05.23286533"},{"key":"467_CR7","doi-asserted-by":"publisher","unstructured":"Brin D, Sorin V, Vaid A, et al. Comparing ChatGPT and GPT-4 performance in USMLE soft skill assessments. Scientific Reports. 2023;13(1). https:\/\/doi.org\/10.1038\/s41598-023-43436-9","DOI":"10.1038\/s41598-023-43436-9"},{"key":"467_CR8","doi-asserted-by":"publisher","first-page":"e49183","DOI":"10.2196\/49183","volume":"9","author":"CR Buhr","year":"2023","unstructured":"Buhr, C. R., Smith, H., Huppertz, T., et al. (2023). ChatGPT Versus Consultants: Blinded Evaluation on Answering Otorhinolaryngology Case-Based Questions. JMIR Medical Education, 9, e49183. https:\/\/doi.org\/10.2196\/49183","journal-title":"JMIR Medical Education"},{"issue":"2","key":"467_CR9","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1136\/jme-2023-109366","volume":"50","author":"J Chen","year":"2023","unstructured":"Chen, J., Cadiente, A., Kasselman, L. J., & Pilkington, B. (2023). Assessing the Performance of Chatgpt in Bioethics: A Large Language Model\u2019s Moral Compass in Medicine. Journal of Medical Ethics, 50(2), 97\u2013101. https:\/\/doi.org\/10.1136\/jme-2023-109366","journal-title":"Journal of Medical Ethics"},{"issue":"4","key":"467_CR10","doi-asserted-by":"publisher","first-page":"729","DOI":"10.1016\/j.jsurg.2020.03.018","volume":"77","author":"RC Chick","year":"2020","unstructured":"Chick, R. C., Clifton, G. T., Peace, K. M., et al. (2020). Using Technology to Maintain the Education of Residents During the COVID-19 Pandemic. Journal of Surgical Education, 77(4), 729\u2013732. https:\/\/doi.org\/10.1016\/j.jsurg.2020.03.018","journal-title":"Journal of Surgical Education"},{"issue":"5","key":"467_CR11","doi-asserted-by":"publisher","first-page":"592","DOI":"10.1111\/medu.15326","volume":"58","author":"AS de Oliveira","year":"2024","unstructured":"de Oliveira, A. S., & Bollela, V. R. (2024). ChatGPT simulations to develop communication skills in health education. Medical Education, 58(5), 592\u2013593. https:\/\/doi.org\/10.1111\/medu.15326","journal-title":"Medical Education"},{"issue":"20","key":"467_CR12","doi-asserted-by":"publisher","first-page":"6655","DOI":"10.3390\/jcm12206655","volume":"12","author":"A Draschl","year":"2023","unstructured":"Draschl, A., Hauer, G., Fischerauer, S. F., et al. (2023). Are ChatGPT\u2019s Free-Text Responses on Periprosthetic Joint Infections of the Hip and Knee Reliable and Useful? Journal of Clinical Medicine., 12(20), 6655. https:\/\/doi.org\/10.3390\/jcm12206655","journal-title":"Journal of Clinical Medicine."},{"key":"467_CR13","doi-asserted-by":"publisher","unstructured":"Friederichs H, Friederichs WJ, M\u00e4rz M. ChatGPT in medical school: how successful is AI in progress testing? Medical Education Online. 2023;28(1). https:\/\/doi.org\/10.1080\/10872981.2023.2220920","DOI":"10.1080\/10872981.2023.2220920"},{"issue":"4","key":"467_CR14","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1016\/j.gie.2023.06.025","volume":"98","author":"Y Gorelik","year":"2023","unstructured":"Gorelik, Y., Ghersin, I., Maza, I., & Klein, A. (2023). Harnessing Language Models for Streamlined Postcolonoscopy Patient Management: A Novel Approach. Gastrointestinal Endoscopy, 98(4), 639-641.e4. https:\/\/doi.org\/10.1016\/j.gie.2023.06.025","journal-title":"Gastrointestinal Endoscopy"},{"issue":"1","key":"467_CR15","doi-asserted-by":"publisher","first-page":"e34860","DOI":"10.2196\/34860","volume":"8","author":"H Jiang","year":"2021","unstructured":"Jiang, H., Vimalesvaran, S., Wang, J. K., Lim, K. B., Mogali, S. R., & Car, L. T. (2021). Virtual Reality in Medical Students\u2019 Education: A Scoping Review (Preprint). JMIR Medical Education, 8(1), e34860. https:\/\/doi.org\/10.2196\/34860","journal-title":"JMIR Medical Education"},{"key":"467_CR16","unstructured":"Learn More About the AAMC PREview\u00ae Professional Readiness Exam Program. (2024). AAMC. https:\/\/www.aamc.org\/services\/admissions-lifecycle\/aamc-preview-professional-readiness-exam-admissions-officers\/learn-more. Accessed 4 Feb 2024."},{"issue":"3","key":"467_CR17","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1111\/1471-0528.17641","volume":"131","author":"G Levin","year":"2023","unstructured":"Levin, G., Horesh, Nir, Brezinov, Yoav, & Meyer, R. (2023). Performance of ChatGPT in Medical Examinations: A Systematic Review and a Meta-Analysis. BJOG: An International Journal of Obstetrics and Gynaecology, 131(3), 378\u2013380. https:\/\/doi.org\/10.1111\/1471-0528.17641","journal-title":"BJOG: An International Journal of Obstetrics and Gynaecology"},{"issue":"2","key":"467_CR18","doi-asserted-by":"publisher","first-page":"172.e1","DOI":"10.1016\/j.ajog.2023.04.020","volume":"229","author":"SW Li","year":"2023","unstructured":"Li, S. W., Kemp, M. W., Logan, S. J. S., et al. (2023). ChatGPT Outscored Human Candidates in a Virtual Objective Structured Clinical Examination in Obstetrics and Gynecology. American Journal of Obstetrics and Gynecology., 229(2), 172.e1-172.e12. https:\/\/doi.org\/10.1016\/j.ajog.2023.04.020","journal-title":"American Journal of Obstetrics and Gynecology."},{"issue":"2","key":"467_CR19","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s40670-024-02005-z","volume":"34","author":"JC Lin","year":"2024","unstructured":"Lin, J. C., Kurapati, S. S., Younessi, D. N., Scott, I. U., & Gong, D. A. (2024). Ethical and professional decision-making capabilities of artificial intelligence chatbots: Evaluating ChatGPT\u2019s professional competencies in medicine. Medical Science Educator, 34(2), 331\u2013333. https:\/\/doi.org\/10.1007\/s40670-024-02005-z","journal-title":"Medical Science Educator"},{"issue":"11","key":"467_CR20","doi-asserted-by":"publisher","first-page":"1033","DOI":"10.1001\/jama.2020.14136","volume":"324","author":"CR Lucey","year":"2020","unstructured":"Lucey, C. R., & Johnston, S. C. (2020). The Transformational Effects of COVID-19 on Medical Education. JAMA., 324(11), 1033. https:\/\/doi.org\/10.1001\/jama.2020.14136","journal-title":"JAMA."},{"key":"467_CR21","doi-asserted-by":"publisher","unstructured":"Meo SA, Al-Khlaiwi T, AbuKhalaf AA, Meo AS, Klonoff DC. The Scientific Knowledge of Bard and ChatGPT in Endocrinology, Diabetes, and Diabetes Technology: Multiple-Choice Questions Examination-Based Performance. Journal of Diabetes Science and Technology. Published online October 5, 2023. https:\/\/doi.org\/10.1177\/19322968231203987","DOI":"10.1177\/19322968231203987"},{"issue":"6","key":"467_CR22","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1001\/jamaophthalmol.2023.1144","volume":"141","author":"A Mihalache","year":"2023","unstructured":"Mihalache, A., Popovic, M. M., & Muni, R. H. (2023). Performance of an Artificial Intelligence Chatbot in Ophthalmic Knowledge Assessment. JAMA Ophthalmology., 141(6), 589. https:\/\/doi.org\/10.1001\/jamaophthalmol.2023.1144","journal-title":"JAMA Ophthalmology."},{"key":"467_CR23","doi-asserted-by":"publisher","unstructured":"Misra, S. M., & Suresh, S. (2024). Artificial intelligence and objective structured clinical examinations: Using ChatGPT to revolutionize clinical skills assessment in medical education. Journal of Medical Education and Curricular Development, 11,\u00a023821205241263475. https:\/\/doi.org\/10.1177\/23821205241263475","DOI":"10.1177\/23821205241263475"},{"issue":"1","key":"467_CR24","doi-asserted-by":"publisher","first-page":"27449","DOI":"10.1038\/s41598-024-79048-0","volume":"14","author":"JM Mittelst\u00e4dt","year":"2024","unstructured":"Mittelst\u00e4dt, J. M., Maier, J., Goerke, P., Zinn, F., & Hermes, M. (2024). Large language models can outperform humans in social situational judgments. Scientific Reports, 14(1), 27449. https:\/\/doi.org\/10.1038\/s41598-024-79048-0","journal-title":"Scientific Reports"},{"issue":"6","key":"467_CR25","doi-asserted-by":"publisher","first-page":"952","DOI":"10.1159\/000534005","volume":"239","author":"R O\u2019Hagan","year":"2023","unstructured":"O\u2019Hagan, R., Kim, R. H., Abittan, B. J., Caldas, S., Ungar, J., & Ungar, B. (2023). Trends in Accuracy and Appropriateness of Alopecia Areata Information Obtained from a Popular Online Large Language Model ChatGPT. Dermatology, 239(6), 952\u2013957. https:\/\/doi.org\/10.1159\/000534005","journal-title":"Dermatology"},{"key":"467_CR26","unstructured":"Prepare for the AAMC PREview\u00ae Professional Readiness Exam. (2024). Students & Residents. https:\/\/students-residents.aamc.org\/aamc-preview\/prepare-aamc-preview-professional-readiness-exam. Accessed 4 Feb 2024."},{"key":"467_CR27","unstructured":"R Core Team. (2023). R: A language and environment for statistical computing. R Foundation for Statistical Computing. https:\/\/www.R-project.org\/"},{"issue":"8","key":"467_CR28","doi-asserted-by":"publisher","first-page":"937","DOI":"10.1080\/0142159x.2020.1772466","volume":"42","author":"GS Sahota","year":"2020","unstructured":"Sahota, G. S., & Taggar, J. S. (2020). The Association Between Situational Judgement Test (SJT) Scores and Professionalism Concerns in Undergraduate Medical Education. Medical Teacher, 42(8), 937\u2013943. https:\/\/doi.org\/10.1080\/0142159x.2020.1772466","journal-title":"Medical Teacher"},{"issue":"11S","key":"467_CR29","doi-asserted-by":"publisher","first-page":"S203","DOI":"10.1097\/acm.0000000000004280","volume":"96","author":"A Saxena","year":"2021","unstructured":"Saxena, A., Desanghere, L., Dore, K., & Reiter, H. (2021). Incorporating Situational Judgment Tests Into Postgraduate Medical Education Admissions: Examining Educational and Organizational Outcomes. Academic Medicine, 96(11S), S203\u2013S204. https:\/\/doi.org\/10.1097\/acm.0000000000004280","journal-title":"Academic Medicine"},{"key":"467_CR30","doi-asserted-by":"publisher","first-page":"e48933","DOI":"10.2196\/48933","volume":"11","author":"N Schopow","year":"2023","unstructured":"Schopow, N., Osterhoff, G., & Baur, D. (2023). Applications of the Natural Language Processing Tool ChatGPT in Clinical Practice: Comparative Study and Augmented Systematic Review. JMIR Medical Informatics., 11, e48933. https:\/\/doi.org\/10.2196\/48933","journal-title":"JMIR Medical Informatics."},{"issue":"1","key":"467_CR31","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1007\/s10916-023-01957-w","volume":"47","author":"R Seetharaman","year":"2023","unstructured":"Seetharaman, R. (2023). Revolutionizing medical education: Can ChatGPT boost subjective learning and expression? Journal of Medical Systems, 47(1), 61. https:\/\/doi.org\/10.1007\/s10916-023-01957-w","journal-title":"Journal of Medical Systems"},{"issue":"8","key":"467_CR32","doi-asserted-by":"publisher","first-page":"e0238239","DOI":"10.1371\/journal.pone.0238239","volume":"15","author":"A Seifi","year":"2020","unstructured":"Seifi, A., Mirahmadizadeh, A., & Eslami, V. (2020). Perception of Medical Students and Residents about Virtual Interviews for Residency Applications in the United States. Menezes RG, ed. PLOS ONE., 15(8), e0238239. https:\/\/doi.org\/10.1371\/journal.pone.0238239","journal-title":"PLOS ONE."},{"key":"467_CR33","doi-asserted-by":"publisher","unstructured":"Song H, Xia Y, Luo Z, et al. Evaluating the Performance of Different Large Language Models on Health Consultation and Patient Education in Urolithiasis. Journal of Medical Systems. 2023;47(1). https:\/\/doi.org\/10.1007\/s10916-023-02021-3","DOI":"10.1007\/s10916-023-02021-3"},{"key":"467_CR34","doi-asserted-by":"publisher","unstructured":"Taloni A, Borselli M, Scarsi V, et al. Comparative performance of humans versus GPT-4.0 and GPT-3.5 in the self-assessment program of American Academy of Ophthalmology. Scientific Reports. 2023;13(1). https:\/\/doi.org\/10.1038\/s41598-023-45837-2","DOI":"10.1038\/s41598-023-45837-2"},{"key":"467_CR35","doi-asserted-by":"publisher","unstructured":"TANAKA OM, GASPARELLO GG, HARTMANN GC, CASAGRANDE FA, PITHON MM. Assessing the reliability of ChatGPT: a content analysis of self-generated and self-answered questions on clear aligners, TADs and digital imaging. Dental Press Journal of Orthodontics. 2023;28(5). https:\/\/doi.org\/10.1590\/2177-6709.28.5.e2323183.oar","DOI":"10.1590\/2177-6709.28.5.e2323183.oar"},{"issue":"7","key":"467_CR36","doi-asserted-by":"publisher","first-page":"754","DOI":"10.1111\/medu.14801","volume":"56","author":"PA Tiffin","year":"2022","unstructured":"Tiffin, P. A., Sanger, E., Smith, D. T., Troughton, A., & Paton, L. W. (2022). Situational Judgement Test Performance and Subsequent Misconduct in Medical Students. Medical Education, 56(7), 754\u2013763. https:\/\/doi.org\/10.1111\/medu.14801","journal-title":"Medical Education"},{"key":"467_CR37","doi-asserted-by":"publisher","first-page":"30","DOI":"10.3352\/jeehp.2023.20.30","volume":"20","author":"BC Torres-Zegarra","year":"2023","unstructured":"Torres-Zegarra, B. C., Rios-Garcia, W., \u00d1a\u00f1a-Cordova, A. M., et al. (2023). Performance of ChatGPT, Bard, Claude, and Bing on the Peruvian National Licensing Medical Examination: A cross-sectional study. Journal of Educational Evaluation for Health Professions., 20, 30. https:\/\/doi.org\/10.3352\/jeehp.2023.20.30","journal-title":"Journal of Educational Evaluation for Health Professions."},{"key":"467_CR38","doi-asserted-by":"publisher","first-page":"e47479","DOI":"10.2196\/47479","volume":"25","author":"HL Walker","year":"2023","unstructured":"Walker, H. L., Ghani, S., Kuemmerli, C., et al. (2023). Reliability of Medical Information Provided by ChatGPT: Assessment Against Clinical Guidelines and Patient Information Quality Instrument. Journal of Medical Internet Research, 25, e47479. https:\/\/doi.org\/10.2196\/47479","journal-title":"Journal of Medical Internet Research"},{"issue":"10","key":"467_CR39","doi-asserted-by":"publisher","first-page":"888","DOI":"10.1111\/medu.14201","volume":"54","author":"ES Webster","year":"2020","unstructured":"Webster, E. S., Paton, L. W., Crampton, P. E. S., & Tiffin, P. A. (2020). Situational Judgement Test Validity for Selection: A Systematic Review and Meta-Analysis. Medical Education, 54(10), 888\u2013902. https:\/\/doi.org\/10.1111\/medu.14201","journal-title":"Medical Education"},{"key":"467_CR40","doi-asserted-by":"publisher","unstructured":"Xu, T., Weng, H., Liu, F., et al. (2024). Current status of ChatGPT use in medical education: Potentials, challenges, and strategies. Journal of Medical Internet Research, 26, e57896. https:\/\/doi.org\/10.2196\/57896","DOI":"10.2196\/57896"},{"key":"467_CR41","doi-asserted-by":"publisher","unstructured":"Yiu A, Lam K. Performance of large language models at the MRCS Part A: a tool for medical education? Annals of The Royal College of Surgeons of England. Published online December 1, 2023. https:\/\/doi.org\/10.1308\/rcsann.2023.0085","DOI":"10.1308\/rcsann.2023.0085"}],"container-title":["International Journal of Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40593-025-00467-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40593-025-00467-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40593-025-00467-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T18:12:39Z","timestamp":1772647959000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40593-025-00467-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,4]]},"references-count":41,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["467"],"URL":"https:\/\/doi.org\/10.1007\/s40593-025-00467-9","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-4560463\/v1","asserted-by":"object"}]},"ISSN":["1560-4292","1560-4306"],"issn-type":[{"value":"1560-4292","type":"print"},{"value":"1560-4306","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,4]]},"assertion":[{"value":"13 March 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 April 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}]}}