{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,18]],"date-time":"2026-05-18T16:42:01Z","timestamp":1779122521137,"version":"3.51.4"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031643019","type":"print"},{"value":"9783031643026","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-64302-6_14","type":"book-chapter","created":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T06:04:39Z","timestamp":1719813879000},"page":"192-205","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Can GPT4 Answer Educational Tests? Empirical Analysis of\u00a0Answer Quality Based on\u00a0Question Complexity and\u00a0Difficulty"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0343-3701","authenticated-orcid":false,"given":"Luiz","family":"Rodrigues","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4914-3347","authenticated-orcid":false,"given":"Filipe Dwan","family":"Pereira","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4235-5753","authenticated-orcid":false,"given":"Luciano","family":"Cabral","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8306-4410","authenticated-orcid":false,"given":"Geber","family":"Ramalho","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9265-1908","authenticated-orcid":false,"given":"Dragan","family":"Gasevic","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3548-9670","authenticated-orcid":false,"given":"Rafael Ferreira","family":"Mello","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,7,2]]},"reference":[{"key":"14_CR1","volume-title":"Bloom\u2019s Taxonomy","author":"LW Anderson","year":"1994","unstructured":"Anderson, L.W., Sosniak, L.A.: Bloom\u2019s Taxonomy. Univ. Chicago Press, Chicago (1994)"},{"key":"14_CR2","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1162\/tacl_a_00236","volume":"1","author":"S Basu","year":"2013","unstructured":"Basu, S., Jacobs, C., Vanderwende, L.: Powergrading: a clustering approach to amplify human effort for short answer grading. Trans. Assoc. Comput. Linguist. 1, 391\u2013402 (2013)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Bates, D., M\u00e4chler, M., Bolker, B., Walker, S.: Fitting linear mixed-effects models using lme4. arXiv preprint arXiv:1406.5823 (2014)","DOI":"10.18637\/jss.v067.i01"},{"key":"14_CR4","unstructured":"Brown, T.B., et al.: Language models are few-shot learners (2020)"},{"key":"14_CR5","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1146\/annurev-statistics-041715-033702","volume":"3","author":"L Cai","year":"2016","unstructured":"Cai, L., Choi, K., Hansen, M., Harrell, L.: Item response theory. Annu. Rev. Stat. Appl. 3, 297\u2013321 (2016)","journal-title":"Annu. Rev. Stat. Appl."},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Divya, A., Haridas, V., Narayanan, J.: Automation of short answer grading techniques: comparative study using deep learning techniques. In: 2023 Fifth International Conference on Electrical, Computer and Communication Technologies (ICECCT), pp.\u00a01\u20137. IEEE (2023)","DOI":"10.1109\/ICECCT56650.2023.10179759"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Gelman, A., Hill, J.: Data Analysis Using Regression and Multilevel\/Hierarchical Models. Cambridge University Press, Cambridge (2006)","DOI":"10.1017\/CBO9780511790942"},{"key":"14_CR8","doi-asserted-by":"crossref","unstructured":"Hackl, V., M\u00fcller, A.E., Granitzer, M., Sailer, M.: Is GPT-4 a reliable rater? Evaluating consistency in GPT-4 text ratings. arXiv preprint arXiv:2308.02575 (2023)","DOI":"10.3389\/feduc.2023.1272229"},{"key":"14_CR9","doi-asserted-by":"crossref","unstructured":"Horbach, A., Stennmanns, S., Zesch, T.: Cross-lingual content scoring. In: Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications, pp. 410\u2013419 (2018)","DOI":"10.18653\/v1\/W18-0550"},{"key":"14_CR10","doi-asserted-by":"crossref","unstructured":"Hox, J.J., Moerbeek, M., Van\u00a0de Schoot, R.: Multilevel Analysis: Techniques and Applications. Routledge, London (2010)","DOI":"10.4324\/9780203852279"},{"key":"14_CR11","first-page":"22199","volume":"35","author":"T Kojima","year":"2022","unstructured":"Kojima, T., Gu, S.S., Reid, M., Matsuo, Y., Iwasawa, Y.: Large language models are zero-shot reasoners. Adv. Neural. Inf. Process. Syst. 35, 22199\u201322213 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"14_CR12","doi-asserted-by":"publisher","unstructured":"Kuznetsova, A., Brockhoff, P.B., Christensen, R.H.B.: lmerTest package: tests in linear mixed effects models. J. Stat. Softw. 82(13), 1\u201326 (2017). https:\/\/doi.org\/10.18637\/jss.v082.i13","DOI":"10.18637\/jss.v082.i13"},{"key":"14_CR13","series-title":"Lecture Notes in Educational Technology","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1007\/978-981-13-6908-7_9","volume-title":"Foundations and Trends in Smart Learning","author":"Y Liu","year":"2019","unstructured":"Liu, Y., Xu, B., Yang, Y., Chung, T., Zhang, P.: Constructing a hybrid automatic Q&A system integrating knowledge graph and information retrieval technologies. In: Foundations and Trends in Smart Learning. LNET, pp. 67\u201376. Springer, Singapore (2019). https:\/\/doi.org\/10.1007\/978-981-13-6908-7_9"},{"key":"14_CR14","unstructured":"Lord, F., Novick, M.: Statistical Theories of Mental Test Scores. Addison-Wesley Series in Behavioral Sciences: Quantitative Methods, Information Age Publishing, Incorporated (2008)"},{"key":"14_CR15","unstructured":"OpenAI: GPT-4 technical report (2023)"},{"key":"14_CR16","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer (2023)"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Roso\u0142, M., Gasior, J.S., \u0141aba, J., Korzeniewski, K., M\u0142y\u0144czak, M.: Evaluation of the performance of GPT-3.5 and GPT-4 on the Polish medical final examination. Sci. Rep. 13(1), 20512 (2023)","DOI":"10.1038\/s41598-023-46995-z"},{"key":"14_CR18","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Neural Information Processing Systems (2017). https:\/\/api.semanticscholar.org\/CorpusID:13756489"},{"key":"14_CR19","doi-asserted-by":"publisher","unstructured":"Wang, R., Demszky, D.: Is ChatGPT a good teacher coach? Measuring zero-shot performance for scoring and providing actionable insights on classroom instruction. In: Kochmar, E., et al. (eds.) Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023), pp. 626\u2013667. Association for Computational Linguistics, Toronto, Canada (2023). https:\/\/doi.org\/10.18653\/v1\/2023.bea-1.53","DOI":"10.18653\/v1\/2023.bea-1.53"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"de Winter, J.C.F.: Can ChatGPT pass high school exams on English language comprehension? Int. J. Artif. Intell. Educ. (2023)","DOI":"10.1007\/s40593-023-00372-z"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Xia, Q., Chiu, T.K., Zhou, X., Chai, C.S., Cheng, M.: Systematic literature review on opportunities, challenges, and future research recommendations of artificial intelligence in education. Comput. Educ. Artif. Intell. 100118 (2022)","DOI":"10.1016\/j.caeai.2022.100118"},{"key":"14_CR22","doi-asserted-by":"publisher","unstructured":"Yan, L., et al.: Practical and ethical challenges of large language models in education: a systematic scoping review. Br. J. Educ. Technol. n\/a(n\/a). https:\/\/doi.org\/10.1111\/bjet.13370","DOI":"10.1111\/bjet.13370"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Yenduri, G., et al.: GPT (generative pre-trained transformer) - a comprehensive review on enabling technologies, potential applications, emerging challenges, and future directions (2023)","DOI":"10.1109\/ACCESS.2024.3389497"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-64302-6_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T20:26:16Z","timestamp":1757103976000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-64302-6_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031643019","9783031643026"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-64302-6_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"2 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIED","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence in Education","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Recife","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazil","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aied2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aied2024.cesar.school\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}