{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T21:33:11Z","timestamp":1777152791988,"version":"3.51.4"},"reference-count":67,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100003593","name":"National Council for Scientific and Technological Development","doi-asserted-by":"publisher","award":["310888\/2021-2"],"award-info":[{"award-number":["310888\/2021-2"]}],"id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computers and Education: Artificial Intelligence"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1016\/j.caeai.2025.100428","type":"journal-article","created":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T11:16:35Z","timestamp":1748862995000},"page":"100428","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":6,"special_numbering":"C","title":["Is GPT-4 fair? An empirical analysis in automatic short answer grading"],"prefix":"10.1016","volume":"8","author":[{"given":"Luiz","family":"Rodrigues","sequence":"first","affiliation":[]},{"given":"Cleon","family":"Xavier","sequence":"additional","affiliation":[]},{"given":"Newarney","family":"Costa","sequence":"additional","affiliation":[]},{"given":"Dragan","family":"Gasevic","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3548-9670","authenticated-orcid":false,"given":"Rafael Ferreira","family":"Mello","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.caeai.2025.100428_br0010","series-title":"A taxonomy for learning, teaching, and assessing: A revision of bloom's taxonomy of educational objectives","author":"Anderson","year":"2001"},{"key":"10.1016\/j.caeai.2025.100428_br0020","author":"Anthis"},{"key":"10.1016\/j.caeai.2025.100428_br0030","doi-asserted-by":"crossref","first-page":"502","DOI":"10.1111\/opo.12131","article-title":"When to use the b onferroni correction","volume":"34","author":"Armstrong","year":"2014","journal-title":"Ophthalmic & Physiological Optics"},{"key":"10.1016\/j.caeai.2025.100428_br0040","author":"Atwood"},{"key":"10.1016\/j.caeai.2025.100428_br0050","first-page":"1","article-title":"Algorithmic bias in education","author":"Baker","year":"2022","journal-title":"International Journal of Artificial Intelligence in Education"},{"key":"10.1016\/j.caeai.2025.100428_br0060","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1162\/tacl_a_00236","article-title":"Powergrading: A clustering approach to amplify human effort for short answer grading","volume":"1","author":"Basu","year":"2013","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"10.1016\/j.caeai.2025.100428_br0070","series-title":"International conference on artificial intelligence in education","first-page":"71","article-title":"Learning analytics and fairness: Do existing algorithms serve everyone equally?","author":"Bayer","year":"2021"},{"key":"10.1016\/j.caeai.2025.100428_br0080","series-title":"Machine learning and knowledge extraction: 5th IFIP TC 5, TC 12, WG 8.4, WG 8.9, WG 12.9 international cross-domain conference, CD-MAKE 2021, virtual event, August 17\u201320, 2021, proceedings 5","first-page":"61","article-title":"Automated short answer grading using deep learning: A survey","author":"Bonthu","year":"2021"},{"key":"10.1016\/j.caeai.2025.100428_br0090","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1007\/s40593-014-0026-8","article-title":"The eras and trends of automatic short answer grading","volume":"25","author":"Burrows","year":"2015","journal-title":"International Journal of Artificial Intelligence in Education"},{"key":"10.1016\/j.caeai.2025.100428_br0100","series-title":"Doing better statistics in human-computer interaction","author":"Cairns","year":"2019"},{"key":"10.1016\/j.caeai.2025.100428_br0110","article-title":"Automatic feedback in online learning environments: A systematic literature review","volume":"2","author":"Cavalcanti","year":"2021","journal-title":"Computers and Education: Artificial Intelligence"},{"key":"10.1016\/j.caeai.2025.100428_br0120","series-title":"Proceedings of the AAAI conference on artificial intelligence, Vol. 38","first-page":"23173","article-title":"Automatic short answer grading for finish with chatgpt","author":"Chang","year":"2024"},{"key":"10.1016\/j.caeai.2025.100428_br0130","article-title":"Systematic literature review on opportunities, challenges, and future research recommendations of artificial intelligence in education","volume":"4","author":"Chiu","year":"2023","journal-title":"Computers and Education: Artificial Intelligence"},{"key":"10.1016\/j.caeai.2025.100428_br0140","article-title":"Automatic short answer grading with sbert on out-of-sample questions","author":"Condor","year":"2021","journal-title":"International Educational Data Mining Society"},{"key":"10.1016\/j.caeai.2025.100428_br0150","series-title":"Mathematical methods of statistics, Vol. 26","author":"Cram\u00e9r","year":"1999"},{"key":"10.1016\/j.caeai.2025.100428_br0160","doi-asserted-by":"crossref","first-page":"822","DOI":"10.1111\/bjet.13217","article-title":"How do the existing fairness metrics and unfairness mitigation algorithms contribute to ethical learning analytics?","volume":"53","author":"Deho","year":"2022","journal-title":"British Journal of Educational Technology"},{"key":"10.1016\/j.caeai.2025.100428_br0170","author":"Devlin"},{"key":"10.1016\/j.caeai.2025.100428_br0180","article-title":"Text mining in education","volume":"9","author":"Ferreira-Mello","year":"2019","journal-title":"Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery"},{"key":"10.1016\/j.caeai.2025.100428_br0190","series-title":"Proceedings of the 15th international learning analytics and knowledge conference","first-page":"93","article-title":"Automatic short answer grading in the llm era: Does gpt-4 with prompt engineering beat traditional models?","author":"Ferreira Mello","year":"2025"},{"key":"10.1016\/j.caeai.2025.100428_br0200","series-title":"International conference on machine learning (PMLR)","first-page":"10764","article-title":"Pal: Program-aided language models","author":"Gao","year":"2023"},{"key":"10.1016\/j.caeai.2025.100428_br0210","series-title":"Towards strengthening links between learning analytics and assessment: Challenges and potentials of a promising new bond","author":"Ga\u0161evi\u0107","year":"2022"},{"key":"10.1016\/j.caeai.2025.100428_br0220","series-title":"Encyclopedia of infant and early childhood development","first-page":"446","article-title":"Vygotsky's sociocultural theory\u2606","author":"Gauvain","year":"2020"},{"key":"10.1016\/j.caeai.2025.100428_br0230","series-title":"Educational assessment in the 21st century: Connecting theory and practice","first-page":"105","article-title":"Fairness in assessment","author":"Gipps","year":"2009"},{"key":"10.1016\/j.caeai.2025.100428_br0240","first-page":"1","article-title":"Prompt engineering with chatgpt: A guide for academic writers","author":"Giray","year":"2023","journal-title":"Annals of Biomedical Engineering"},{"key":"10.1016\/j.caeai.2025.100428_br0250","doi-asserted-by":"crossref","first-page":"1060","DOI":"10.1186\/s12909-024-06026-5","article-title":"Llm-based automatic short answer grading in undergraduate medical education","volume":"24","author":"Gr\u00e9visse","year":"2024","journal-title":"BMC Medical Education"},{"key":"10.1016\/j.caeai.2025.100428_br0260","series-title":"Proceedings of the 14th learning analytics and knowledge conference","first-page":"507","article-title":"Multiple choice vs. fill-in problems: The trade-off between scalability and learning","author":"Gurung","year":"2024"},{"key":"10.1016\/j.caeai.2025.100428_br0270","series-title":"Proceedings of the eleventh ACM conference on learning @ scale, L@S '24","first-page":"300","article-title":"Can large language models make the grade? An empirical study evaluating llms ability to mark short answer questions in k-12 education","author":"Henkel","year":"2024"},{"key":"10.1016\/j.caeai.2025.100428_br0280","series-title":"Proceedings of the thirteenth workshop on innovative use of NLP for building educational applications","first-page":"410","article-title":"Cross-lingual content scoring","author":"Horbach","year":"2018"},{"key":"10.1016\/j.caeai.2025.100428_br0290","series-title":"Proceedings of the 2021 ACM conference on fairness, accountability, and transparency","first-page":"375","article-title":"Measurement and fairness","author":"Jacobs","year":"2021"},{"key":"10.1016\/j.caeai.2025.100428_br0300","series-title":"Proceedings of the eleventh ACM conference on learning@ scale","first-page":"438","article-title":"Short answer scoring with gpt-4","author":"Jiang","year":"2024"},{"key":"10.1016\/j.caeai.2025.100428_br0310","series-title":"EDM","first-page":"554","article-title":"Automatic grading of short answers for mooc via semi-supervised document clustering","author":"Jing","year":"2015"},{"key":"10.1016\/j.caeai.2025.100428_br0320","doi-asserted-by":"crossref","DOI":"10.1016\/j.lindif.2023.102274","article-title":"Chatgpt for good? On opportunities and challenges of large language models for education","volume":"103","author":"Kasneci","year":"2023","journal-title":"Learning and Individual Differences"},{"key":"10.1016\/j.caeai.2025.100428_br0330","series-title":"The ethics of artificial intelligence in education","first-page":"174","article-title":"Algorithmic fairness in education","author":"Kizilcec","year":"2022"},{"key":"10.1016\/j.caeai.2025.100428_br0340","doi-asserted-by":"crossref","first-page":"47","DOI":"10.1007\/s44163-024-00147-y","article-title":"Performance of the pre-trained large language model gpt-4 on automated short answer grading","volume":"4","author":"Kortemeyer","year":"2024","journal-title":"Discover Artificial Intelligence"},{"key":"10.1016\/j.caeai.2025.100428_br0350","doi-asserted-by":"crossref","first-page":"212","DOI":"10.1207\/s15430421tip4104_2","article-title":"A revision of bloom's taxonomy: An overview","volume":"41","author":"Krathwohl","year":"2002","journal-title":"Theory Into Practice"},{"key":"10.1016\/j.caeai.2025.100428_br0360","series-title":"LAK23: 13th international learning analytics and knowledge conference","first-page":"499","article-title":"Moral machines or tyranny of the majority? A systematic review on predictive bias in education","author":"Li","year":"2023"},{"key":"10.1016\/j.caeai.2025.100428_br0370","doi-asserted-by":"crossref","first-page":"608","DOI":"10.3758\/s13428-012-0211-3","article-title":"Computer-based assessment of student-constructed responses","volume":"44","author":"Magliano","year":"2012","journal-title":"Behavior Research Methods"},{"key":"10.1016\/j.caeai.2025.100428_br0380","doi-asserted-by":"crossref","unstructured":"Mair, P., & Wilcox, R. (2019). Robust statistical methods using wrs2. The WRS2 package.","DOI":"10.3758\/s13428-019-01246-w"},{"key":"10.1016\/j.caeai.2025.100428_br0390","series-title":"Proceedings of the fifteenth workshop on innovative use of NLP for building educational applications","first-page":"151","article-title":"Should you fine-tune bert for automated essay scoring?","author":"Mayfield","year":"2020"},{"key":"10.1016\/j.caeai.2025.100428_br0400","series-title":"Advances in neural information processing systems","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"Mikolov","year":"2013"},{"key":"10.1016\/j.caeai.2025.100428_br0410","series-title":"Proceedings of the 12th conference of the European chapter of the ACL (EACL 2009)","first-page":"567","article-title":"Text-to-text semantic similarity for automatic short answer grading","author":"Mohler","year":"2009"},{"key":"10.1016\/j.caeai.2025.100428_br0420","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3597307","article-title":"Biases in large language models: Origins, inventory, and discussion","volume":"15","author":"Navigli","year":"2023","journal-title":"ACM Journal of Data and Information Quality"},{"key":"10.1016\/j.caeai.2025.100428_br0430","doi-asserted-by":"crossref","first-page":"2074","DOI":"10.3390\/app14052074","article-title":"A review of current trends, techniques, and challenges in large language models (llms)","volume":"14","author":"Patil","year":"2024","journal-title":"Applied Sciences"},{"key":"10.1016\/j.caeai.2025.100428_br0440","series-title":"Glove: Global vectors for word representation","first-page":"1532","author":"Pennington","year":"2014"},{"key":"10.1016\/j.caeai.2025.100428_br0450","doi-asserted-by":"crossref","DOI":"10.1109\/TLT.2023.3253071","article-title":"Embeddings for automatic short answer grading: A scoping review","author":"Putnikovic","year":"2023","journal-title":"IEEE Transactions on Learning Technologies"},{"key":"10.1016\/j.caeai.2025.100428_br0460","series-title":"2019 international engineering conference (IEC)","first-page":"200","article-title":"An overview of bag of words, importance, implementation, applications, and challenges","author":"Qader","year":"2019"},{"key":"10.1016\/j.caeai.2025.100428_br0470","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I. et al. (2018). Improving language understanding by generative pre-training."},{"key":"10.1016\/j.caeai.2025.100428_br0480","article-title":"Assessing the quality of automatic-generated short answers using gpt-4","volume":"7","author":"Rodrigues","year":"2024","journal-title":"Computers and Education: Artificial Intelligence"},{"key":"10.1016\/j.caeai.2025.100428_br0490","article-title":"Educational justice. Reliability and consistency of large language models for automated essay scoring and its implications","volume":"8","author":"Rony","year":"2025","journal-title":"Journal of Applied Learning and Teaching"},{"key":"10.1016\/j.caeai.2025.100428_br0500","doi-asserted-by":"crossref","first-page":"513","DOI":"10.1016\/0306-4573(88)90021-0","article-title":"Term-weighting approaches in automatic text retrieval","volume":"24","author":"Salton","year":"1988","journal-title":"Information Processing & Management"},{"key":"10.1016\/j.caeai.2025.100428_br0510","series-title":"International conference on artificial intelligence in education technology","first-page":"69","article-title":"Explainability in automatic short answer grading","author":"Schlippe","year":"2022"},{"key":"10.1016\/j.caeai.2025.100428_br0520","author":"Schneider"},{"key":"10.1016\/j.caeai.2025.100428_br0530","author":"Se\u00dfler"},{"key":"10.1016\/j.caeai.2025.100428_br0540","doi-asserted-by":"crossref","first-page":"481","DOI":"10.1109\/TLT.2022.3196278","article-title":"Leveraging class balancing techniques to alleviate algorithmic bias for predictive tasks in education","volume":"15","author":"Sha","year":"2022","journal-title":"IEEE Transactions on Learning Technologies"},{"key":"10.1016\/j.caeai.2025.100428_br0550","first-page":"81","article-title":"Assessing the reliability of assigning bloom's cognitive process levels","volume":"4","author":"Stanny","year":"2016","journal-title":"Teaching and Learning Inquiry"},{"key":"10.1016\/j.caeai.2025.100428_br0560","series-title":"Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (EMNLP-IJCNLP)","first-page":"6071","article-title":"Pre-training bert on domain resources for short answer grading","author":"Sung","year":"2019"},{"key":"10.1016\/j.caeai.2025.100428_br0570","series-title":"The handbook of learning analytics","first-page":"205","article-title":"An introduction to fairness, absence of bias, and equity in learning analytics","author":"Uttamchandani","year":"2022"},{"key":"10.1016\/j.caeai.2025.100428_br0580","first-page":"360","article-title":"Understanding interobserver agreement: The kappa statistic","volume":"37","author":"Viera","year":"2005","journal-title":"Family Medicine"},{"key":"10.1016\/j.caeai.2025.100428_br0590","series-title":"Proceedings of the annual symposium on computer-human interaction in play","first-page":"4","article-title":"Statistical significance testing at chi play: Challenges and opportunities for more transparency","author":"Vornhagen","year":"2020"},{"key":"10.1016\/j.caeai.2025.100428_br0600","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.caeai.2025.100428_br0610","series-title":"Developing and applying assessments in the music classroom","first-page":"82","article-title":"Validity, reliability, and fairness in classroom tests","author":"Wesolowski","year":"2020"},{"key":"10.1016\/j.caeai.2025.100428_br0620","author":"White"},{"key":"10.1016\/j.caeai.2025.100428_br0630","series-title":"Introduction to robust estimation and hypothesis testing","author":"Wilcox","year":"2011"},{"key":"10.1016\/j.caeai.2025.100428_br0640","series-title":"Experimentation in software engineering, Vol. 236","author":"Wohlin","year":"2012"},{"key":"10.1016\/j.caeai.2025.100428_br0650","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1111\/bjet.13370","article-title":"Practical and ethical challenges of large language models in education: A systematic scoping review","volume":"55","author":"Yan","year":"2024","journal-title":"British Journal of Educational Technology"},{"key":"10.1016\/j.caeai.2025.100428_br0660","doi-asserted-by":"crossref","DOI":"10.1002\/rev3.3433","article-title":"Exploring the impact of language models, such as chatgpt, on student learning and assessment","volume":"11","author":"Zirar","year":"2023","journal-title":"Review of Education"},{"key":"10.1016\/j.caeai.2025.100428_br0670","series-title":"Proceedings of the 22nd Chinese national conference on computational linguistics (volume 2: Frontier forum)","first-page":"88","article-title":"Through the lens of core competency: Survey on evaluation of large language models","author":"Ziyu","year":"2023"}],"container-title":["Computers and Education: Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2666920X25000682?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2666920X25000682?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,12,15]],"date-time":"2025-12-15T14:06:00Z","timestamp":1765807560000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S2666920X25000682"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":67,"alternative-id":["S2666920X25000682"],"URL":"https:\/\/doi.org\/10.1016\/j.caeai.2025.100428","relation":{},"ISSN":["2666-920X"],"issn-type":[{"value":"2666-920X","type":"print"}],"subject":[],"published":{"date-parts":[[2025,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Is GPT-4 fair? An empirical analysis in automatic short answer grading","name":"articletitle","label":"Article Title"},{"value":"Computers and Education: Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.caeai.2025.100428","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 The Authors. Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"100428"}}