{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T14:56:16Z","timestamp":1774968976365,"version":"3.50.1"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031984167","type":"print"},{"value":"9783031984174","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-98417-4_6","type":"book-chapter","created":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T10:06:08Z","timestamp":1752746768000},"page":"75-89","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Does the\u00a0Prompt-Based Large Language Model Recognize Students\u2019 Demographics and\u00a0Introduce Bias in\u00a0Essay Scoring?"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0308-0258","authenticated-orcid":false,"given":"Kaixun","family":"Yang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1413-1103","authenticated-orcid":false,"given":"Mladen","family":"Rakovi\u0107","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9265-1908","authenticated-orcid":false,"given":"Dragan","family":"Ga\u0161evi\u0107","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8236-3133","authenticated-orcid":false,"given":"Guanliang","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,15]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Binhammad, M.H.Y., Othman, A., Abuljadayel, L., Al\u00a0Mheiri, H., Alkaabi, M., Almarri, M.: Investigating how generative ai can create personalized learning materials tailored to individual student needs. Creative Educ. 15(7), 1499\u20131523 (2024)","DOI":"10.4236\/ce.2024.157091"},{"issue":"3","key":"6_CR2","first-page":"27","volume":"25","author":"J Burstein","year":"2004","unstructured":"Burstein, J., Chodorow, M., Leacock, C.: Automated essay evaluation: the criterion online writing service. AI Mag. 25(3), 27\u201327 (2004)","journal-title":"AI Mag."},{"key":"6_CR3","unstructured":"Chen, B., Zhang, Z., Langren\u00e9, N., Zhu, S.: Unleashing the potential of prompt engineering in large language models: a comprehensive review. arXiv preprint arXiv:2310.14735 (2023)"},{"key":"6_CR4","unstructured":"Chen, E., Wang, D., Xu, L., Cao, C., Fang, X., Lin, J.: A systematic review on prompt engineering in large language models for k-12 stem education. arXiv preprint arXiv:2410.11123 (2024)"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Chen, H., He, B.: Automated essay scoring by maximizing human-machine agreement. In: Proceedings of the 2013 Conference On Empirical Methods in Natural Language Processing, pp. 1741\u20131752 (2013)","DOI":"10.18653\/v1\/D13-1180"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Crossley, S.A., Tian, Y., Baffour, P., Franklin, A., Benner, M., Boser, U.: A large-scale corpus for assessing written argumentation: Persuade 2.0. Assess. Writ. 61, 100865 (2024)","DOI":"10.1016\/j.asw.2024.100865"},{"key":"6_CR7","unstructured":"Deane, P.: The importance of assessing student writing and improving writing instruction. research notes. Educational Testing Service (2022)"},{"key":"6_CR8","unstructured":"Doewes, A., Kurdhi, N., Saxena, A.: Evaluating quadratic weighted kappa as the standard performance metric for automated essay scoring. In: 16th International Conference on Educational Data Mining, EDM 2023, pp. 103\u2013113. International Educational Data Mining Society (IEDMS) (2023)"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Dong, F., Zhang, Y., Yang, J.: Attention-based recurrent convolutional neural network for automatic essay scoring. In: Proceedings of the 21st Conference on Computational Natural Language Learning (CoNLL 2017), pp. 153\u2013162 (2017)","DOI":"10.18653\/v1\/K17-1017"},{"key":"6_CR10","unstructured":"Ganguli, D., et\u00a0al.: Red teaming language models to reduce harms: Methods, scaling behaviors, and lessons learned. arXiv preprint arXiv:2209.07858 (2022)"},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"Garc\u00eda-M\u00e9ndez, S., de\u00a0Arriba-P\u00e9rez, F., Somoza-L\u00f3pez, M.d.C.: A review on the use of large language models as virtual tutors. Science and Education, pp. 1\u201316 (2024)","DOI":"10.1007\/s11191-024-00530-2"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Jones, S., Myhill, D.: Discourses of difference? examining gender differences in linguistic characteristics of writing. Canadian J. Educ.n\/Revue canadienne de l\u2019\u00e9ducation, pp. 456\u2013482 (2007)","DOI":"10.2307\/20466646"},{"key":"6_CR13","unstructured":"Kwako, A., Ormerod, C.: Can language models guess your identity? analyzing demographic biases in ai essay scoring. In: Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024), pp. 78\u201386 (2024)"},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Lagakis, P., Demetriadis, S.: Automated essay scoring: A review of the field. In: 2021 International Conference on Computer, Information and Telecommunication Systems (CITS). pp.\u00a01\u20136. IEEE (2021)","DOI":"10.1109\/CITS52676.2021.9618476"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Lee, M., Liang, P., Yang, Q.: Coauthor: Designing a human-AI collaborative writing dataset for exploring language model capabilities. In: Proceedings of the 2022 CHI Conference on Human Factors in computing systems, pp. 1\u201319 (2022)","DOI":"10.1145\/3491102.3502030"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: Can large language models write reflectively. Comput. Educ.: Artif. Intell. 4, 100140 (2023)","DOI":"10.1016\/j.caeai.2023.100140"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Litman, D., Zhang, H., Correnti, R., Matsumura, L.C., Wang, E.: A fairness evaluation of automated methods for scoring text evidence usage in writing. In: International Conference on Artificial Intelligence in Education, pp. 255\u2013267. Springer (2021)","DOI":"10.1007\/978-3-030-78292-4_21"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"Loukina, A., Madnani, N., Zechner, K.: The many dimensions of algorithmic fairness in educational applications. In: Proceedings of the fourteenth workshop on innovative use of NLP for building educational applications, pp. 1\u201310 (2019)","DOI":"10.18653\/v1\/W19-4401"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Mayfield, E., Black, A.W.: Should you fine-tune bert for automated essay scoring? In: Proceedings of the Fifteenth Workshop on Innovative Use of NLP for Building Educational Applications, pp. 151\u2013162 (2020)","DOI":"10.18653\/v1\/2020.bea-1.15"},{"issue":"6","key":"6_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3457607","volume":"54","author":"N Mehrabi","year":"2021","unstructured":"Mehrabi, N., Morstatter, F., Saxena, N., Lerman, K., Galstyan, A.: A survey on bias and fairness in machine learning. ACM Comput. Surv. (CSUR) 54(6), 1\u201335 (2021)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Olea, C., et al.: Evaluating persona prompting for question answering tasks. In: Proceedings of the 10th International Conference on Artificial Intelligence and Soft Computing, Sydney, Australia (2024)","DOI":"10.5121\/csit.2024.141106"},{"key":"6_CR22","unstructured":"Rodriguez, P.U., Jafari, A., Ormerod, C.M.: Language models and automated essay scoring. arXiv preprint arXiv:1909.09482 (2019)"},{"key":"6_CR23","unstructured":"Sahoo, P., Singh, A.K., Saha, S., Jain, V., Mondal, S., Chadha, A.: A systematic survey of prompt engineering in large language models: Techniques and applications. arXiv preprint arXiv:2402.07927 (2024)"},{"key":"6_CR24","unstructured":"Schaller, N.J., Ding, Y., Horbach, A., Meyer, J., Jansen, T.: Fairness in automated essay scoring: a comparative analysis of algorithms on German learner essays from secondary education. In: Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024), pp. 210\u2013221 (2024)"},{"key":"6_CR25","unstructured":"Sha, L., Li, Y., Gasevic, D., Chen, G.: Bigger data or fairer data?: augmenting Bert via active sampling for educational text classification. In: International Conference on Computational Linguistics 2022, pp. 1275\u20131285. Association for Computational Linguistics (ACL) (2022)"},{"key":"6_CR26","unstructured":"Stahl, M., Biermann, L., Nehring, A., Wachsmuth, H.: Exploring LLM prompting strategies for joint essay scoring and feedback generation. arXiv preprint arXiv:2404.15845 (2024)"},{"key":"6_CR27","doi-asserted-by":"crossref","unstructured":"Taghipour, K., Ng, H.T.: A neural approach to automated essay scoring. In: Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, pp. 1882\u20131891 (2016)","DOI":"10.18653\/v1\/D16-1193"},{"key":"6_CR28","doi-asserted-by":"crossref","unstructured":"de\u00a0Vassimon\u00a0Manela, D., Errington, D., Fisher, T., van Breugel, B., Minervini, P.: Stereotype and skew: quantifying gender bias in pre-trained and fine-tuned language models. In: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pp. 2232\u20132242 (2021)","DOI":"10.18653\/v1\/2021.eacl-main.190"},{"key":"6_CR29","unstructured":"Wang, X., et al.: Self-consistency improves chain of thought reasoning in language models. arXiv preprint arXiv:2203.11171 (2022)"},{"issue":"1","key":"6_CR30","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1111\/j.1745-3992.2011.00223.x","volume":"31","author":"DM Williamson","year":"2012","unstructured":"Williamson, D.M., Xi, X., Breyer, F.J.: A framework for evaluation and use of automated scoring. Educ. Meas. Issues Pract. 31(1), 2\u201313 (2012)","journal-title":"Educ. Meas. Issues Pract."},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Xiao, C., et al.: Human-AI collaborative essay scoring: a dual-process framework with LLMs. In: Proceedings of the 15th International Learning Analytics and Knowledge Conference, pp. 293\u2013305 (2025)","DOI":"10.1145\/3706468.3706507"},{"issue":"10","key":"6_CR32","doi-asserted-by":"publisher","first-page":"1839","DOI":"10.1038\/s41562-024-02004-5","volume":"8","author":"L Yan","year":"2024","unstructured":"Yan, L., Greiff, S., Teuber, Z., Ga\u0161evi\u0107, D.: Promises and challenges of generative artificial intelligence for human learning. Nat. Hum. Behav. 8(10), 1839\u20131850 (2024)","journal-title":"Nat. Hum. Behav."},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Yancey, K.P., Laflair, G., Verardi, A., Burstein, J.: Rating short l2 essays on the CEFR scale with GPT-4. In: Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023), pp. 576\u2013584 (2023)","DOI":"10.18653\/v1\/2023.bea-1.49"},{"key":"6_CR34","doi-asserted-by":"crossref","unstructured":"Yang, K., Rakovi\u0107, M., Li, Y., Guan, Q., Ga\u0161evi\u0107, D., Chen, G.: Unveiling the tapestry of automated essay scoring: a comprehensive investigation of accuracy, fairness, and generalizability. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a038, pp. 22466\u201322474 (2024)","DOI":"10.1609\/aaai.v38i20.30254"},{"key":"6_CR35","doi-asserted-by":"crossref","unstructured":"Yang, R., Cao, J., Wen, Z., Wu, Y., He, X.: Enhancing automated essay scoring performance via fine-tuning pre-trained language models with combination of regression and ranking. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 1560\u20131569 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.141"},{"issue":"3","key":"6_CR36","doi-asserted-by":"publisher","first-page":"705","DOI":"10.1007\/s11145-020-10085-7","volume":"34","author":"HJ Yoon","year":"2021","unstructured":"Yoon, H.J.: Interactions in EFL argumentative writing: effects of topic, l1 background, and l2 proficiency on interactional metadiscourse. Read. Writ. 34(3), 705\u2013725 (2021)","journal-title":"Read. Writ."},{"key":"6_CR37","doi-asserted-by":"crossref","unstructured":"Yoshida, L.: The impact of example selection in few-shot prompting on automated essay scoring using GPT models. In: International Conference on Artificial Intelligence in Education, pp. 61\u201373. Springer (2024)","DOI":"10.1007\/978-3-031-64315-6_5"},{"key":"6_CR38","doi-asserted-by":"crossref","unstructured":"Zesch, T., Wojatzki, M., Scholten-Akoun, D.: Task-independent features for automated essay grading. In: Proceedings of the Tenth Workshop on Innovative Use of NLP for Building Educational Applications, pp. 224\u2013232 (2015)","DOI":"10.3115\/v1\/W15-0626"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-98417-4_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T12:54:29Z","timestamp":1757249669000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-98417-4_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031984167","9783031984174"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-98417-4_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"15 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIED","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence in Education","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Palermo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aied2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aied2025.itd.cnr.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}