{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T12:18:53Z","timestamp":1743077933502,"version":"3.40.3"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031673504"},{"type":"electronic","value":"9783031673511"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T00:00:00Z","timestamp":1726704000000},"content-version":"vor","delay-in-days":262,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"abstract":"<jats:title>Abstract<\/jats:title><jats:p>This paper introduces a new strategy to enhance the trustworthiness of Short Answer Scoring (SAS) systems used in educational settings. Although the development of scoring models with high accuracy has become feasible due to advancements in machine learning methods, particularly recent Transformers, there is a risk of shortcut learning using superficial cues present in training data, leading to behaviors that contradict rubric standards and thus raising issues of model trustworthiness. To address this issue, we introduce an efficient strategy that aligns the features of responses with rubric criteria, mitigating shortcut learning and enhancing model trustworthiness. Our approach includes a detection method that employs a feature attribution method to identify superficial cues and a correction method that re-trains the model to align with annotations related to the rubric, thereby suppressing these superficial cues. Our quantitative experiments demonstrate the effectiveness of our method in consistently suppressing superficial cues, contributing to more trustworthy automated scoring of descriptive questions.<\/jats:p>","DOI":"10.1007\/978-3-031-67351-1_23","type":"book-chapter","created":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T17:05:58Z","timestamp":1726765558000},"page":"337-358","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Take No Shortcuts! Stick to the Rubric: A Method for Building Trustworthy Short Answer Scoring Models"],"prefix":"10.1007","author":[{"given":"Yuya","family":"Asazuma","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hiroaki","family":"Funayama","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuichiroh","family":"Matsubayashi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tomoya","family":"Mizumoto","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Paul","family":"Reisert","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kentaro","family":"Inui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,19]]},"reference":[{"key":"23_CR1","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate, September 2014"},{"issue":"3","key":"23_CR2","doi-asserted-by":"publisher","first-page":"851","DOI":"10.2466\/pr0.1966.18.3.851","volume":"18","author":"CC Bridges","year":"1966","unstructured":"Bridges, C.C.: Hierarchical cluster analysis. Psychol. Rep. 18(3), 851\u2013854 (1966)","journal-title":"Psychol. Rep."},{"key":"23_CR3","unstructured":"Burstein, J., Kaplan, R., Wolff, S., Lu, C.: Using lexical semantic techniques to classify free-responses. In: Breadth and Depth of Semantic Lexicons (1996)"},{"key":"23_CR4","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1007\/978-3-030-52240-7_8","volume-title":"Artificial Intelligence in Education","author":"L Camus","year":"2020","unstructured":"Camus, L., Filighera, A.: Investigating transformers for automatic short answer grading. In: Bittencourt, I.I., Cukurova, M., Muldner, K., Luckin, R., Mill\u00e1n, E. (eds.) AIED 2020. LNCS (LNAI), vol. 12164, pp. 43\u201348. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-52240-7_8"},{"key":"23_CR5","unstructured":"Cardozo, S., et al.: Explainer divergence scores (EDS): some Post-Hoc explanations may be effective for detecting unknown spurious correlations (2022)"},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"Del\u00a0Gobbo, E., Guarino, A., Cafarelli, B., Grilli, L.: GradeAid: a framework for automatic short answers grading in educational contexts-design, implementation and evaluation. Knowl. Inf. Syst. 1\u201340 (2023)","DOI":"10.1007\/s10115-023-01892-9"},{"key":"23_CR7","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding, pp. 4171\u20134186, June 2019"},{"key":"23_CR8","unstructured":"Doshi-Velez, F., Kim, B.: Towards a rigorous science of interpretable machine learning, February 2017"},{"issue":"4","key":"23_CR9","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/S1364-6613(99)01294-2","volume":"3","author":"RM French","year":"1999","unstructured":"French, R.M.: Catastrophic forgetting in connectionist networks. Trends Cogn. Sci. 3(4), 128\u2013135 (1999)","journal-title":"Trends Cogn. Sci."},{"key":"23_CR10","doi-asserted-by":"publisher","unstructured":"Funayama, H., Asazuma, Y., Matsubayashi, Y., Mizumoto, T., Inui, K.: Reducing the cost: cross-prompt pre-finetuning for short answer scoring. In: Wang, N., Rebolledo-Mendez, G., Matsuda, N., Santos, O.C., Dimitrova, V. (eds.) Artificial Intelligence in Education. AIED 2023. LNCS, vol. 13916, pp. 78\u201389. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-36272-9_7","DOI":"10.1007\/978-3-031-36272-9_7"},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Funayama, H., et al.: Preventing critical scoring errors in short answer scoring with confidence estimation. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop, pp. 237\u2013243. Association for Computational Linguistics, Online, July 2020","DOI":"10.18653\/v1\/2020.acl-srw.32"},{"key":"23_CR12","doi-asserted-by":"publisher","unstructured":"Funayama, H., Sato, T., Matsubayashi, Y., Mizumoto, T., Suzuki, J., Inui, K.: Balancing cost and quality: an exploration of human-in-the-loop frameworks for automated short answer scoring. In: Rodrigo, M.M., Matsuda, N., Cristea, A.I., Dimitrova, V. (eds.) Artificial Intelligence in Education. AIED 2022. LNCS, vol. 13355, pp. 465\u2013476. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-11644-5_38","DOI":"10.1007\/978-3-031-11644-5_38"},{"key":"23_CR13","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1007\/978-3-030-03928-8_31","volume-title":"Advances in Artificial Intelligence - IBERAMIA 2018","author":"LB Galhardi","year":"2018","unstructured":"Galhardi, L.B., Brancher, J.D.: Machine learning approach for automatic short answer grading: a systematic review. In: Simari, G.R., Ferm\u00e9, E., Guti\u00e9rrez Segura, F., Rodr\u00edguez Melquiades, J.A. (eds.) IBERAMIA 2018. LNCS (LNAI), vol. 11238, pp. 380\u2013391. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-03928-8_31"},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"Gao, J., Lanchantin, J., Soffa, M.L., Qi, Y.: Black-Box generation of adversarial text sequences to evade deep learning classifiers. In: 2018 IEEE Security and Privacy Workshops (SPW), pp. 50\u201356, May 2018","DOI":"10.1109\/SPW.2018.00016"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Geirhos, R., et al.: Shortcut learning in deep neural networks, April 2020","DOI":"10.1038\/s42256-020-00257-z"},{"key":"23_CR16","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"586","DOI":"10.1007\/978-3-030-14118-9_59","volume-title":"The International Conference on Advanced Machine Learning Technologies and Applications (AMLTA2019)","author":"WH Gomaa","year":"2020","unstructured":"Gomaa, W.H., Fahmy, A.A.: Ans2vec: a scoring system for short answers. In: Hassanien, A.E., Azar, A.T., Gaber, T., Bhatnagar, R., F. Tolba, M. (eds.) AMLTA 2019. AISC, vol. 921, pp. 586\u2013595. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-14118-9_59"},{"key":"23_CR17","doi-asserted-by":"crossref","unstructured":"Hassija, V., et al.: Interpreting Black-Box models: a review on explainable artificial intelligence. Cognit. Comput. (2023)","DOI":"10.1007\/s12559-023-10179-8"},{"key":"23_CR18","doi-asserted-by":"publisher","unstructured":"Knox, J.: Massive open online courses (MOOCs). In: Peters, M.A. (eds.) Encyclopedia of Educational Philosophy and Theory, pp. 1372\u20131378, LNCS. Springer, Singapore (2017). https:\/\/doi.org\/10.1007\/978-981-287-588-4_219","DOI":"10.1007\/978-981-287-588-4_219"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Kumar, S., Chakrabarti, S., Roy, S.: Earth mover\u2019s distance pooling over Siamese LSTMs for automatic short answer grading. In: Proceedings of the 26th International Joint Conference on Artificial Intelligence, pp. 2046\u20132052. IJCAI\u201917, AAAI Press, August 2017","DOI":"10.24963\/ijcai.2017\/284"},{"issue":"01","key":"23_CR20","doi-asserted-by":"publisher","first-page":"9662","DOI":"10.1609\/aaai.v33i01.33019662","volume":"33","author":"Y Kumar","year":"2019","unstructured":"Kumar, Y., Aggarwal, S., Mahata, D., Shah, R.R., Kumaraguru, P., Zimmermann, R.: Get IT scored using AutoSAS \u2013 an automated system for scoring short answers. AAAI 33(01), 9662\u20139669 (2019)","journal-title":"AAAI"},{"key":"23_CR21","unstructured":"Lipton, Z.C.: The mythos of model interpretability, June 2016"},{"issue":"3","key":"23_CR22","doi-asserted-by":"publisher","first-page":"266","DOI":"10.1504\/IJMLO.2022.124160","volume":"16","author":"AKF Lui","year":"2022","unstructured":"Lui, A.K.F., Ng, S.C., Cheung, S.W.N.: A framework for effectively utilising human grading input in automated short answer grading. Int. J. Mob. Learn. Organ. 16(3), 266 (2022)","journal-title":"Int. J. Mob. Learn. Organ."},{"key":"23_CR23","unstructured":"Lundberg, S., Lee, S.I.: A unified approach to interpreting model predictions, May 2017"},{"key":"23_CR24","doi-asserted-by":"crossref","unstructured":"Mizumoto, T., et al.: Analytic score prediction and justification identification in automated short answer scoring, pp. 316\u2013325, August 2019","DOI":"10.18653\/v1\/W19-4433"},{"key":"23_CR25","unstructured":"Mohler, M., Bunescu, R., Mihalcea, R.: Learning to grade short answer questions using semantic similarity measures and dependency graph alignments. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, pp. 752\u2013762. Association for Computational Linguistics, Portland, Oregon, USA, June 2011"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"Mohler, M., Mihalcea, R.: Text-to-text semantic similarity for automatic short answer grading. In: Lascarides, A., Gardent, C., Nivre, J. (eds.) Proceedings of the 12th Conference of the European Chapter of the ACL (EACL 2009), pp. 567\u2013575. Association for Computational Linguistics, Athens, Greece, March 2009","DOI":"10.3115\/1609067.1609130"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Nauta, M., Walsh, R., Dubowski, A., Seifert, C.: Uncovering and correcting shortcut learning in machine learning models for skin cancer diagnosis. Diagnostics (Basel) 12(1) (2021)","DOI":"10.3390\/diagnostics12010040"},{"issue":"12","key":"23_CR28","doi-asserted-by":"publisher","first-page":"1051","DOI":"10.1038\/s41560-020-00711-7","volume":"5","author":"S Ou","year":"2020","unstructured":"Ou, S., et al.: Erratum: author correction: machine learning model to project the impact of COVID-19 on US motor gasoline demand. Nat. Energy 5(12), 1051\u20131052 (2020)","journal-title":"Nat. Energy"},{"issue":"7","key":"23_CR29","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1080\/01421590500062749","volume":"27","author":"J Rademakers","year":"2005","unstructured":"Rademakers, J., Ten Cate, T.J., B\u00e4r, P.R.: Progress testing with short answer questions. Med. Teach. 27(7), 578\u2013582 (2005)","journal-title":"Med. Teach."},{"key":"23_CR30","doi-asserted-by":"crossref","unstructured":"Ribeiro, M.T., Singh, S., Guestrin, C.: Why should i trust you?: explaining the predictions of any classifier, February 2016","DOI":"10.18653\/v1\/N16-3020"},{"key":"23_CR31","unstructured":"RIKEN(2020): Riken dataset for short answer assessment (July 2020)"},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"Riordan, B., Horbach, A., Cahill, A., Zesch, T., Lee, C.M.: Investigating neural architectures for short answer scoring. In: Proceedings of the 12th Workshop on Innovative Use of NLP for Building Educational Applications, pp. 159\u2013168. Association for Computational Linguistics, Stroudsburg, PA, USA (2017)","DOI":"10.18653\/v1\/W17-5017"},{"key":"23_CR33","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1007\/978-3-319-27704-2_10","volume-title":"Computer Assisted Assessment. Research into E-Assessment","author":"S Roy","year":"2015","unstructured":"Roy, S., Narahari, Y., Deshmukh, O.D.: A perspective on computer assisted assessment techniques for short free-text answers. In: Ras, E., Joosten-ten Brinke, D. (eds.) CAA 2015. CCIS, vol. 571, pp. 96\u2013109. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-27704-2_10"},{"issue":"5","key":"23_CR34","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1038\/s42256-019-0048-x","volume":"1","author":"C Rudin","year":"2019","unstructured":"Rudin, C.: Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead. Nat. Mach. Intell. 1(5), 206\u2013215 (2019)","journal-title":"Nat. Mach. Intell."},{"key":"23_CR35","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1007\/978-3-319-93843-1_37","volume-title":"Artificial Intelligence in Education","author":"S Saha","year":"2018","unstructured":"Saha, S., Dhamecha, T.I., Marvaniya, S., Sindhgatta, R., Sengupta, B.: Sentence level or token level features for automatic short answer grading?: use both. In: Penstein Ros\u00e9, C., et al. (eds.) AIED 2018. LNCS (LNAI), vol. 10947, pp. 503\u2013517. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-93843-1_37"},{"issue":"4","key":"23_CR36","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1111\/medu.13504","volume":"52","author":"AH Sam","year":"2018","unstructured":"Sam, A.H., et al.: Very-short-answer questions: reliability, discrimination and acceptability. Med. Educ. 52(4), 447\u2013455 (2018)","journal-title":"Med. Educ."},{"key":"23_CR37","doi-asserted-by":"publisher","unstructured":"Sato, T., Funayama, H., Hanawa, K., Inui, K.: Plausibility and faithfulness of feature attribution-based explanations in automated short answer scoring. In: Rodrigo, M.M., Matsuda, N., Cristea, A.I., Dimitrova, V. (eds.) Artificial Intelligence in Education. AIED 2022. LNCS, vol. 13355, pp. 231\u2013242. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-11644-5_19","DOI":"10.1007\/978-3-031-11644-5_19"},{"key":"23_CR38","unstructured":"Shrikumar, A., Greenside, P., Kundaje, A.: Learning important features through propagating activation differences, April 2017"},{"key":"23_CR39","unstructured":"Shrikumar, A., Greenside, P., Shcherbina, A., Kundaje, A.: Not just a black box: learning important features through propagating activation differences, May 2016"},{"issue":"267","key":"23_CR40","first-page":"467","volume":"49","author":"HA Simon","year":"1954","unstructured":"Simon, H.A.: Spurious correlation: a causal interpretation. J. Am. Stat. Assoc. 49(267), 467\u2013479 (1954)","journal-title":"J. Am. Stat. Assoc."},{"key":"23_CR41","unstructured":"Simonyan, K., Vedaldi, A., Zisserman, A.: Deep inside convolutional networks: visualising image classification models and saliency maps, December 2013"},{"key":"23_CR42","unstructured":"Sundararajan, M., Taly, A., Yan, Q.: Axiomatic attribution for deep networks, March 2017"},{"key":"23_CR43","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1007\/978-3-030-23204-7_39","volume-title":"Artificial Intelligence in Education","author":"C Sung","year":"2019","unstructured":"Sung, C., Dhamecha, T.I., Mukhi, N.: Improving short answer grading using transformer-based pre-training. In: Isotani, S., Mill\u00e1n, E., Ogan, A., Hastings, P., McLaren, B., Luckin, R. (eds.) AIED 2019. LNCS (LNAI), vol. 11625, pp. 469\u2013481. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-23204-7_39"},{"key":"23_CR44","doi-asserted-by":"crossref","unstructured":"Tornqvist, M., Mahamud, M., Mendez\u00a0Guzman, E., Farazouli, A.: ExASAG: explainable framework for automatic short answer grading. In: Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023), pp. 361\u2013371. Association for Computational Linguistics, Toronto, Canada, July 2023","DOI":"10.18653\/v1\/2023.bea-1.29"},{"key":"23_CR45","unstructured":"Vaswani, A., et al.: Attention is all you need, June 2017"},{"issue":"1","key":"23_CR46","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1080\/15434303.2012.750660","volume":"10","author":"SC Weigle","year":"2013","unstructured":"Weigle, S.C., Yang, W., Montee, M.: Exploring reading processes in an academic reading test using Short-Answer questions. Lang. Assess. Q. 10(1), 28\u201348 (2013)","journal-title":"Lang. Assess. Q."}],"container-title":["Communications in Computer and Information Science","Higher Education Learning Methodologies and Technologies Online"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-67351-1_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T17:13:07Z","timestamp":1726765987000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-67351-1_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031673504","9783031673511"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-67351-1_23","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"19 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HELMeTO","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Higher Education Learning Methodologies and Technologies Online","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Foggia","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"helmeto2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.helmeto2023.it","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}