{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T19:29:57Z","timestamp":1772652597056,"version":"3.50.1"},"reference-count":64,"publisher":"Elsevier BV","issue":"3","license":[{"start":{"date-parts":[[2022,6,2]],"date-time":"2022-06-02T00:00:00Z","timestamp":1654128000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,2]],"date-time":"2022-06-02T00:00:00Z","timestamp":1654128000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Artif Intell Educ"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s40593-022-00294-2","type":"journal-article","created":{"date-parts":[[2022,6,2]],"date-time":"2022-06-02T19:03:41Z","timestamp":1654196621000},"page":"467-496","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Automated Short Answer Scoring Using an Ensemble of Neural Networks and Latent Semantic Analysis Classifiers"],"prefix":"10.1016","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2129-7021","authenticated-orcid":false,"given":"Christopher","family":"Ormerod","sequence":"first","affiliation":[]},{"given":"Susan","family":"Lottridge","sequence":"additional","affiliation":[]},{"given":"Amy E.","family":"Harris","sequence":"additional","affiliation":[]},{"given":"Milan","family":"Patel","sequence":"additional","affiliation":[]},{"given":"Paul","family":"van Wamelen","sequence":"additional","affiliation":[]},{"given":"Balaji","family":"Kodeswaran","sequence":"additional","affiliation":[]},{"given":"Sharon","family":"Woolf","sequence":"additional","affiliation":[]},{"given":"Mackenzie","family":"Young","sequence":"additional","affiliation":[]}],"member":"78","published-online":{"date-parts":[[2022,6,2]]},"reference":[{"key":"294_CR1","volume-title":"Standards for educational and psychological testing","author":"American Educational Research Association","year":"2014","unstructured":"American Educational Research Association, American Psychological Association, National Council on Measurement in Education. (2014). Standards for educational and psychological testing. American Educational Research Association."},{"key":"294_CR2","doi-asserted-by":"crossref","unstructured":"Anscombe, F. J. (1948). The transformation of Poisson, binomial and negative-binomial data. Biometrika, 246\u2013254.","DOI":"10.1093\/biomet\/35.3-4.246"},{"key":"294_CR3","unstructured":"Anson, C. S. (2013). NCTE position statement on machine scoring: Machine scoring fails the test."},{"key":"294_CR4","unstructured":"Arter, J. (2000). Rubrics, scoring guides, and performance criteria: Classroom tools for. The Annual Conference of the American Educational Research Association. New Orleans."},{"key":"294_CR5","doi-asserted-by":"crossref","unstructured":"Basu, S., Jacobs, C., & Vanderwende, L. (2013). Powergrading: A clustering approach to amplify human effort for short answer grading. Transactions of the Association for Computational Linguistics, 391\u2013402.","DOI":"10.1162\/tacl_a_00236"},{"key":"294_CR6","unstructured":"Bridgeman, B. C. (2009). Considering fairness and validity in evaluating automated scoring. National Council on Measurement in Education. San Diego."},{"key":"294_CR7","unstructured":"Cho, K. v. (2012). Learning phrase representations using rnn encoderdecoder for statistical machine translation. preprint arxivs, 1406.1078."},{"key":"294_CR8","doi-asserted-by":"crossref","unstructured":"Cohen, J. (1968). Weighted kappa: Nominal scale agreement provision for scaled disagreement or partial credit. Psychological bulletin, 213.","DOI":"10.1037\/h0026256"},{"key":"294_CR9","volume-title":"Criteria for high-quality assessment","author":"LJ Darling-Hammond","year":"2013","unstructured":"Darling-Hammond, L. J. (2013). Criteria for high-quality assessment. Stanford Center for Opportunity Policy in Education."},{"issue":"6","key":"294_CR10","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S Deerwester","year":"1990","unstructured":"Deerwester, S., Dumais, S., Furnas, G., Landauer, T., & Harshman, R. (1990). Indexing by latent semantic analysis. Journal of the American Society for Information Science, 41(6), 391\u2013407.","journal-title":"Journal of the American Society for Information Science"},{"key":"294_CR11","unstructured":"Devlin, J. M.-W. (2018). BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805."},{"key":"294_CR12","unstructured":"Dikli, S. (2006). An overview of automated scoring of essays. The Journal of Technology, Learning and Assessment."},{"key":"294_CR13","unstructured":"Dzikovska, M. O., Nielsen, R. D., Brew, C., Leacock, C., Giampiccolo, D., Bentivogli, L., Clark, P., Dagan, I., & Dang, H. T. (2013). Semeval-2013 task 7: The joint student response analysis and 8th recognizing textual entailment challenge. NORTH TEXAS STATE UNIV DENTON."},{"key":"294_CR14","doi-asserted-by":"crossref","unstructured":"Esteva, A. B. (2017). Dermatologist-level classification of skin cancer with deep neural networks. Nature, 115.","DOI":"10.1038\/nature21056"},{"issue":"1","key":"294_CR15","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1177\/0016986210390635","volume":"55","author":"Xitao Fan","year":"2011","unstructured":"Fan, Xitao, & Nowell, Dana L. (2011). Using propensity score matching in educational research. Gifted Child Quarterly, 55(1), 74\u201379.","journal-title":"Gifted Child Quarterly"},{"key":"294_CR16","unstructured":"Gewertz, C. (2013, June 9). States Ponder Costs of Common Tests. Education Week, pp. 20\u201322."},{"key":"294_CR17","unstructured":"Gong, T. a. (2019). An Attention-based Deep Model for Automatic Short Answer Score. International Journal of Computer Science and Software Engineering, 127\u2013132."},{"key":"294_CR18","unstructured":"Hand-Scoring Rules. (2016). Retrieved from\u00a0http:\/\/www.smarterapp.org\/documents\/Smarter_Balanced_Hand_Scoring_Rules.pdf. Accessed 18 June."},{"key":"294_CR19","doi-asserted-by":"crossref","unstructured":"Harris, Z. S. (1954). Distributional structure. Word, 146\u2013162.","DOI":"10.1080\/00437956.1954.11659520"},{"key":"294_CR20","doi-asserted-by":"crossref","unstructured":"Hochreiter, S. a. (1997). Long short-term memory. Neural computation, 1735\u20131780.","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"294_CR21","doi-asserted-by":"crossref","unstructured":"Kumar, Y., Swati A., Debanjan M., Rajiv R. S., Ponnurangam K., & Roger Z. (2019). Get it scored using autosas\u2014an automated system for scoring short answers. In Proceedings of the AAAI Conference on Artificial Intelligence, 33(1), pp. 9662\u20139669.","DOI":"10.1609\/aaai.v33i01.33019662"},{"key":"294_CR22","doi-asserted-by":"crossref","unstructured":"Leacock, C. a. (2003). C-rater: Automated scoring of short-answer questions. Computers and the Humanities, 389\u2013405.","DOI":"10.1023\/A:1025779619903"},{"key":"294_CR23","unstructured":"Lee, N. T. (2018). Detecting racial bias in algorithms and machine learning. Journal of Information, Communication and Ethics in Society."},{"key":"294_CR24","doi-asserted-by":"crossref","unstructured":"Madnani, N. & Loukina, A. (2016). RSMTool: A Collection of Tools for Building and Evaluating Automated Scoring Models. Journal of Open Source Software.","DOI":"10.21105\/joss.00033"},{"key":"294_CR25","doi-asserted-by":"crossref","unstructured":"McCurry, D. (2010). Can machine scoring deal with broad and open writing. Assessing Writing, 118\u2013129.","DOI":"10.1016\/j.asw.2010.04.002"},{"key":"294_CR26","unstructured":"McGraw-Hill Education, C. T. (2014). Smarter balanced assessment consortium field test: Automated scoring research studies (in accordance with smarter balanced RFP 17)."},{"key":"294_CR27","unstructured":"Mikolov, T. I. (2013). Distributed representations of words and phrases and their compositionality. . Advances in neural information processing systems, 3111\u20133119."},{"key":"294_CR28","doi-asserted-by":"crossref","unstructured":"Mohler, M. a. (2009). Text-to-text semantic similarity for automatic short answer grading. Proceedings of the 12th Conference of the European Chapter of the Association for Computational Linguistics. (pp. 567\u2013575). Association for Computational Linguistics.","DOI":"10.3115\/1609067.1609130"},{"key":"294_CR29","volume-title":"Artificial Intelligence Applications to Support K\u201312 Teachers and Teaching","author":"RF Murphy","year":"2019","unstructured":"Murphy, R. F. (2019). Artificial Intelligence Applications to Support K\u201312 Teachers and Teaching. RAND Corporation."},{"key":"294_CR30","unstructured":"Norvig, P. (2007a). Retrieved from How to write a spelling corrector: http:\/\/norvig.com\/spell-correct.html. Accessed July 2018"},{"key":"294_CR31","unstructured":"Norvig, P. (2007b). How to write a spelling corrector. Retrieved from How to write a spelling corrector: http:\/\/norvig.com\/spell-correct.html. Accessed July 2018"},{"key":"294_CR32","unstructured":"Ormerod, C. M. & Harris, A. E. (2018). Neural network approach to classifying alarming student responses to online assessment. arXiv preprint, 1809.08899."},{"key":"294_CR33","unstructured":"Ormerod, C. M., Malhotra, A., & Jafari, A. (2021). Automated essay scoring using efficient transformer-based language models.\u00a0arXiv preprint\u00a0arXiv:2102.13136."},{"key":"294_CR34","doi-asserted-by":"crossref","unstructured":"Osgood, C. E. (1964). Semantic differential technique in the comparative study of cultures 1. American Anthropologist, 171\u2013200.","DOI":"10.1525\/aa.1964.66.3.02a00880"},{"key":"294_CR35","unstructured":"Page, E. B.  & Petersen, N. S. (1995). The computer moves into essay grading: Updating the ancient test. Phi delta kappan, 561."},{"key":"294_CR36","first-page":"43","volume-title":"Automated essay scoring: A cross-disciplinary perspective","author":"EB Page","year":"2003","unstructured":"Page, E. B. (2003). Project Essay Grade: PEG. In M. D. Shermis (Ed.), Automated essay scoring: A cross-disciplinary perspective (p. 43). New Jersey: Lawrence Erlbaum Associates."},{"key":"294_CR37","unstructured":"Pearson and ETS. (2015). Research results of PARCC automated scoring proof of concept study. Retrieved from\u00a0http:\/\/www.parcconline.org\/images\/Resources\/Educator-resources\/PARCC_AI_Research_Report.pdf. Accessed Sept 2019."},{"key":"294_CR38","unstructured":"Perelman, L. C. (2013). Critique of Mark D. Shermis & Ben Hammer, Contrasting state-of-the-art automated scoring of essays: Analysis. Journal of Writing Assessment."},{"key":"294_CR39","doi-asserted-by":"crossref","unstructured":"Powers, D. E. (2002). Stumping e-rater: challenging the validity of automated essay scoring. Computers in Human Behavior, 103\u2013134.","DOI":"10.1016\/S0747-5632(01)00052-8"},{"key":"294_CR40","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P. J. (2016). Squad: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:, 1606.05250.","DOI":"10.18653\/v1\/D16-1264"},{"key":"294_CR41","doi-asserted-by":"crossref","unstructured":"Riordan, B., Horbach, A., Cahill,\u00a0A., Zesch,\u00a0T., & Lee, C. (2017). Investigating neural architectures for short answer scoring. In Proceedings of the 12th Workshop on Innovative Use of NLP for Building Educational Applications, pp. 159\u2013168.","DOI":"10.18653\/v1\/W17-5017"},{"key":"294_CR42","unstructured":"Roberts, K. (2016). Assessing the corpus size vs. similarity trade-off for word embeddings in clinical NLP. Proceedings of the Clinical Natural Language Processing Workshop, (pp. 54\u201363). Osaka, Japan."},{"key":"294_CR43","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511810725","volume-title":"Matched sampling for causal effects","author":"DB Rubin","year":"2006","unstructured":"Rubin, D. B. (2006). Matched sampling for causal effects. Cambridge University Press."},{"key":"294_CR44","doi-asserted-by":"crossref","unstructured":"Sakaguchi, K. M. (2015). Effective feature integration for automated short answer scoring. Proceedings of the 2015 conference of the North American Chapter of the association for computational linguistics: Human language technologies, (pp. 1049\u20131054).","DOI":"10.3115\/v1\/N15-1111"},{"key":"294_CR45","unstructured":"Sato, E. R. (2011). SMARTER balanced assessment consortium common core state standards analysis: Eligible content for the summative assessment. Final Report. Smarter Balanced Assessment Consortium."},{"key":"294_CR46","unstructured":"Shermis, M. D. (2013a). Contrasting state-of-the-art automated scoring of essays: Analysis. Annual national council on measurement in education meeting."},{"key":"294_CR47","doi-asserted-by":"publisher","DOI":"10.4324\/9780203122761","volume-title":"Handbook of automated essay evaluation: Current applications and new directions","author":"MD Shermis","year":"2013","unstructured":"Shermis, M. D. (2013). Handbook of automated essay evaluation: Current applications and new directions. Routledge."},{"key":"294_CR48","doi-asserted-by":"crossref","unstructured":"Shermis, M. D. (2015). Contrasting state-of-the-art in the machine scoring of short-form constructed responses. Educational Assessment, 46\u201365.","DOI":"10.1080\/10627197.2015.997617"},{"key":"294_CR49","doi-asserted-by":"crossref","unstructured":"Silver, D. A. (2016). Mastering the game of Go with deep neural networks and tree search. Nature, 484.","DOI":"10.1038\/nature16961"},{"key":"294_CR50","unstructured":"Smith, C. (2017). iOS 10: Siri now works in third-party apps, comes with extra AI features. BGR."},{"key":"294_CR51","doi-asserted-by":"crossref","unstructured":"Sultan, M. A. (2015). Fast and easy short answer grading with high accuracy. Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, (pp. 1070\u20131075).","DOI":"10.18653\/v1\/N16-1123"},{"key":"294_CR52","doi-asserted-by":"crossref","unstructured":"Sung, Chul, Tejas Indulal Dhamecha, and Nirmal Mukhi. (2019). Improving short answer grading using transformer-based pre-training. International Conference on Artificial Intelligence in Education, (pp. 469\u2013481).","DOI":"10.1007\/978-3-030-23204-7_39"},{"key":"294_CR53","doi-asserted-by":"crossref","unstructured":"Szegedy, C. S. (2017). Inception-v4, inception-resnet and the impact of residual connections on learning. In Thirty-First AAAI Conference on Artificial Intelligence.","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"294_CR54","unstructured":"Tomas Mikolov, K. C. (2013). Efficient Estimation of Word Representations in Vector Space. Proceedings of Workshop at ICLR."},{"key":"294_CR55","doi-asserted-by":"crossref","unstructured":"Turney, P. D. (2010). From frequency to meaning: Vector space models of semantics. Journal of artificial intelligence research, 141\u2013188.","DOI":"10.1613\/jair.2934"},{"key":"294_CR56","unstructured":"Vaswani, A. N. (2017). Attention is all you need. Advances in neural information processing systems., 5998\u20136008."},{"key":"294_CR57","unstructured":"Vogels, W. (2017). Bringing the Magic of Amazon AI and Alexa to Apps on AWS. Retrieved from All Things Distributed: www.allthingsdistributed.com"},{"key":"294_CR58","doi-asserted-by":"crossref","unstructured":"Williamson, D. M., Xiaoming X., & Breyer, F. J. (2012). A framework for evaluation and use of automated scoring. Educational measurement: issues and practice, 31(1), 2\u201313.","DOI":"10.1111\/j.1745-3992.2011.00223.x"},{"key":"294_CR59","unstructured":"Wu, Y. M. (2016). Google's neural machine translation system: Bridging the gap between human and machine translation. arxiv preprints, 1609.08144."},{"key":"294_CR60","unstructured":"Zhilin Yang, Z. D. (2019). XLNet: Generalized Autoregressive Pretraining for Language Understanding. preprint Paper arxiv, 1906.08237."},{"key":"294_CR61","doi-asserted-by":"crossref","unstructured":"Zhou, Z.-H. J. (2002a). Ensembling neural networks: many could be better than all. Artificial Intelligence, 239\u2013263.","DOI":"10.1016\/S0004-3702(02)00190-X"},{"key":"294_CR62","doi-asserted-by":"crossref","unstructured":"Zhou, Z.-H. J. (2002b). Ensembling neural networks: many could be better than all. Artificial Intelligence, 239\u2013263.","DOI":"10.1016\/S0004-3702(02)00190-X"},{"key":"294_CR63","doi-asserted-by":"crossref","unstructured":"Zhai, X. Y. (2020). Applying machine learning in science assessment: a systematic review. Studies in Science Education, 111\u2013151.","DOI":"10.1080\/03057267.2020.1735757"},{"key":"294_CR64","doi-asserted-by":"crossref","unstructured":"Zou, J. a. (2018). AI can be sexist and racist\u2014it\u2019s time to make it fair. Nature, 324\u2013326.","DOI":"10.1038\/d41586-018-05707-8"}],"container-title":["International Journal of Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40593-022-00294-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40593-022-00294-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40593-022-00294-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T18:12:18Z","timestamp":1772647938000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40593-022-00294-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,2]]},"references-count":64,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["294"],"URL":"https:\/\/doi.org\/10.1007\/s40593-022-00294-2","relation":{},"ISSN":["1560-4292","1560-4306"],"issn-type":[{"value":"1560-4292","type":"print"},{"value":"1560-4306","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,2]]},"assertion":[{"value":"2 May 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 June 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not Applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest\/Competing Interests"}}]}}