{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T14:41:30Z","timestamp":1775486490700,"version":"3.50.1"},"reference-count":44,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,15]],"date-time":"2024-12-15T00:00:00Z","timestamp":1734220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,15]],"date-time":"2024-12-15T00:00:00Z","timestamp":1734220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,15]]},"DOI":"10.1109\/bigdata62323.2024.10825385","type":"proceedings-article","created":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T18:31:23Z","timestamp":1737052283000},"page":"8154-8163","source":"Crossref","is-referenced-by-count":7,"title":["SteLLA: A Structured Grading System Using LLMs with RAG"],"prefix":"10.1109","author":[{"given":"Hefei","family":"Qiu","sequence":"first","affiliation":[{"name":"Fitchburg State University,Department of Computer Science,Fitchburg,MA,01420-2697"}]},{"given":"Brian","family":"White","sequence":"additional","affiliation":[{"name":"University of Massachusetts Boston,Department of Computer Science,Boston,MA,02125"}]},{"given":"Ashley","family":"Ding","sequence":"additional","affiliation":[{"name":"Chantilly High School,Chantilly,VA,20151"}]},{"given":"Reinaldo","family":"Costa","sequence":"additional","affiliation":[{"name":"University of Massachusetts Boston,Department of Computer Science,Boston,MA,02125"}]},{"given":"Ali","family":"Hachem","sequence":"additional","affiliation":[{"name":"University of Massachusetts Boston,Department of Computer Science,Boston,MA,02125"}]},{"given":"Wei","family":"Ding","sequence":"additional","affiliation":[{"name":"University of Massachusetts Boston,Department of Computer Science,Boston,MA,02125"}]},{"given":"Ping","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Massachusetts Boston,Department of Computer Science,Boston,MA,02125"}]}],"member":"263","reference":[{"key":"ref1","first-page":"370","article-title":"Generating high-precision feedback for programming syntax errors using large language models","volume-title":"Proceedings of the 16th International Conference on Educational Data Mining","author":"Phung"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEET.2019.00022"},{"key":"ref3","first-page":"362","article-title":"Auto-scoring student responses with images in mathematics","volume-title":"Proceedings of the 16th International Conference on Educational Data Mining","author":"Baral"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1111\/jcal.12793"},{"key":"ref5","first-page":"2777","article-title":"Can large language models automatically score proficiency of written essays?","volume-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)","author":"Mansour"},{"key":"ref6","first-page":"3416","article-title":"On the use of bert for automated essay scoring: Joint learning of multi-scale essay representation","volume-title":"Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"Wang"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00236"},{"key":"ref8","first-page":"726","article-title":"Automatic short answer grading and feedback using text mining methods","volume-title":"postproceedings of the 10th Annual International Conference on Biologically Inspired Cognitive Architectures, BICA 2019 (Tenth Annual Meeting of the BICA Society), held August 15-19, 2019 in Seattle, Washington, USA","volume":"169","author":"S\u00fczen"},{"key":"ref9","article-title":"Short answer grading using one-shot prompting and text similarity scoring model","author":"Yoon","year":"2023"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1287\/isre.3.1.17"},{"issue":"3","key":"ref11","first-page":"411","article-title":"Discourse constraints for document compression","volume-title":"Computational Linguistics","volume":"36","author":"Clarke","year":"2010"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11911"},{"key":"ref13","first-page":"3938","article-title":"Question answering as an automatic evaluation metric for news article summarization","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Eyal"},{"key":"ref14","first-page":"3246","article-title":"Answers unite! unsupervised metrics for reinforced summarization models","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","author":"Scialom"},{"key":"ref15","first-page":"5008","article-title":"Asking and answering questions to evaluate the factual consistency of summaries","volume-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics","author":"Wang"},{"key":"ref16","first-page":"5055","article-title":"FEQA: A question answering evaluation framework for faithfulness assessment in abstractive summarization","volume-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics","author":"Durmus"},{"key":"ref17","first-page":"6594","article-title":"QuestEval: Summarization asks for fact-based evaluation","volume-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing","author":"Scialom"},{"key":"ref18","article-title":"Rethinking automatic evaluation in sentence simplification","author":"Scialom","year":"2021"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.633"},{"key":"ref20","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"ref21","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Brown"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbad467"},{"key":"ref23","first-page":"6251","article-title":"Factual error correction for abstractive summarization models","volume-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)","author":"Cao"},{"key":"ref24","first-page":"1172","article-title":"The curious case of hallucinations in neural machine translation","volume-title":"Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"Raunak"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"ref26","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Lewis"},{"key":"ref27","first-page":"6769","article-title":"Dense passage retrieval for open-domain question answering","volume-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)","author":"Karpukhin"},{"key":"ref28","article-title":"Realm: retrieval-augmented language model pre-training","volume-title":"Proceedings of the 37th International Conference on Machine Learning","author":"Guu"},{"key":"ref29","first-page":"874","article-title":"Leveraging passage retrieval with generative models for open domain question answering","volume-title":"Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume","author":"Izacard"},{"key":"ref30","article-title":"Improving language models by retrieving from trillions of tokens","author":"Borgeaud","year":"2022"},{"key":"ref31","article-title":"Generalization through memorization: Nearest neighbor language models","author":"Khandelwal","year":"2020"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.461"},{"key":"ref33","first-page":"60","article-title":"The eras and trends of automatic short answer grading","volume-title":"International Journal of Artificial Intelligence in Education","volume":"25","author":"Burrows","year":"2015"},{"key":"ref34","article-title":"Using lexical semantic techniques to classify free-responses","volume-title":"Breadth and Depth of Semantic Lexicons","author":"Burstein","year":"1996"},{"key":"ref35","first-page":"389","article-title":"C-rater: Automated scoring of short-answer questions","volume-title":"Computers and the Humanities","volume":"37","author":"Leacock","year":"2003"},{"key":"ref36","first-page":"567","article-title":"Text-to-text semantic similarity for automatic short answer grading","volume-title":"Proceedings of the 12th Conference of the European Chapter of the ACL (EACL 2009)","author":"Mohler"},{"key":"ref37","article-title":"Towards robust computerised marking of free-text responses","author":"Mitchell","year":"2002"},{"key":"ref38","doi-asserted-by":"crossref","DOI":"10.3115\/1071884.1071907","article-title":"A reliable approach to automatic assessment of short answer free responses","volume-title":"COLING 2002: The 17th International Conference on Computational Linguistics: Project Notes","author":"Bachman"},{"key":"ref39","first-page":"107","article-title":"Diagnosing meaning errors in short answers to reading comprehension questions","volume-title":"Proceedings of the Third Workshop on Innovative Use of NLP for Building Educational Applications","author":"Bailey"},{"key":"ref40","first-page":"102274","article-title":"Chatgpt for good? on opportunities and challenges of large language models for education","volume-title":"Learning and Individual Differences","volume":"103","author":"Kasneci","year":"2023"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"ref42","article-title":"Towards llm-based autograding for short textual answers","author":"Schneider","year":"2023"},{"key":"ref43","first-page":"61","article-title":"Automating model building in c-rater","volume-title":"Proceedings of the 2009 Workshop on Applied Textual Inference (TextInfer)","author":"Sukkarieh"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1177\/001316446002000104"}],"event":{"name":"2024 IEEE International Conference on Big Data (BigData)","location":"Washington, DC, USA","start":{"date-parts":[[2024,12,15]]},"end":{"date-parts":[[2024,12,18]]}},"container-title":["2024 IEEE International Conference on Big Data (BigData)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10824975\/10824942\/10825385.pdf?arnumber=10825385","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,17]],"date-time":"2025-01-17T08:18:47Z","timestamp":1737101927000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10825385\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,15]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/bigdata62323.2024.10825385","relation":{},"subject":[],"published":{"date-parts":[[2024,12,15]]}}}