{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T08:01:35Z","timestamp":1776931295622,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":96,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3772318.3790814","type":"proceedings-article","created":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T04:12:36Z","timestamp":1776053556000},"page":"1-20","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["EvaluAId: Human-AI Collaborative Evaluation of Open-Ended Student Essays"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4286-8468","authenticated-orcid":false,"given":"Chao","family":"Zhang","sequence":"first","affiliation":[{"name":"Cornell University, Ithaca, New York, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8272-6552","authenticated-orcid":false,"given":"Kexin","family":"Phyllis Ju","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, Michigan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2610-6084","authenticated-orcid":false,"given":"Xinyi","family":"Lu","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Michigan, Ann Arbor, Michigan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5442-6934","authenticated-orcid":false,"given":"Yu-Chun","family":"Grace Yen","sequence":"additional","affiliation":[{"name":"Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4317-9501","authenticated-orcid":false,"given":"Jeffrey","family":"M. Rzeszotarski","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Loyola University Maryland, Baltimore, Maryland, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,4,13]]},"reference":[{"key":"e_1_3_3_3_2_2","doi-asserted-by":"crossref","unstructured":"Vaibhav Adlakha Parishad BehnamGhader Xing\u00a0Han Lu Nicholas Meade and Siva Reddy. 2024. Evaluating Correctness and Faithfulness of Instruction-Following Models for Question Answering. Transactions of the Association for Computational Linguistics 12 (May 2024) 681\u2013699.","DOI":"10.1162\/tacl_a_00667"},{"key":"e_1_3_3_3_3_2","unstructured":"Omar Alsaiari Nilufar Baghaei Hatim Lahza Jason Lodge Marie Boden and Hassan Khosravi. 2024. Emotionally Enriched Feedback via Generative AI. arxiv:https:\/\/arXiv.org\/abs\/2410.15077\u00a0[cs]"},{"key":"e_1_3_3_3_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300233"},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376277"},{"key":"e_1_3_3_3_6_2","unstructured":"Yigal Attali and Jill Burstein. 2006. Automated Essay Scoring With E-Rater\u00ae V.2. The Journal of Technology Learning and Assessment 4 3 (Feb. 2006) 1\u201331."},{"key":"e_1_3_3_3_7_2","doi-asserted-by":"publisher","unstructured":"Giovanna Badia. 2019. Holistic or Analytic Rubrics? Grading Information Literacy Instruction. College & Undergraduate Libraries 26 2 (April 2019) 109\u2013116. 10.1080\/10691316.2019.1638081","DOI":"10.1080\/10691316.2019.1638081"},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"publisher","unstructured":"Karim Benharrak Tim Zindulka Florian Lehmann Hendrik Heuer and Daniel Buschek. 2024. Writer-Defined AI Personas for On-Demand Feedback Generation. arxiv:https:\/\/arXiv.org\/abs\/2309.10433\u00a0[cs] 10.1145\/3613904.3642406","DOI":"10.1145\/3613904.3642406"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"publisher","unstructured":"Virginia Braun and Victoria Clarke. 2006. Using Thematic Analysis in Psychology. Qualitative Research in Psychology 3 2 (Jan. 2006) 77\u2013101. 10.1191\/1478088706qp063oa","DOI":"10.1191\/1478088706qp063oa"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173868"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","unstructured":"Ten Cate and Olle\u00a0Th J. 2013. Why Receiving Feedback Collides with Self Determination. Adv in Health Sci Educ 18 4 (Oct. 2013) 845\u2013849. 10.1007\/s10459-012-9401-0","DOI":"10.1007\/s10459-012-9401-0"},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.870"},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376638"},{"key":"e_1_3_3_3_14_2","doi-asserted-by":"publisher","unstructured":"Paul Deane. 2013. On the Relation between Automated Essay Scoring and Modern Views of the Writing Construct. Assessing Writing 18 1 (Jan. 2013) 7\u201324. 10.1016\/j.asw.2012.10.002","DOI":"10.1016\/j.asw.2012.10.002"},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3659023"},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"publisher","unstructured":"Mica\u00a0R. Endsley and Esin\u00a0O. Kiris. 1995. The Out-of-the-Loop Performance Problem and Level of Control in Automation. Hum Factors 37 2 (June 1995) 381\u2013394. 10.1518\/001872095779064555","DOI":"10.1518\/001872095779064555"},{"key":"e_1_3_3_3_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676390"},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"publisher","unstructured":"Antonio Ferrara Francesco Bonchi Francesco Fabbri Fariba Karimi and Claudia Wagner. 2024. Bias-Aware Ranking from Pairwise Comparisons. Data Min Knowl Disc 38 4 (July 2024) 2062\u20132086. 10.1007\/s10618-024-01024-z","DOI":"10.1007\/s10618-024-01024-z"},{"key":"e_1_3_3_3_19_2","unstructured":"Brianna Finnegan. 2024. Teacher Use Of Rubrics To Assess Claim Evidence And Reasonings In The High School Science Classroom. Ph.\u00a0D. Dissertation. University of Northern Iowa."},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"crossref","unstructured":"John\u00a0H. Flavell. 1979. Metacognition and Cognitive Monitoring: A New Area of Cognitive\u2013Developmental Inquiry. American psychologist 34 10 (1979) 906.","DOI":"10.1037\/0003-066X.34.10.906"},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"publisher","unstructured":"Atta Gebril and Lia Plakans. 2014. Assembling Validity Evidence for Assessing Academic Writing: Rater Reactions to Integrated Tasks. Assessing Writing 21 (July 2014) 56\u201373. 10.1016\/j.asw.2014.03.002","DOI":"10.1016\/j.asw.2014.03.002"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"publisher","unstructured":"Sharan\u00a0A. Gibson and Pamela Ross. 2016. Teachers\u2019 Professional Noticing. Theory Into Practice 55 3 (July 2016) 180\u2013188. 10.1080\/00405841.2016.1173996","DOI":"10.1080\/00405841.2016.1173996"},{"key":"e_1_3_3_3_23_2","unstructured":"Grammarly. 2025. Grammarly: Free AI Writing Assistance. https:\/\/www.grammarly.com\/."},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"publisher","unstructured":"Shirley Gregor Leona\u00a0Chandra Kruse and Stefan Seidel. 2020. Research Perspectives: The Anatomy of a Design Principle. Journal of the Association for Information Systems 21 6 (Nov. 2020) 1\u201349. 10.17705\/1jais.00649","DOI":"10.17705\/1jais.00649"},{"key":"e_1_3_3_3_25_2","unstructured":"Douglas Grimes and Mark Warschauer. 2010. Utility in a Fallible Tool: A Multi-Site Case Study of Automated Writing Evaluation. The Journal of Technology Learning and Assessment 8 6 (2010) 1\u201344."},{"key":"e_1_3_3_3_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581054"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"crossref","unstructured":"Vidar Gynnild. 2011. Student appeals of grades: a comparative study of university policies and practices. Assessment in Education: Principles Policy & Practice 18 1 (2011) 41\u201357.","DOI":"10.1080\/0969594X.2011.535301"},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-61691-4_18"},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"publisher","unstructured":"Bingyi Han Simon Coghlan George Buchanan and Dana McKay. 2024. Who Is Helping Whom? Student Concerns about AI- Teacher Collaboration in Higher Education Classrooms. arxiv:https:\/\/arXiv.org\/abs\/2412.14469\u00a0[cs] 10.48550\/arXiv.2412.14469","DOI":"10.48550\/arXiv.2412.14469"},{"key":"e_1_3_3_3_30_2","doi-asserted-by":"publisher","unstructured":"Jieun Han Haneul Yoo Junho Myung Minsun Kim Hyunseung Lim Yoonsu Kim Tak\u00a0Yeon Lee Hwajung Hong Juho Kim So-Yeon Ahn and Alice Oh. 2023. FABRIC: Automated Scoring and Feedback Generation for Essays. arxiv:https:\/\/arXiv.org\/abs\/2310.05191\u00a0[cs] 10.48550\/arXiv.2310.05191","DOI":"10.48550\/arXiv.2310.05191"},{"key":"e_1_3_3_3_31_2","doi-asserted-by":"publisher","unstructured":"Maralee Harrell. 2005. Grading According to a Rubric. Teaching Philosophy 28 1 (2005) 3\u201315. 10.5840\/teachphil200528111","DOI":"10.5840\/teachphil200528111"},{"key":"e_1_3_3_3_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713210"},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.745"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"publisher","unstructured":"John Hattie and Helen Timperley. 2007. The Power of Feedback. Review of Educational Research 77 1 (March 2007) 81\u2013112. 10.3102\/003465430298487","DOI":"10.3102\/003465430298487"},{"key":"e_1_3_3_3_35_2","doi-asserted-by":"publisher","unstructured":"Kenneth Holstein Vincent Aleven and Nikol Rummel. 2020. A Conceptual Framework for Human\u2013AI Hybrid Adaptivity in Education. Artificial Intelligence in Education 12163 (June 2020) 240\u2013254. 10.1007\/978-3-030-52237-7_20","DOI":"10.1007\/978-3-030-52237-7_20"},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445424"},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"publisher","unstructured":"Jussi\u00a0S. Jauhiainen and Agust\u00edn\u00a0Garagorry Guerra. 2024. Evaluating Students\u2019 Open-Ended Written Responses with LLMs: Using the RAG Framework for GPT-3.5 GPT-4 Claude-3 and Mistral-Large. arxiv:https:\/\/arXiv.org\/abs\/2405.05444\u00a0[cs] 10.48550\/arXiv.2405.05444","DOI":"10.48550\/arXiv.2405.05444"},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"publisher","unstructured":"Ziwei Ji Nayeon Lee Rita Frieske Tiezheng Yu Dan Su Yan Xu Etsuko Ishii Ye\u00a0Jin Bang Andrea Madotto and Pascale Fung. 2023. Survey of Hallucination in Natural Language Generation. ACM Comput. Surv. 55 12 (March 2023) 248:1\u2013248:38. 10.1145\/3571730","DOI":"10.1145\/3571730"},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"publisher","unstructured":"Jiun-Yin Jian Ann\u00a0M. Bisantz and Colin\u00a0G. Drury. 2000. Foundations for an Empirically Determined Scale of Trust in Automated Systems. International Journal of Cognitive Ergonomics 4 1 (March 2000) 53\u201371. 10.1207\/S15327566IJCE0401_04","DOI":"10.1207\/S15327566IJCE0401_04"},{"key":"e_1_3_3_3_40_2","doi-asserted-by":"publisher","unstructured":"Anders Jonsson and Gunilla Svingby. 2007. The Use of Scoring Rubrics: Reliability Validity and Educational Consequences. Educational Research Review 2 2 (Jan. 2007) 130\u2013144. 10.1016\/j.edurev.2007.05.002","DOI":"10.1016\/j.edurev.2007.05.002"},{"key":"e_1_3_3_3_41_2","volume-title":"Thinking, Fast and Slow","author":"Kahneman Daniel","year":"2011","unstructured":"Daniel Kahneman. 2011. Thinking, Fast and Slow. Farrar, Straus and Giroux, New York."},{"key":"e_1_3_3_3_42_2","doi-asserted-by":"publisher","unstructured":"Enkelejda Kasneci Kathrin Se\u00dfler Stefan K\u00fcchemann Maria Bannert Daryna Dementieva Frank Fischer Urs Gasser Georg Groh Stephan G\u00fcnnemann Eyke H\u00fcllermeier Stephan Krusche Gitta Kutyniok Tilman Michaeli Claudia Nerdel J\u00fcrgen Pfeffer Oleksandra Poquet Michael Sailer Albrecht Schmidt Tina Seidel Matthias Stadler Jochen Weller Jochen Kuhn and Gjergji Kasneci. 2023. ChatGPT for Good? On Opportunities and Challenges of Large Language Models for Education. 10.35542\/osf.io\/5er8f","DOI":"10.35542\/osf.io\/5er8f"},{"key":"e_1_3_3_3_43_2","first-page":"37","volume-title":"Personal Epistemology","author":"King Patricia\u00a0M.","year":"2012","unstructured":"Patricia\u00a0M. King and Karen\u00a0Strohm Kitchener. 2012. The Reflective Judgment Model: Twenty Years of Research on Epistemic Cognition. In Personal Epistemology, Barbara\u00a0K. Hofer and Paul\u00a0R. Pintrich (Eds.). Routledge, New York, 37\u201361."},{"key":"e_1_3_3_3_44_2","doi-asserted-by":"publisher","unstructured":"Chokri Kooli and Nadia Yusuf. 2025. Transforming Educational Assessment: Insights Into the Use of ChatGPT and Large Language Models in Grading. International Journal of Human\u2013Computer Interaction 41 5 (March 2025) 3388\u20133399. 10.1080\/10447318.2024.2338330","DOI":"10.1080\/10447318.2024.2338330"},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025883"},{"key":"e_1_3_3_3_46_2","doi-asserted-by":"publisher","unstructured":"Yuheng Li Mladen Rakovi\u0107 Namrata Srivastava Xinyu Li Quanlong Guan Dragan Ga\u0161evi\u0107 and Guanliang Chen. 2025. Can AI Support Human Grading? Examining Machine Attention and Confidence in Short Answer Scoring. Computers & Education 228 C (April 2025) 105244. 10.1016\/j.compedu.2025.105244","DOI":"10.1016\/j.compedu.2025.105244"},{"key":"e_1_3_3_3_47_2","unstructured":"Gregory\u00a0C Lisby. 2000. College Student Grade Disputes: Adjudicative vs. Mediative Models of Conflict Resolution. Ph.\u00a0D. Dissertation. Georgia State University."},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"crossref","unstructured":"Shusen Liu Peer-Timo Bremer Jayaraman\u00a0J. Thiagarajan Vivek Srikumar Bei Wang Yarden Livnat and Valerio Pascucci. 2017. Visual Exploration of Semantic Relationships in Neural Word Embeddings. IEEE transactions on visualization and computer graphics 24 1 (2017) 553\u2013562.","DOI":"10.1109\/TVCG.2017.2745141"},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.464"},{"key":"e_1_3_3_3_50_2","doi-asserted-by":"publisher","unstructured":"Adian Liusie Potsawee Manakul and Mark J.\u00a0F. Gales. 2024. LLM Comparative Assessment: Zero-Shot NLG Evaluation through Pairwise Comparisons Using Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2307.07889\u00a0[cs] 10.48550\/arXiv.2307.07889","DOI":"10.48550\/arXiv.2307.07889"},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580957"},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"publisher","unstructured":"Oscar Luaces Jorge D\u00edez and Antonio Bahamonde. 2018. A Peer Assessment Method to Provide Feedback Consistent Grading and Reduce Students\u2019 Burden in Massive Teaching Settings. Computers & Education 126 (Nov. 2018) 283\u2013295. 10.1016\/j.compedu.2018.07.016","DOI":"10.1016\/j.compedu.2018.07.016"},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"publisher","unstructured":"Tom Lumley. 2002. Assessment Criteria in a Large-Scale Writing Test: What Do They Really Mean to the Raters? Language Testing 19 3 (July 2002) 246\u2013276. 10.1191\/0265532202lt230oa","DOI":"10.1191\/0265532202lt230oa"},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"crossref","unstructured":"Roberto Martinez-Maldonado. 2019. A Handheld Classroom Dashboard: Teachers\u2019 Perspectives on the Use of Real-Time Collaborative Learning Analytics. International Journal of Computer-Supported Collaborative Learning 14 3 (2019) 383\u2013411.","DOI":"10.1007\/s11412-019-09308-z"},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"publisher","unstructured":"Roberto Martinez-Maldonado Andrew Clayphan Kalina Yacef and Judy Kay. 2015. MTFeedback: Providing Notifications to Enhance Teacher Awareness of Small Group Work in the Classroom. IEEE Transactions on Learning Technologies 8 2 (April 2015) 187\u2013200. 10.1109\/TLT.2014.2365027","DOI":"10.1109\/TLT.2014.2365027"},{"key":"e_1_3_3_3_56_2","doi-asserted-by":"publisher","unstructured":"Suzanne McMahon and Ian Jones. 2015. A Comparative Judgement Approach to Teacher Assessment. Assessment in Education: Principles Policy & Practice 22 3 (July 2015) 368\u2013389. 10.1080\/0969594X.2014.978839","DOI":"10.1080\/0969594X.2014.978839"},{"key":"e_1_3_3_3_57_2","doi-asserted-by":"publisher","unstructured":"Emma Mercier. 2016. Teacher Orchestration and Student Learning during Mathematics Activities in a Smart Classroom. IJSMARTTL 1 1 (2016) 33. 10.1504\/IJSMARTTL.2016.078160","DOI":"10.1504\/IJSMARTTL.2016.078160"},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"publisher","unstructured":"Marcus Messer Neil C.\u00a0C. Brown Michael K\u00f6lling and Miaojing Shi. 2024. Automated Grading and Feedback Tools for Programming Education: A Systematic Review. ACM Trans. Comput. Educ. 24 1 (Feb. 2024) 10:1\u201310:43. 10.1145\/3636515","DOI":"10.1145\/3636515"},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"publisher","DOI":"10.5555\/2002472.2002568"},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.eacl-long.4"},{"key":"e_1_3_3_3_61_2","unstructured":"Emily\u00a0T. Ott. 2022. Using Grade Appeals as a Learning Tool. Teaching in the University 1 (2022) 1."},{"key":"e_1_3_3_3_62_2","doi-asserted-by":"publisher","unstructured":"Corey Palermo and Margareta\u00a0Maria Thomson. 2018. Teacher Implementation of Self-Regulated Strategy Development with an Automated Writing Evaluation System: Effects on the Argumentative Writing Performance of Middle School Students. Contemporary Educational Psychology 54 (July 2018) 255\u2013270. 10.1016\/j.cedpsych.2018.07.002","DOI":"10.1016\/j.cedpsych.2018.07.002"},{"key":"e_1_3_3_3_63_2","doi-asserted-by":"publisher","unstructured":"Raja Parasuraman and Victor Riley. 1997. Humans and Automation: Use Misuse Disuse Abuse. Hum Factors 39 2 (June 1997) 230\u2013253. 10.1518\/001872097778543886","DOI":"10.1518\/001872097778543886"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","unstructured":"Les Perelman. 2014. When \u201cthe State of the Art\u201d Is Counting Words. Assessing Writing 21 (July 2014) 104\u2013111. 10.1016\/j.asw.2014.05.001","DOI":"10.1016\/j.asw.2014.05.001"},{"key":"e_1_3_3_3_65_2","doi-asserted-by":"publisher","unstructured":"Gustavo Pinto Isadora Cardoso-Pereira Danilo\u00a0Monteiro Ribeiro Danilo Lucena Alberto de Souza and Kiev Gama. 2023. Large Language Models for Education: Grading Open-Ended Questions Using ChatGPT. arxiv:https:\/\/arXiv.org\/abs\/2307.16696\u00a0[cs] 10.48550\/arXiv.2307.16696","DOI":"10.48550\/arXiv.2307.16696"},{"key":"e_1_3_3_3_66_2","first-page":"1","volume-title":"BERA Annual Conference, UMIST Manchester, England","author":"Pollitt Alastair","year":"2004","unstructured":"Alastair Pollitt and Victoria Crisp. 2004. Could Comparative Judgements of Script Quality Replace Traditional Marking and Improve the Validity of Exam Questions. In BERA Annual Conference, UMIST Manchester, England. British Educational Research Association, Manchester, UK, 1\u201317."},{"key":"e_1_3_3_3_67_2","unstructured":"QuillBot. 2025. Free AI-Powered Essay and Paper Checker\u2014QuillBot AI. https:\/\/quillbot.com\/essay-checker."},{"key":"e_1_3_3_3_68_2","doi-asserted-by":"publisher","unstructured":"Federica\u00a0Zoe Ricci Catalina\u00a0Mari Medina and Mine Dogucu. 2024. Automated Grading Workflows for Providing Personalized Feedback to Open-Ended Data Science Assignments. arxiv:https:\/\/arXiv.org\/abs\/2309.1292410.48550\/arXiv.2309.12924","DOI":"10.48550\/arXiv.2309.12924"},{"key":"e_1_3_3_3_69_2","doi-asserted-by":"crossref","unstructured":"Raymond Scupin. 1997. The KJ Method: A Technique for Analyzing Data Derived from Japanese Ethnology. Human Organization 56 2 (1997) 233\u2013237. jstor:44126786","DOI":"10.17730\/humo.56.2.x335923511444655"},{"key":"e_1_3_3_3_70_2","doi-asserted-by":"publisher","unstructured":"Ben Shneiderman. 2020. Human-Centered Artificial Intelligence: Reliable Safe & Trustworthy. International Journal of Human\u2013Computer Interaction 36 6 (April 2020) 495\u2013504. 10.1080\/10447318.2020.1741118","DOI":"10.1080\/10447318.2020.1741118"},{"key":"e_1_3_3_3_71_2","doi-asserted-by":"publisher","unstructured":"Valerie\u00a0J. Shute. 2008. Focus on Formative Feedback. Review of Educational Research 78 1 (March 2008) 153\u2013189. 10.3102\/0034654307313795","DOI":"10.3102\/0034654307313795"},{"key":"e_1_3_3_3_72_2","doi-asserted-by":"publisher","DOI":"10.1145\/3573051.3596191"},{"key":"e_1_3_3_3_73_2","doi-asserted-by":"publisher","unstructured":"Jacob Steiss Tamara Tate Steve Graham Jazmin Cruz Michael Hebert Jiali Wang Youngsun Moon Waverly Tseng Mark Warschauer and Carol\u00a0Booth Olson. 2024. Comparing the Quality of Human and ChatGPT Feedback of Students\u2019 Writing. Learning and Instruction 91 (June 2024) 101894. 10.1016\/j.learninstruc.2024.101894","DOI":"10.1016\/j.learninstruc.2024.101894"},{"key":"e_1_3_3_3_74_2","doi-asserted-by":"publisher","unstructured":"Marie Stevenson and Aek Phakiti. 2014. The Effects of Computer-Generated Feedback on the Quality of Writing. Assessing Writing 19 (Jan. 2014) 51\u201365. 10.1016\/j.asw.2013.11.007","DOI":"10.1016\/j.asw.2013.11.007"},{"key":"e_1_3_3_3_75_2","doi-asserted-by":"publisher","unstructured":"Lu Sun Aaron Chan Yun\u00a0Seo Chang and Steven\u00a0P. Dow. 2024. ReviewFlow: Intelligent Scaffolding to Support Academic Peer Reviewing. arxiv:https:\/\/arXiv.org\/abs\/2402.03530\u00a0[cs] 10.1145\/3640543.3645159","DOI":"10.1145\/3640543.3645159"},{"key":"e_1_3_3_3_76_2","doi-asserted-by":"publisher","unstructured":"Chad\u00a0C. Tossell Nathan\u00a0L. Tenhundfeld Ali Momen Katrina Cooley and Ewart\u00a0J. De\u00a0Visser. 2024. Student Perceptions of ChatGPT Use in a College Essay Assignment: Implications for Learning Grading and Trust in Artificial Intelligence. IEEE Transactions on Learning Technologies 17 (2024) 1069\u20131081. 10.1109\/TLT.2024.3355015","DOI":"10.1109\/TLT.2024.3355015"},{"key":"e_1_3_3_3_77_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706599.3720100"},{"key":"e_1_3_3_3_78_2","doi-asserted-by":"crossref","unstructured":"Peter\u00a0D. Turney. 2006. Similarity of Semantic Relations. Computational Linguistics 32 3 (2006) 379\u2013416.","DOI":"10.1162\/coli.2006.32.3.379"},{"key":"e_1_3_3_3_79_2","doi-asserted-by":"publisher","unstructured":"Amos Tversky and Daniel Kahneman. 1974. Judgment under Uncertainty: Heuristics and Biases: Biases in Judgments Reveal Some Heuristics of Thinking under Uncertainty. Science 185 4157 (Sept. 1974) 1124\u20131131. 10.1126\/science.185.4157.1124","DOI":"10.1126\/science.185.4157.1124"},{"key":"e_1_3_3_3_80_2","doi-asserted-by":"publisher","unstructured":"Amos Tversky and Daniel Kahneman. 1981. The Framing of Decisions and the Psychology of Choice. Science 211 4481 (Jan. 1981) 453\u2013458. 10.1126\/science.7455683","DOI":"10.1126\/science.7455683"},{"key":"e_1_3_3_3_81_2","doi-asserted-by":"publisher","unstructured":"Oleksandra Vereschak Gilles Bailly and Baptiste Caramiaux. 2021. How to Evaluate Trust in AI-Assisted Decision Making? A Survey of Empirical Methodologies. Proc. ACM Hum.-Comput. Interact. 5 CSCW2 (Oct. 2021) 327:1\u2013327:39. 10.1145\/3476068","DOI":"10.1145\/3476068"},{"key":"e_1_3_3_3_82_2","doi-asserted-by":"publisher","unstructured":"Izia\u00a0Xiaoxiao Wang Xihan Wu Edith Coates Min Zeng Jiexin Kuang Siliang Liu Mengyang Qiu and Jungyeul Park. 2024. Neural Automated Writing Evaluation with Corrective Feedback. arxiv:https:\/\/arXiv.org\/abs\/2402.17613\u00a0[cs] 10.48550\/arXiv.2402.17613","DOI":"10.48550\/arXiv.2402.17613"},{"key":"e_1_3_3_3_83_2","doi-asserted-by":"publisher","unstructured":"D. Watson L.\u00a0A. Clark and A. Tellegen. 1988. Development and Validation of Brief Measures of Positive and Negative Affect: The PANAS Scales. J Pers Soc Psychol 54 6 (June 1988) 1063\u20131070. 10.1037\/\/0022-3514.54.6.1063","DOI":"10.1037\/\/0022-3514.54.6.1063"},{"key":"e_1_3_3_3_84_2","doi-asserted-by":"publisher","unstructured":"Ping Wei Xiaosai Wang and Hui Dong. 2023. The Impact of Automated Writing Evaluation on Second Language Writing Skills of Chinese EFL Learners: A Randomized Controlled Trial. Front Psychol 14 (Sept. 2023) 1249991. 10.3389\/fpsyg.2023.1249991","DOI":"10.3389\/fpsyg.2023.1249991"},{"key":"e_1_3_3_3_85_2","doi-asserted-by":"publisher","unstructured":"Joshua Wilson and Amanda Czik. 2016. Automated Essay Evaluation Software in English Language Arts Classrooms: Effects on Teacher Feedback Student Motivation and Writing Quality. Computers & Education 100 (Sept. 2016) 94\u2013109. 10.1016\/j.compedu.2016.05.004","DOI":"10.1016\/j.compedu.2016.05.004"},{"key":"e_1_3_3_3_86_2","unstructured":"Cynthia\u00a0S. Wiseman. 2012. A Comparison of the Performance of Analytic vs. Holistic Scoring Rubrics to Assess L2 Writing. International Journal of Language Testing 2 1 (March 2012) 59\u201392."},{"key":"e_1_3_3_3_87_2","unstructured":"Kenneth Wolf and Ellen Stevens. 2007. The Role of Rubrics in Advancing and Assessing Student Learning. Journal of Effective Teaching 7 1 (2007) 3\u201314."},{"key":"e_1_3_3_3_88_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517582"},{"key":"e_1_3_3_3_89_2","doi-asserted-by":"publisher","unstructured":"Changrong Xiao Wenxing Ma Sean\u00a0Xin Xu Kunpeng Zhang Yufang Wang and Qi Fu. 2024. From Automation to Augmentation: Large Language Models Elevating Essay Scoring Landscape. arxiv:https:\/\/arXiv.org\/abs\/2401.06431\u00a0[cs] 10.48550\/arXiv.2401.06431","DOI":"10.48550\/arXiv.2401.06431"},{"key":"e_1_3_3_3_90_2","unstructured":"Wenjing Xie Juxin Niu Chun\u00a0Jason Xue and Nan Guan. 2024. Grade Like a Human: Rethinking Automated Assessment with Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2405.19694\u00a0[cs]"},{"key":"e_1_3_3_3_91_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86436-1_20"},{"key":"e_1_3_3_3_92_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-11644-5_66"},{"key":"e_1_3_3_3_93_2","unstructured":"Haneul Yoo Jieun Han So-Yeon Ahn and Alice Oh. 2024. DREsS: Dataset for Rubric-Based Essay Scoring on EFL Writing. arxiv:https:\/\/arXiv.org\/abs\/2402.16733"},{"key":"e_1_3_3_3_94_2","doi-asserted-by":"publisher","unstructured":"Audrey Zhang Yifei Gao Wannapon Suraworachet Tanya Nazaretsky and Mutlu Cukurova. 2025. Evaluating Trust in AI Human and Co-Produced Feedback Among Undergraduate Students. arxiv:https:\/\/arXiv.org\/abs\/2504.10961\u00a0[cs] 10.48550\/arXiv.2504.10961","DOI":"10.48550\/arXiv.2504.10961"},{"key":"e_1_3_3_3_95_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3714316"},{"key":"e_1_3_3_3_96_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746059.3747703"},{"key":"e_1_3_3_3_97_2","doi-asserted-by":"publisher","unstructured":"Xuanming Zhang Anthony Diaz Zixun Chen Qingyang Wu Kun Qian Erik Voss and Zhou Yu. 2024. DECOR: Improving Coherence in L2 English Writing with a Novel Benchmark for Incoherence Detection Reasoning and Rewriting. arxiv:https:\/\/arXiv.org\/abs\/2406.19650\u00a0[cs] 10.48550\/arXiv.2406.19650","DOI":"10.48550\/arXiv.2406.19650"}],"event":{"name":"CHI 2026: CHI Conference on Human Factors in Computing Systems","location":"Barcelona Spain","acronym":"CHI '26","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2026 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772318.3790814","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T10:24:56Z","timestamp":1776421496000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772318.3790814"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,13]]},"references-count":96,"alternative-id":["10.1145\/3772318.3790814","10.1145\/3772318"],"URL":"https:\/\/doi.org\/10.1145\/3772318.3790814","relation":{},"subject":[],"published":{"date-parts":[[2026,4,13]]},"assertion":[{"value":"2026-04-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}