{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T10:34:22Z","timestamp":1777113262150,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","funder":[{"DOI":"10.13039\/100000865","name":"Bill and Melinda Gates Foundation","doi-asserted-by":"publisher","award":["INV-068961"],"award-info":[{"award-number":["INV-068961"]}],"id":[{"id":"10.13039\/100000865","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014989","name":"Chan Zuckerberg Initiative","doi-asserted-by":"publisher","award":["2024-351541"],"award-info":[{"award-number":["2024-351541"]}],"id":[{"id":"10.13039\/100014989","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3785022.3785095","type":"proceedings-article","created":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T09:39:01Z","timestamp":1777109941000},"page":"447-456","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["AI Annotation Orchestration: Evaluating LLM Verifiers to Improve the Quality of LLM Annotations in Learning Analytics"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9331-5069","authenticated-orcid":false,"given":"Bakhtawar","family":"Ahtisham","sequence":"first","affiliation":[{"name":"Cornell University, Ithaca, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0673-5721","authenticated-orcid":false,"given":"Kirk","family":"Vanacore","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9957-1342","authenticated-orcid":false,"given":"Jinsook","family":"Lee","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8045-6213","authenticated-orcid":false,"given":"Zhuqian","family":"Zhou","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5130-428X","authenticated-orcid":false,"given":"Doug","family":"Pietrzak","sequence":"additional","affiliation":[{"name":"Freshcognate LLC, Somerville, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6283-5546","authenticated-orcid":false,"given":"Rene F.","family":"Kizilcec","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,4,26]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.6852936"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-64299-9_10"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","unstructured":"Andrea\u00a0J. Bingham. 2023. From Data Management to Actionable Findings: A Five-Phase Process of Qualitative Data Analysis. International Journal of Qualitative Methods 22 (2023) 1\u201313. 10.1177\/16094069231183620","DOI":"10.1177\/16094069231183620"},{"key":"e_1_3_3_2_5_2","volume-title":"Transforming Qualitative Information: Thematic Analysis and Code Development","author":"Boyatzis Richard\u00a0E.","year":"1998","unstructured":"Richard\u00a0E. Boyatzis. 1998. Transforming Qualitative Information: Thematic Analysis and Code Development. SAGE, Thousand Oaks, CA."},{"key":"e_1_3_3_2_6_2","first-page":"7671","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics","author":"Cao Jie","year":"2025","unstructured":"Jie Cao, Abhijit Suresh, Jennifer Jacobs, Charis Clevenger, Amanda Howard, Chelsea Brown, Brent Milne, Tom Fischaber, Tamara Sumner, and James\u00a0H. Martin. 2025. Enhancing Talk Moves Analysis in Mathematics Tutoring: Datasets, Transfer, and Modeling. In Proceedings of the 31st International Conference on Computational Linguistics, Owen Rambow, Leo Wanner, Marianna Apidianaki, Hend Al-Khalifa, Barbara\u00a0Di Eugenio, and Steven Schockaert (Eds.). Association for Computational Linguistics, 7671\u20137684. https:\/\/aclanthology.org\/2025.coling-main.513\/"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","unstructured":"Michelene T.\u00a0H. Chi Nicholas De\u00a0Leeuw Mei-Ling Chiu and Christiane LaVancher. 1994. Eliciting Self-Explanations Improves Understanding. Cognitive Science 18 3 (1994) 439\u2013477. 10.1207\/s15516709cog1803_3","DOI":"10.1207\/s15516709cog1803_3"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","unstructured":"Michelene T.\u00a0H. Chi and Amanda Wylie. 2014. The ICAP Framework: Linking Cognitive Engagement to Active Learning Outcomes. Educational Psychologist 49 4 (2014) 219\u2013243. 10.1080\/00461520.2014.965823","DOI":"10.1080\/00461520.2014.965823"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","unstructured":"Christine Chin. 2006. Classroom Interaction in Science: Teacher Questioning and Feedback to Students\u2019 Responses. International Journal of Science Education 28 11 (2006) 1315\u20131346. 10.1080\/09500690600621100","DOI":"10.1080\/09500690600621100"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","unstructured":"Jacob Cohen. 1960. A Coefficient of Agreement for Nominal Scales. Educational and Psychological Measurement 20 1 (1960) 37\u201346. 10.1177\/001316446002000104","DOI":"10.1177\/001316446002000104"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","unstructured":"Meghan\u00a0E. Dale Amanda\u00a0J. Godley Sarah\u00a0A. Capello Patrick\u00a0J. Donnelly Sidney\u00a0K. D\u2019Mello and Sean\u00a0P. Kelly. 2022. Toward the Automated Analysis of Teacher Talk in Secondary ELA Classrooms. Teaching and Teacher Education 110 (2022) 1\u201313. 10.1016\/j.tate.2021.103584","DOI":"10.1016\/j.tate.2021.103584"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","unstructured":"Edward\u00a0L. Deci Richard Koestner and Richard\u00a0M. Ryan. 1999. A Meta-Analytic Review of Experiments Examining the Effects of Extrinsic Rewards on Intrinsic Motivation. Psychological Bulletin 125 6 (1999) 627\u2013668. 10.1037\/0033-2909.125.6.627","DOI":"10.1037\/0033-2909.125.6.627"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","unstructured":"Yilun Du Shuang Li Antonio Torralba Joshua\u00a0B. Tenenbaum and Igor Mordatch. 2023. Improving Factuality and Reasoning in Language Models through Multi-Agent Debate. arXiv preprint (2023). 10.48550\/arXiv.2305.14325","DOI":"10.48550\/arXiv.2305.14325"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/2330601.2330616"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372862"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","unstructured":"Arthur\u00a0C. Graesser Philip Chipman Bradley\u00a0C. Haynes and Andrew Olney. 2004. AutoTutor: A Tutor with Dialogue in Natural Language. Behavior Research Methods Instruments & Computers 36 2 (2004) 180\u2013192. 10.3758\/BF03195563","DOI":"10.3758\/BF03195563"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1080\/00461520.2010.515933"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.findings-naacl.1"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","unstructured":"John Hattie and Helen Timperley. 2007. The Power of Feedback. Review of Educational Research 77 1 (2007) 81\u2013112. 10.3102\/003465430298487","DOI":"10.3102\/003465430298487"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","unstructured":"Jennifer Jacobs Kelly Scornavacco Charis Harty Abhijit Suresh Vivian Lai and Tamara Sumner. 2022. Promoting Rich Discussions in Mathematics Classrooms: Using Personalized Automated Feedback to Support Reflection and Instructional Change. Teaching and Teacher Education Article 103631 (2022). 10.1016\/j.tate.2022.103736","DOI":"10.1016\/j.tate.2022.103736"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3448139.3448168"},{"key":"e_1_3_3_2_22_2","volume-title":"Content Analysis: An Introduction to Its Methodology (2 ed.)","author":"Krippendorff Klaus","year":"2004","unstructured":"Klaus Krippendorff. 2004. Content Analysis: An Introduction to Its Methodology (2 ed.). Sage Publications, Thousand Oaks, CA."},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","unstructured":"J.\u00a0Richard Landis and Gary\u00a0G. Koch. 1977. The Measurement of Observer Agreement for Categorical Data. Biometrics 33 1 (1977) 159\u2013174. 10.2307\/2529310","DOI":"10.2307\/2529310"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Jinsook Lee Yann Hicke Renzhe Yu Christopher Brooks and Ren\u00e9\u00a0F Kizilcec. 2024. The life cycle of large language models in education: A framework for understanding sources of bias. British Journal of Educational Technology 55 5 (2024) 1982\u20132002.","DOI":"10.1111\/bjet.13505"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","unstructured":"Xiner Liu Andres\u00a0F. Zambrano Ryan\u00a0S. Baker et\u00a0al. 2025. Qualitative Coding with GPT-4: Where It Works Better. Journal of Learning Analytics 12 1 (2025) 169\u2013185. 10.18608\/jla.2025.8575","DOI":"10.18608\/jla.2025.8575"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-76335-9_7"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.5555\/3666122.3668141"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511811678"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","unstructured":"Mary\u00a0L. McHugh. 2012. Interrater Reliability: The Kappa Statistic. Biochemia Medica 22 3 (2012) 276\u2013282. 10.11613\/BM.2012.031","DOI":"10.11613\/BM.2012.031"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3657604.3664664"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v19i1.35883"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","unstructured":"Abhay Patwardhan et\u00a0al. 2025. Automated Consistency Analysis of LLMs. arXiv preprint (2025). arxiv:https:\/\/arXiv.org\/abs\/2502.0703610.48550\/arXiv.2502.07036","DOI":"10.48550\/arXiv.2502.07036"},{"key":"e_1_3_3_2_34_2","unstructured":"Barak Rosenshine. 2012. Principles of Instruction: Research-Based Strategies That All Teachers Should Know. American Educator 36 1 (2012) 12\u201319 39."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706468.3706501"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713120"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","unstructured":"Antonette Shibani Simon Knight and Simon\u00a0Buckingham Shum. 2021. Contextualizable Learning Analytics Design: A Generic Model and Practical Application. Journal of Learning Analytics 8 2 (2021) 28\u201356. 10.1145\/3303772.3303785","DOI":"10.1145\/3303772.3303785"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2303.11366"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","unstructured":"Valerie\u00a0J. Shute. 2008. Focus on Formative Feedback. Review of Educational Research 78 1 (2008) 153\u2013189. 10.3102\/0034654307313795","DOI":"10.3102\/0034654307313795"},{"key":"e_1_3_3_2_40_2","first-page":"4654","volume-title":"Proceedings of the 13th Language Resources and Evaluation Conference (LREC)","author":"Suresh Abhijit","year":"2022","unstructured":"Abhijit Suresh, Jennifer Jacobs, Charis Harty, Margaret Perkoff, James\u00a0H. Martin, and Tamara Sumner. 2022. The TalkMoves Dataset: K-12 Mathematics Lesson Transcripts Annotated for Teacher and Student Discursive Moves. In Proceedings of the 13th Language Resources and Evaluation Conference (LREC). ELRA, Marseille, France, 4654\u20134662. https:\/\/aclanthology.org\/2022.lrec-1.497.pdf"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.bea-1.11"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","unstructured":"Robert\u00a0H. Tai et\u00a0al. 2024. An Examination of the Use of Large Language Models to Aid Analysis of Textual Data. International Journal of Qualitative Methods 23 Article 16094069241231168 (2024). 10.1177\/16094069241231168","DOI":"10.1177\/16094069241231168"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","unstructured":"Kurt VanLehn. 2011. The Relative Effectiveness of Human Tutoring Intelligent Tutoring Systems and Other Tutoring Systems. Educational Psychologist 46 4 (2011) 197\u2013221. 10.1080\/00461520.2011.611369","DOI":"10.1080\/00461520.2011.611369"},{"key":"e_1_3_3_2_44_2","volume-title":"Mind in Society: The Development of Higher Psychological Processes","author":"Vygotsky Lev\u00a0S.","year":"1978","unstructured":"Lev\u00a0S. Vygotsky. 1978. Mind in Society: The Development of Higher Psychological Processes. Harvard University Press, Cambridge, MA."},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3641960"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","unstructured":"Xuezhi Wang Jason Wei Dale Schuurmans Quoc Le Ed Chi Sharan Narang Aakanksha Chowdhery and Denny Zhou. 2022. Self-Consistency Improves Chain of Thought Reasoning in Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.11171 (2022). 10.48550\/arXiv.2203.11171","DOI":"10.48550\/arXiv.2203.11171"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","unstructured":"Cynthia Weston Terry Gandell Julie Beauchamp Lynn McAlpine Carolyn Wiseman and Caroline Beauchamp. 2001. Analyzing Interview Data: The Development and Evolution of a Coding System. Qualitative sociology 24 3 (2001) 381\u2013400. 10.1023\/A:1010690908200","DOI":"10.1023\/A:1010690908200"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","unstructured":"Alyssa\u00a0Friend Wise and David\u00a0Williamson Shaffer. 2015. Why Theory Matters More than Ever in the Age of Big Data. Journal of Learning Analytics 2 2 (2015) 5\u201313. 10.18608\/jla.2015.22.2","DOI":"10.18608\/jla.2015.22.2"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","unstructured":"David\u00a0J. Wood Jerome\u00a0S. Bruner and Gail Ross. 1976. The Role of Tutoring in Problem Solving. Journal of Child Psychology and Psychiatry 17 2 (1976) 89\u2013100. 10.1111\/j.1469-7610.1976.tb00381.x","DOI":"10.1111\/j.1469-7610.1976.tb00381.x"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-373594-2.X0001-9"},{"key":"e_1_3_3_2_51_2","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Ye Jiayi","year":"2025","unstructured":"Jiayi Ye, Yanbo Wang, Yue Huang, Dongping Chen, Qihui Zhang, Nuno Moniz, Tian Gao, Werner Geyer, Chao Huang, Pin-Yu Chen, Nitesh\u00a0V Chawla, and Xiangliang Zhang. 2025. Justice or Prejudice? Quantifying Biases in LLM-as-a-Judge. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=3GTtZFiajM"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"crossref","unstructured":"Bin Yu. 2024. After Computational Reproducibility: Scientific Reproducibility and Trustworthy AI. Harvard Data Science Review 6 3 (2024). https:\/\/hdsr.mitpress.mit.edu\/pub\/8qexde24\/release\/1","DOI":"10.1162\/99608f92.ea5e6f9a"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-47014-1_32"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","unstructured":"Luping Zheng et\u00a0al. 2024. A Survey on LLM-as-a-Judge. arXiv preprint (2024). arxiv:https:\/\/arXiv.org\/abs\/2411.1559410.48550\/arXiv.2411.15594","DOI":"10.48550\/arXiv.2411.15594"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.5555\/3666122.3668142"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","unstructured":"Shuyan Zhou Frank\u00a0F. Xu Hao Zhu Xuhui Zhou Robert Lo Abishek Sridhar Xianyi Cheng Yonatan Bisk Daniel Fried Uri Alon et\u00a0al. 2023. WebArena: A Realistic Web Environment for Building Autonomous Agents. arXiv preprint (2023). 10.48550\/arXiv.2307.13854","DOI":"10.48550\/arXiv.2307.13854"}],"event":{"name":"LAK 2026: LAK26: 16th International Learning Analytics and Knowledge Conference","location":"Bergen Norway","acronym":"LAK 2026"},"container-title":["Proceedings of the LAK26: 16th International Learning Analytics and Knowledge Conference"],"original-title":[],"deposited":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T09:40:24Z","timestamp":1777110024000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3785022.3785095"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,26]]},"references-count":55,"alternative-id":["10.1145\/3785022.3785095","10.1145\/3785022"],"URL":"https:\/\/doi.org\/10.1145\/3785022.3785095","relation":{},"subject":[],"published":{"date-parts":[[2026,4,26]]},"assertion":[{"value":"2026-04-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}