{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T11:48:20Z","timestamp":1778759300702,"version":"3.51.4"},"publisher-location":"Cham","reference-count":55,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031984167","type":"print"},{"value":"9783031984174","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-98417-4_9","type":"book-chapter","created":{"date-parts":[[2025,7,17]],"date-time":"2025-07-17T10:05:28Z","timestamp":1752746728000},"page":"119-134","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["An LLM-Enhanced Multi-agent Architecture for\u00a0Conversation-Based Assessment"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1182-5839","authenticated-orcid":false,"given":"Xinying","family":"Hou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4830-5156","authenticated-orcid":false,"given":"Carol","family":"Forsyth","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0501-9782","authenticated-orcid":false,"given":"Jessica","family":"Andrews-Todd","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7976-5304","authenticated-orcid":false,"given":"James","family":"Rice","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2107-3378","authenticated-orcid":false,"given":"Zhiqiang","family":"Cai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2195-5776","authenticated-orcid":false,"given":"Yang","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0620-7622","authenticated-orcid":false,"given":"Diego","family":"Zapata-Rivera","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0345-6866","authenticated-orcid":false,"given":"Art","family":"Graesser","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,15]]},"reference":[{"key":"9_CR1","unstructured":"Langchain (2025). https:\/\/www.langchain.com\/"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Akhtar, H., Firdiyanti, R.: Test-taking motivation and performance: do self-report and time-based measures of effort reflect the same aspects of test-taking motivation? Learn. Individ. Differ. (2023)","DOI":"10.1016\/j.lindif.2023.102323"},{"key":"9_CR3","unstructured":"Chan, C.M., et al.: ChatEval: towards better LLM-based evaluators through multi-agent debate. arXiv preprint arXiv:2308.07201 (2023)"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Chen, E., Huang, R., Chen, H.S., Tseng, Y.H., Li, L.Y.: GPTutor: a ChatGPT-powered programming tool for code explanation. In: International Conference on Artificial Intelligence in Education, pp. 321\u2013327. Springer (2023)","DOI":"10.1007\/978-3-031-36336-8_50"},{"key":"9_CR5","unstructured":"Cheng, Y., et\u00a0al.: Exploring large language model based intelligent agents: definitions, methods, and prospects. arXiv preprint arXiv:2401.03428 (2024)"},{"key":"9_CR6","unstructured":"Dasgupta, I., et al.: Language models show human-like content effects on reasoning. arXiv preprint arXiv:2207.07051, 2(3) (2022)"},{"issue":"6","key":"9_CR7","doi-asserted-by":"publisher","first-page":"345","DOI":"10.3390\/info15060345","volume":"15","author":"S Filippi","year":"2024","unstructured":"Filippi, S., Motyl, B.: Large language models (LLMS) in engineering education: a systematic review and suggestions for practical adoption. Information 15(6), 345 (2024)","journal-title":"Information"},{"issue":"4","key":"9_CR8","doi-asserted-by":"publisher","first-page":"398","DOI":"10.1080\/09588221.2018.1517126","volume":"32","author":"CM Forsyth","year":"2019","unstructured":"Forsyth, C.M., Luce, C., Zapata-Rivera, D., Jackson, G.T., Evanini, K., So, Y.: Evaluating English language learners\u2019 conversations: man vs. machine. Comput. Assist. Lang. Learn. 32(4), 398\u2013417 (2019)","journal-title":"Comput. Assist. Lang. Learn."},{"key":"9_CR9","unstructured":"Forsyth, C.M., Zapata-Rivera, D., Graf, E.A., Jiang, Y.: Complex conversations: LLM vs. knowledge engineering conversation-based assessment (2024)"},{"key":"9_CR10","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1007\/s40593-015-0086-4","volume":"26","author":"AC Graesser","year":"2016","unstructured":"Graesser, A.C.: Conversations with AutoTutor help students learn. Int. J. Artif. Intell. Educ. 26, 124\u2013132 (2016)","journal-title":"Int. J. Artif. Intell. Educ."},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Graesser, A.C., Dowell, N., Clewley, D.: Assessing collaborative problem solving through conversational agents. In: Innovative Assessment of Collaboration, pp. 65\u201380 (2017)","DOI":"10.1007\/978-3-319-33261-1_5"},{"issue":"4","key":"9_CR12","first-page":"39","volume":"22","author":"AC Graesser","year":"2001","unstructured":"Graesser, A.C., VanLehn, K., Ros\u00e9, C.P., Jordan, P.W., Harter, D.: Intelligent tutoring systems with conversational dialogue. AI Mag. 22(4), 39\u201339 (2001)","journal-title":"AI Mag."},{"issue":"1","key":"9_CR13","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.stueduc.2012.04.001","volume":"38","author":"A Havnes","year":"2012","unstructured":"Havnes, A., Smith, K., Dysthe, O., Ludvigsen, K.: Formative assessment and feedback: making learning visible. Stud. Educ. Eval. 38(1), 21\u201327 (2012)","journal-title":"Stud. Educ. Eval."},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"He, J., Treude, C., Lo, D.: LLM-based multi-agent systems for software engineering: literature review, vision and the road ahead. ACM Trans. Softw. Eng. Methodol. (2025)","DOI":"10.1145\/3712003"},{"key":"9_CR15","unstructured":"Hong, S., et\u00a0al.: MetaGPT: meta programming for multi-agent collaborative framework. arXiv preprint arXiv:2308.00352 (2023)"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Hou, X., Wu, Z., Wang, X., Ericson, B.J.: CodeTailor: LLM-powered personalized parsons puzzles for engaging support while learning programming. In: Proceedings of the Eleventh ACM Conference on Learning@ Scale, pp. 51\u201362 (2024)","DOI":"10.1145\/3657604.3662032"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"Irons, A., Elkington, S.: Enhancing Learning Through Formative Assessment and Feedback. Routledge (2021)","DOI":"10.4324\/9781138610514"},{"key":"9_CR18","unstructured":"Jiang, B., et al.: Multi-modal and multi-agent systems meet rationality: a survey. In: ICML 2024 Workshop on LLMs and Cognition (2024)"},{"key":"9_CR19","doi-asserted-by":"crossref","unstructured":"Jin, H., Lee, S., Shin, H., Kim, J.: Teach AI how to code: using large language models as teachable agents for programming education. In: Proceedings of the CHI Conference on Human Factors in Computing Systems, pp. 1\u201328 (2024)","DOI":"10.1145\/3613904.3642349"},{"key":"9_CR20","unstructured":"Jurafsky, D., Martin, J.H.: Speech and Language Processing (2000)"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Kannan, S.S., Venkatesh, V.L., Min, B.C.: Smart-LLM: smart multi-agent robot task planning using large language models. In: 2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 12140\u201312147. IEEE (2024)","DOI":"10.1109\/IROS58592.2024.10802322"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Landauer, T.K., McNamara, D.S., Dennis, S., Kintsch, W.: Handbook of Latent Semantic Analysis. Psychology Press (2007)","DOI":"10.4324\/9780203936399"},{"issue":"1","key":"9_CR23","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1109\/TLT.2018.2810878","volume":"11","author":"BA Lehman","year":"2018","unstructured":"Lehman, B.A., Zapata-Rivera, D.: Student emotions in conversation-based assessments. IEEE Trans. Learn. Technol. 11(1), 41\u201353 (2018)","journal-title":"IEEE Trans. Learn. Technol."},{"issue":"1","key":"9_CR24","first-page":"10","volume":"18","author":"L Liu","year":"2016","unstructured":"Liu, L., Steinberg, J., Qureshi, F., Bejar, I., Yan, F.: Conversation-based assessments: an innovative approach to measure scientific reasoning. Bull. IEEE Tech. Committee Learn. Technol. 18(1), 10\u201313 (2016)","journal-title":"Bull. IEEE Tech. Committee Learn. Technol."},{"issue":"1","key":"9_CR25","first-page":"1","volume":"2021","author":"AA Lopez","year":"2021","unstructured":"Lopez, A.A., Guzman-Orth, D., Zapata-Rivera, D., Forsyth, C.M., Luce, C.: Examining the accuracy of a conversation-based assessment in interpreting English learners\u2019 written responses. ETS Res. Rep. Ser. 2021(1), 1\u201315 (2021)","journal-title":"ETS Res. Rep. Ser."},{"key":"9_CR26","doi-asserted-by":"crossref","unstructured":"Lu, X., Wang, X.: Generative students: using LLM-simulated student profiles to support question item evaluation. In: Proceedings of the Eleventh ACM Conference on Learning@ Scale, pp. 16\u201327 (2024)","DOI":"10.1145\/3657604.3662031"},{"issue":"5","key":"9_CR27","first-page":"892","volume":"8","author":"S Manish","year":"2024","unstructured":"Manish, S.: An autonomous multi-agent LLM framework for agile software development. Int. J. Trend Sci. Res. Dev. 8(5), 892\u2013898 (2024)","journal-title":"Int. J. Trend Sci. Res. Dev."},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"McNamara, D.S., Graesser, A.C., McCarthy, P.M., Cai, Z.: Automated Evaluation of Text and Discourse with Coh-Metrix. Cambridge University Press (2014)","DOI":"10.1017\/CBO9780511894664"},{"key":"9_CR29","doi-asserted-by":"crossref","unstructured":"Mislevy, R.J., Almond, R.G., Lukas, J.F.: A brief introduction to evidence-centered design. ETS Res. Rep. Ser. 2003(1), i\u201329 (2003)","DOI":"10.1002\/j.2333-8504.2003.tb01908.x"},{"key":"9_CR30","doi-asserted-by":"crossref","unstructured":"Mislevy, R.J., Riconscente, M.M.: Evidence-centered assessment design. In: Handbook of Test Development, pp. 75\u2013104. Routledge (2011)","DOI":"10.4324\/9780203874776-9"},{"key":"9_CR31","doi-asserted-by":"crossref","unstructured":"Nguyen, H.A., Stec, H., Hou, X., Di, S., McLaren, B.M.: Evaluating ChatGPT\u2019s decimal skills and feedback generation in a digital learning game. In: European Conference on Technology Enhanced Learning, pp. 278\u2013293. Springer (2023)","DOI":"10.1007\/978-3-031-42682-7_19"},{"key":"9_CR32","unstructured":"Nye, B.D., Mee, D., Core, M.G.: Generative large language models for dialog-based tutoring: an early consideration of opportunities and concerns. In: LLM@ AIED, pp. 78\u201388 (2023)"},{"key":"9_CR33","doi-asserted-by":"crossref","unstructured":"Shao, Z., Yuan, S., Gao, L., He, Y., Yang, D., Chen, S.: Unlocking scientific concepts: how effective are LLM-generated analogies for student understanding and classroom practice? arXiv preprint arXiv:2502.16895 (2025)","DOI":"10.1145\/3706598.3714313"},{"issue":"2","key":"9_CR34","first-page":"1","volume":"2015","author":"KM Sheehan","year":"2015","unstructured":"Sheehan, K.M., Flor, M., Napolitano, D., Ramineni, C.: Using textevaluator\u00ae to quantify sources of linguistic complexity in textbooks targeted at first-grade readers over the past half century. ETS Res. Rep. Ser. 2015(2), 1\u201317 (2015)","journal-title":"ETS Res. Rep. Ser."},{"key":"9_CR35","doi-asserted-by":"crossref","unstructured":"Stamper, J., Xiao, R., Hou, X.: Enhancing LLM-based feedback: insights from intelligent tutoring systems and the learning sciences. In: International Conference on Artificial Intelligence in Education, pp. 32\u201343. Springer (2024)","DOI":"10.1007\/978-3-031-64315-6_3"},{"key":"9_CR36","doi-asserted-by":"crossref","unstructured":"Sun, G., Zhan, N., Such, J.: Building better AI agents: a provocation on the utilisation of persona in LLM-based conversational agents. In: Proceedings of the 6th International Conference on Conversational User Interfaces, CUI 2024 (2024)","DOI":"10.1145\/3640794.3665887"},{"key":"9_CR37","unstructured":"Talebirad, Y., Nadiri, A.: Multi-agent collaboration: harnessing the power of intelligent LLM agents. arXiv preprint arXiv:2306.03314 (2023)"},{"key":"9_CR38","unstructured":"Tao, W., Zhou, Y., Wang, Y., Zhang, W., Zhang, H., Cheng, Y.: MAGIS: LLM-based multi-agent framework for GitHub issue resolution. arXiv preprint arXiv:2403.17927 (2024)"},{"key":"9_CR39","doi-asserted-by":"crossref","unstructured":"Tovani, C.: So What Do They Really Know?: Assessment that Informs Teaching and Learning. Routledge (2023)","DOI":"10.4324\/9781032682389"},{"issue":"2","key":"9_CR40","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1080\/10627197.2020.1858786","volume":"26","author":"E Ulitzsch","year":"2021","unstructured":"Ulitzsch, E., Penk, C., von Davier, M., Pohl, S.: Model meets reality: validating a new behavioral measure for test-taking effort. Educ. Assess. 26(2), 104\u2013124 (2021)","journal-title":"Educ. Assess."},{"key":"9_CR41","doi-asserted-by":"crossref","unstructured":"Wang, T., et al.: LLM-powered multi-agent framework for goal-oriented learning in intelligent tutoring system. arXiv preprint arXiv:2501.15749 (2025)","DOI":"10.1145\/3701716.3715244"},{"issue":"2","key":"9_CR42","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1177\/07356331241305519","volume":"63","author":"X Wei","year":"2025","unstructured":"Wei, X., Wang, L., Lee, L.K., Liu, R.: Multiple generative AI pedagogical agents in augmented reality environments: a study on implementing the 5E model in science education. J. Educ. Comput. Res. 63(2), 336\u2013371 (2025)","journal-title":"J. Educ. Comput. Res."},{"issue":"1","key":"9_CR43","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.stueduc.2011.03.001","volume":"37","author":"D Wiliam","year":"2011","unstructured":"Wiliam, D.: What is assessment for learning? Stud. Educ. Eval. 37(1), 3\u201314 (2011)","journal-title":"Stud. Educ. Eval."},{"issue":"4","key":"9_CR44","doi-asserted-by":"publisher","first-page":"343","DOI":"10.1080\/08957347.2017.1353992","volume":"30","author":"SL Wise","year":"2017","unstructured":"Wise, S.L., Gao, L.: A general approach to measuring test-taking effort on computer-based tests. Appl. Measur. Educ. 30(4), 343\u2013354 (2017)","journal-title":"Appl. Measur. Educ."},{"key":"9_CR45","doi-asserted-by":"crossref","unstructured":"Xiao, R., Hou, X., Stamper, J.: Exploring how multiple levels of GPT-generated programming hints support or disappoint novices. In: Extended Abstracts of the CHI Conference on Human Factors in Computing Systems, pp. 1\u201310 (2024)","DOI":"10.1145\/3613905.3650937"},{"key":"9_CR46","doi-asserted-by":"crossref","unstructured":"Yildirim-Erbasli, S., Bulut, O.: Innovating assessment with conversational agents: a technology-enhanced approach to formative assessments. In: 2023 IEEE International Conference on Advanced Learning Technologies (ICALT), pp. 331\u2013335. IEEE (2023)","DOI":"10.1109\/ICALT58122.2023.00103"},{"key":"9_CR47","unstructured":"Yildirim-Erbasli, S.N., Bulut, O.: Designing predictive models for early prediction of students\u2019 test-taking engagement in computerized formative assessments. J. Appl. Test. Technol. (2022)"},{"key":"9_CR48","doi-asserted-by":"publisher","first-page":"100135","DOI":"10.1016\/j.caeai.2023.100135","volume":"4","author":"SN Yildirim-Erbasli","year":"2023","unstructured":"Yildirim-Erbasli, S.N., Bulut, O.: Conversation-based assessment: a novel approach to boosting test-taking effort in digital formative assessment. Comput. Educ. Artif. Intell. 4, 100135 (2023)","journal-title":"Comput. Educ. Artif. Intell."},{"issue":"1","key":"9_CR49","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1177\/00472395231178943","volume":"52","author":"SN Yildirim-Erbasli","year":"2023","unstructured":"Yildirim-Erbasli, S.N., Bulut, O., Demmans Epp, C., Cui, Y.: Conversation-based assessments in education: design, implementation, and cognitive walkthroughs for usability testing. J. Educ. Technol. Syst. 52(1), 27\u201351 (2023)","journal-title":"J. Educ. Technol. Syst."},{"issue":"7\u20138","key":"9_CR50","doi-asserted-by":"publisher","first-page":"368","DOI":"10.1080\/13803611.2021.1977152","volume":"26","author":"SN Yildirim-Erbasli","year":"2020","unstructured":"Yildirim-Erbasli, S.N., Bulut, O.: The impact of students\u2019 test-taking effort on growth estimates in low-stakes educational assessments. Educ. Res. Eval. 26(7\u20138), 368\u2013386 (2020)","journal-title":"Educ. Res. Eval."},{"key":"9_CR51","unstructured":"Yue, M., Mifdal, W., Zhang, Y., Suh, J., Yao, Z.: MathVC: an LLM-simulated multi-character virtual classroom for mathematics education (2024)"},{"key":"9_CR52","doi-asserted-by":"crossref","unstructured":"Zapata-Rivera, D., Forsyth, C.M.: Learner modeling in conversation-based assessment. In: International Conference on Human-Computer Interaction, pp. 73\u201383. Springer (2022)","DOI":"10.1007\/978-3-031-05887-5_6"},{"key":"9_CR53","unstructured":"Zapata-Rivera, D., Forsyth, C., Graf, A., Jiang, Y.: Designing and evaluating evidence-centered-design-based conversations for assessment with LLMs. In: Proceedings of EDM 2024 Workshop: Leveraging Large Language Models for Next Generation Educational Technologies (2024)"},{"key":"9_CR54","doi-asserted-by":"crossref","unstructured":"Zapata-Rivera, D., Liu, L., Chen, L., Hao, J., von Davier, A.A.: Assessing science inquiry skills in an immersive, conversation-based scenario. In: Big Data and Learning Analytics in Higher Education: Current Theory and Practice, pp. 237\u2013252 (2017)","DOI":"10.1007\/978-3-319-06520-5_14"},{"key":"9_CR55","doi-asserted-by":"crossref","unstructured":"Zapata-Rivera, D., Sparks, J.R., Forsyth, C.M., Lehman, B.: Conversation-based assessment: current findings and future work (2023)","DOI":"10.1016\/B978-0-12-818630-5.10063-6"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-98417-4_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T12:55:14Z","timestamp":1757249714000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-98417-4_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031984167","9783031984174"],"references-count":55,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-98417-4_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"15 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIED","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence in Education","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Palermo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aied2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aied2025.itd.cnr.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}