{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T16:27:33Z","timestamp":1773246453743,"version":"3.50.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031984587","type":"print"},{"value":"9783031984594","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-98459-4_3","type":"book-chapter","created":{"date-parts":[[2025,7,19]],"date-time":"2025-07-19T19:28:34Z","timestamp":1752953314000},"page":"31-45","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Reasoning and\u00a0Sampling-Augmented MCQ Difficulty Prediction via\u00a0LLMs"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1091-6306","authenticated-orcid":false,"given":"Wanyong","family":"Feng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3980-4684","authenticated-orcid":false,"given":"Peter","family":"Tran","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2174-8777","authenticated-orcid":false,"given":"Stephen","family":"Sireci","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8475-6600","authenticated-orcid":false,"given":"Andrew S.","family":"Lan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"3_CR1","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1007\/978-3-030-78292-4_3","volume-title":"Artificial Intelligence in Education","author":"S AlKhuzaey","year":"2021","unstructured":"AlKhuzaey, S., Grasso, F., Payne, T.R., Tamma, V.: A systematic review of data-driven approaches to item difficulty prediction. In: Roll, I., McNamara, D., Sosnovsky, S., Luckin, R., Dimitrova, V. (eds.) AIED 2021. LNCS (LNAI), vol. 12748, pp. 29\u201341. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-78292-4_3"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"AlKhuzaey, S., Grasso, F., Payne, T.R., Tamma, V.: Text-based question difficulty prediction: a systematic review of automatic approaches. Int. J. Artif. Intell. Educ. 1\u201353 (2023)","DOI":"10.1007\/s40593-023-00362-1"},{"key":"3_CR3","unstructured":"Benedetto, L., Aradelli, G., Cremonesi, P., Cappelli, A., Giussani, A., Turrin, R.: On the application of transformers for estimating the difficulty of multiple-choice questions from text. In: Proceedings of the 16th Workshop on Innovative Use of NLP for Building Educational Applications, pp. 147\u2013157 (2021)"},{"key":"3_CR4","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1007\/978-3-030-52237-7_4","volume-title":"Artificial Intelligence in Education","author":"L Benedetto","year":"2020","unstructured":"Benedetto, L., Cappelli, A., Turrin, R., Cremonesi, P.: Introducing a framework to assess newly created questions with natural language processing. In: Bittencourt, I.I., Cukurova, M., Muldner, K., Luckin, R., Mill\u00e1n, E. (eds.) AIED 2020. LNCS (LNAI), vol. 12163, pp. 43\u201354. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-52237-7_4"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Benedetto, L., Cappelli, A., Turrin, R., Cremonesi, P.: R2de: a nlp approach to estimating irt parameters of newly generated questions. In: Proceedings of the Tenth International Conference on Learning Analytics & Knowledge, pp. 412\u2013421 (2020)","DOI":"10.1145\/3375462.3375517"},{"key":"3_CR6","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.623","volume":"7","author":"D Chicco","year":"2021","unstructured":"Chicco, D., Warrens, M.J., Jurman, G.: The coefficient of determination r-squared is more informative than smape, mae, mape, mse and rmse in regression analysis evaluation. Peerj Comput. Sci. 7, e623 (2021)","journal-title":"Peerj Comput. Sci."},{"issue":"1","key":"3_CR7","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1080\/15434303.2019.1674315","volume":"17","author":"IC Choi","year":"2020","unstructured":"Choi, I.C., Moon, Y.: Predicting the difficulty of efl tests based on corpus linguistic features and expert judgment. Lang. Assess. Q. 17(1), 18\u201342 (2020)","journal-title":"Lang. Assess. Q."},{"key":"3_CR8","unstructured":"Due\u00f1as, G., Jimenez, S., Ferro, G.M.: Upn-icc at bea 2024 shared task: leveraging llms for multiple-choice questions difficulty prediction. In: Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024), pp. 542\u2013550 (2024)"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Eignor, D.R.: The standards for educational and psychological testing (2013)","DOI":"10.1037\/14047-013"},{"key":"3_CR10","doi-asserted-by":"publisher","unstructured":"Feng, W., Ghosh, A., Sireci, S., Lan, A.S.: Balancing test accuracy and security in computerized adaptive testing. In: International Conference on Artificial Intelligence in Education, pp. 708\u2013713. Springer, Heidelberg (2023). https:\/\/doi.org\/10.1007\/978-3-031-36272-9_60","DOI":"10.1007\/978-3-031-36272-9_60"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Ghosh, A., Lan, A.: Bobcat: bilevel optimization-based computerized adaptive testing. In: Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence (IJCAI), pp. 2410\u20132417 (2021)","DOI":"10.24963\/ijcai.2021\/332"},{"issue":"6","key":"3_CR12","doi-asserted-by":"publisher","first-page":"969","DOI":"10.1016\/j.ipm.2018.06.007","volume":"54","author":"FY Hsu","year":"2018","unstructured":"Hsu, F.Y., Lee, H.M., Chang, T.H., Sung, Y.T.: Automated estimation of item difficulty for multiple-choice tests: an application of word embedding techniques. Inf. Process. Manag. 54(6), 969\u2013984 (2018)","journal-title":"Inf. Process. Manag."},{"key":"3_CR13","doi-asserted-by":"publisher","unstructured":"Hwang, K., Wang, K., Alomair, M., Choa, F.S., Chen, L.K.: Towards automated multiple choice question generation and evaluation: aligning with bloom\u2019s taxonomy. In: International Conference on Artificial Intelligence in Education, pp. 389\u2013396. Springer, Heidelberg (2024). https:\/\/doi.org\/10.1007\/978-3-031-64299-9_35","DOI":"10.1007\/978-3-031-64299-9_35"},{"key":"3_CR14","doi-asserted-by":"publisher","unstructured":"Jiao, Y., Shridhar, K., Cui, P., Zhou, W., Sachan, M.: Automatic educational question generation with difficulty level controls. In: International Conference on Artificial Intelligence in Education, pp. 476\u2013488. Springer, Heidelberg (2023). https:\/\/doi.org\/10.1007\/978-3-031-36272-9_39","DOI":"10.1007\/978-3-031-36272-9_39"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Kincaid, J.: Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for navy enlisted personnel. Chief Naval Techn. Train. (1975)","DOI":"10.21236\/ADA006655"},{"key":"3_CR16","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1007\/s40593-019-00186-y","volume":"30","author":"G Kurdi","year":"2020","unstructured":"Kurdi, G., Leo, J., Parsia, B., Sattler, U., Al-Emari, S.: A systematic review of automatic question generation for educational purposes. Int. J. Artif. Intell. Educ. 30, 121\u2013204 (2020)","journal-title":"Int. J. Artif. Intell. Educ."},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Loginova, E., Benedetto, L., Benoit, D., Cremonesi, P.: Towards the application of calibrated transformers to the unsupervised estimation of question difficulty from text. In: RANLP 2021, pp. 846\u2013855. INCOMA (2021)","DOI":"10.26615\/978-954-452-072-4_097"},{"key":"3_CR18","unstructured":"Loukina, A., Yoon, S.Y., Sakano, J., Wei, Y., Sheehan, K.: Textual complexity as a predictor of difficulty of listening items in language proficiency tests. In: Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers, pp. 3245\u20133253 (2016)"},{"key":"3_CR19","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)"},{"key":"3_CR20","doi-asserted-by":"publisher","first-page":"342","DOI":"10.1007\/s40593-019-00180-4","volume":"29","author":"I Pandarova","year":"2019","unstructured":"Pandarova, I., Schmidt, T., Hartig, J., Boubekki, A., Jones, R.D., Brefeld, U.: Predicting the difficulty of exercise items for dynamic difficulty adaptation in adaptive language tutoring. Int. J. Artif. Intell. Educ. 29, 342\u2013367 (2019)","journal-title":"Int. J. Artif. Intell. Educ."},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Qiu, Z., Wu, X., Fan, W.: Question difficulty prediction for multiple choice problems in medical exams. In: Proceedings of the 28th ACM International Conference on Information and Knowledge Management, pp. 139\u2013148 (2019)","DOI":"10.1145\/3357384.3358013"},{"key":"3_CR22","unstructured":"Rogoz, A.C., Ionescu, R.T.: Unibucllm: harnessing llms for automated prediction of item difficulty and response time for multiple-choice questions. arXiv preprint arXiv:2404.13343 (2024)"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Rust, J., Golombok, S.: Modern Psychometrics: The Science of Psychological Assessment. Routledge, Abingdon (2014)","DOI":"10.4324\/9781315787527"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Salton, G.: Modern information retrieval (1983)","DOI":"10.1145\/182.358466"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Srivatsa, K., Kochmar, E.: What makes math word problems challenging for llms? arXiv preprint arXiv:2403.11369 (2024)","DOI":"10.18653\/v1\/2024.findings-naacl.72"},{"key":"3_CR26","doi-asserted-by":"crossref","unstructured":"Thomas, D.R., et al.: Does multiple choice have a future in the age of generative AI? A posttest-only rct. arXiv preprint arXiv:2412.10267 (2024)","DOI":"10.1145\/3706468.3706530"},{"key":"3_CR27","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"482","DOI":"10.1007\/978-3-030-23204-7_40","volume-title":"Artificial Intelligence in Education","author":"M Ueno","year":"2019","unstructured":"Ueno, M., Miyazawa, Y.: Uniform adaptive testing using maximum clique algorithm. In: Isotani, S., Mill\u00e1n, E., Ogan, A., Hastings, P., McLaren, B., Luckin, R. (eds.) AIED 2019. LNCS (LNAI), vol. 11625, pp. 482\u2013493. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-23204-7_40"},{"issue":"7","key":"3_CR28","doi-asserted-by":"publisher","first-page":"3797","DOI":"10.1109\/TIT.2014.2320500","volume":"60","author":"T Van Erven","year":"2014","unstructured":"Van Erven, T., Harremos, P.: R\u00e9nyi divergence and kullback-leibler divergence. IEEE Trans. Inf. Theory 60(7), 3797\u20133820 (2014)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"3_CR29","doi-asserted-by":"crossref","unstructured":"Wang, Z., et al.: Educational question mining at scale: prediction, analysis and personalization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 15669\u201315677 (2021)","DOI":"10.1609\/aaai.v35i17.17846"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Xue, K., Yaneva, V., Runyon, C., Baldwin, P.: Predicting the difficulty and response time of multiple choice questions using transfer learning. In: Proceedings of the Fifteenth Workshop on Innovative Use of NLP for Building Educational Applications, pp. 193\u2013197 (2020)","DOI":"10.18653\/v1\/2020.bea-1.20"},{"key":"3_CR31","doi-asserted-by":"crossref","unstructured":"Yaneva, V., Baldwin, P., Mee, J., et\u00a0al.: Predicting the difficulty of multiple choice questions in a high-stakes medical exam. In: Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications, pp. 11\u201320 (2019)","DOI":"10.18653\/v1\/W19-4402"},{"key":"3_CR32","unstructured":"Zenisky, A.L., et\u00a0al.: Massachusetts adult proficiency tests-college and career readiness (mapt-ccr) technical manual1 (2018)"},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Tao, C.: Multi-task bert for problem difficulty prediction. In: 2020 International Conference on Communications, Information System and Computer Engineering (CISCE), pp. 213\u2013216. IEEE (2020)","DOI":"10.1109\/CISCE50729.2020.00048"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-98459-4_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T16:39:27Z","timestamp":1757263167000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-98459-4_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031984587","9783031984594"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-98459-4_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"20 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIED","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence in Education","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Palermo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aied2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aied2025.itd.cnr.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}