{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T06:22:08Z","timestamp":1781763728368,"version":"3.54.5"},"reference-count":88,"publisher":"Elsevier BV","issue":"3","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T00:00:00Z","timestamp":1777248000000},"content-version":"vor","delay-in-days":603,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/100018948","name":"Saudi Arabian Cultural Bureau","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100018948","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006701","name":"Umm Al-Qura University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006701","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["International Journal of Artificial Intelligence in Education"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1007\/s40593-023-00362-1","type":"journal-article","created":{"date-parts":[[2023,9,8]],"date-time":"2023-09-08T12:16:09Z","timestamp":1694175369000},"page":"862-914","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":37,"title":["Text-based Question Difficulty Prediction: A Systematic Review of Automatic Approaches"],"prefix":"10.1016","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8883-1172","authenticated-orcid":false,"given":"Samah","family":"AlKhuzaey","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8419-6554","authenticated-orcid":false,"given":"Floriana","family":"Grasso","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0106-8731","authenticated-orcid":false,"given":"Terry R.","family":"Payne","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1320-610X","authenticated-orcid":false,"given":"Valentina","family":"Tamma","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1007\/s40593-023-00362-1_bib1","doi-asserted-by":"crossref","unstructured":"AlKhuzaey, S., Grasso, F., Payne, T. R., & Tamma, V. (2021). A systematic review of data-driven approaches to item difficulty prediction. In International Conference on Artificial Intelligence in Education (pp. 29\u201341). Springer.","DOI":"10.1007\/978-3-030-78292-4_3"},{"key":"10.1007\/s40593-023-00362-1_bib2","doi-asserted-by":"crossref","unstructured":"Alsubait, T., Parsia, B., & Sattler, U. (2013). A similarity-based theory of controlling MCQ difficulty. In 2013 Second International Conference on Elearning and E-Technologies in Education (ICEEE) (pp. 283\u2013288). IEEE.","DOI":"10.1109\/ICeLeTE.2013.6644389"},{"issue":"2","key":"10.1007\/s40593-023-00362-1_bib3","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1007\/s13218-015-0405-9","article-title":"Ontology-based multiple choice question generation","volume":"30","author":"Alsubait","year":"2016","journal-title":"KI-K\u00fcnstliche Intelligenz"},{"key":"10.1007\/s40593-023-00362-1_bib4","doi-asserted-by":"crossref","unstructured":"Amidei, J., Piwek, P., & Willis, A. (2018). Evaluation methodologies in automatic question generation 2013\u20132018. In Proceedings of the 11th International Natural Language Generation Conference (pp. 307\u2013317).","DOI":"10.18653\/v1\/W18-6537"},{"key":"10.1007\/s40593-023-00362-1_bib5","doi-asserted-by":"crossref","unstructured":"Aryadoust, V. (2013). Predicting item difficulty in a language test with an adaptive neuro fuzzy inference system. In IEEE Workshop on Hybrid Intelligent Models and Applications (HIMA) (Vol. 2013, pp. 43\u201350).","DOI":"10.1109\/HIMA.2013.6615021"},{"key":"10.1007\/s40593-023-00362-1_bib6","series-title":"The basics of item response theory using R","author":"Baker","year":"2017"},{"key":"10.1007\/s40593-023-00362-1_bib7","doi-asserted-by":"crossref","first-page":"517","DOI":"10.1162\/tacl_a_00200","article-title":"Predicting the difficulty of language proficiency tests","volume":"2","author":"Beinborn","year":"2014","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"10.1007\/s40593-023-00362-1_bib8","doi-asserted-by":"crossref","unstructured":"Beinborn, L., Zesch, T., & Gurevych, I. (2015). Candidate evaluation strategies for improved difficulty prediction of language tests. In Proceedings of the Tenth Workshop on Innovative use of NLP for Building Educational Applications (pp. 1\u201311).","DOI":"10.3115\/v1\/W15-0601"},{"key":"10.1007\/s40593-023-00362-1_bib9","unstructured":"Benedetto, L., Aradelli, G., Cremonesi, P., Cappelli, A., Giussani, A., & Turrin, R. (2021). On the application of transformers for estimating the difficulty of multiple-choice questions from text. In Proceedings of the 16th Workshop on Innovative Use of NLP for Building Educational Applications (pp. 147\u2013157)."},{"key":"10.1007\/s40593-023-00362-1_bib10","doi-asserted-by":"crossref","unstructured":"Benedetto, L., Cappelli, A., Turrin, R., & Cremonesi, P. (2020a). Introducing a framework to assess newly created questions with natural language processing. In I. I. Bittencourt, M. Cukurova, K. Muldner, R. Luckin, & E. Mill\u00e1n (Eds.), Artificial intelligence in education (pp. 43\u201354). Springer.","DOI":"10.1007\/978-3-030-52237-7_4"},{"key":"10.1007\/s40593-023-00362-1_bib11","doi-asserted-by":"crossref","unstructured":"Benedetto, L., Cappelli, A., Turrin, R., & Cremonesi, P. (2020b). R2DE: A NLP approach to estimating IRT parameters of newly generated questions. In Proceedings of the 10th International Conference on Learning Analytics & Knowledge (pp. 412\u2013421).","DOI":"10.1145\/3375462.3375517"},{"issue":"9","key":"10.1007\/s40593-023-00362-1_bib12","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3556538","article-title":"A survey on recent approaches to question difficulty estimation from text","volume":"55","author":"Benedetto","year":"2023","journal-title":"ACM Computing Surveys"},{"key":"10.1007\/s40593-023-00362-1_bib13","doi-asserted-by":"crossref","unstructured":"Bi, S., Cheng, X., Li, Y.-F., Qu, L., Shen, S., Qi, G., \u2026 Jiang, Y. (2021). Simple or complex? complexity-controllable question generation with soft templates and deep mixture of experts model. arXivpreprint arXiv:2110.06560","DOI":"10.18653\/v1\/2021.findings-emnlp.397"},{"issue":"2","key":"10.1007\/s40593-023-00362-1_bib14","doi-asserted-by":"crossref","first-page":"i","DOI":"10.1002\/j.2333-8504.1998.tb01786.x","article-title":"GRE analytical reasoning item statistics prediction study","volume":"1998","author":"Boldt","year":"1998","journal-title":"ETS Research Report Series"},{"issue":"2","key":"10.1007\/s40593-023-00362-1_bib15","doi-asserted-by":"crossref","first-page":"i","DOI":"10.1002\/j.2333-8504.1996.tb01709.x","article-title":"Using a neural net to predict item difficulty","volume":"1996","author":"Boldt","year":"1996","journal-title":"ETS Research Report Series"},{"key":"10.1007\/s40593-023-00362-1_bib16","doi-asserted-by":"crossref","unstructured":"Cheng, S., Liu, Q., Chen, E., Huang, Z., Huang, Z., Chen, Y., & Hu, G. (2019). DIRT: Deep learning enhanced item response theory for cognitive diagnosis. In Proceedings of the 28th ACM International Conference on Information and Knowledge Management (pp. 2397\u20132400).","DOI":"10.1145\/3357384.3358070"},{"issue":"1","key":"10.1007\/s40593-023-00362-1_bib17","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1080\/15434303.2019.1674315","article-title":"Predicting the difficulty of EFL tests based on corpus linguistic features and expert judgment","volume":"17","author":"Choi","year":"2020","journal-title":"Language Assessment Quarterly"},{"issue":"4","key":"10.1007\/s40593-023-00362-1_bib18","doi-asserted-by":"crossref","first-page":"497","DOI":"10.1080\/14640748108400805","article-title":"The MRC psycholinguistic database","volume":"33","author":"Coltheart","year":"1981","journal-title":"The Quarterly Journal of Experimental Psychology Section A"},{"issue":"2","key":"10.1007\/s40593-023-00362-1_bib19","doi-asserted-by":"crossref","first-page":"594","DOI":"10.1016\/j.eswa.2013.07.084","article-title":"An empirical study on the quantitative notion of task difficulty","volume":"41","author":"Conejo","year":"2014","journal-title":"Expert Systems with Applications"},{"key":"10.1007\/s40593-023-00362-1_bib20","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K. (2018). Bert: Pre-training of deep bidirectional transformers for language understanding. arXivpreprint arXiv:1810.04805"},{"key":"10.1007\/s40593-023-00362-1_bib21","unstructured":"Dhillon, D. (2011). Predictive models of question difficulty-a critical review of the literature. The Assessment and Qualifications Alliance, 21"},{"issue":"1","key":"10.1007\/s40593-023-00362-1_bib22","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1080\/09585176.2016.1232201","article-title":"Predicting item difficulty of science national curriculum tests: The case of key stage 2 assessments","volume":"28","author":"El Masri","year":"2017","journal-title":"The Curriculum Journal"},{"key":"10.1007\/s40593-023-00362-1_bib23","doi-asserted-by":"crossref","unstructured":"Faizan, A., & Lohmann, S. (2018). Automatic generation of multiple choice questions from slide content using linked data. In Proceedings of the 8th International Conference on Web Intelligence, Mining and Semantics (pp. 1\u20138).","DOI":"10.1145\/3227609.3227656"},{"key":"10.1007\/s40593-023-00362-1_bib24","doi-asserted-by":"crossref","unstructured":"Fei, T., Heng, W. J., Toh, K. C., & Qi, T. (2003). Question classification for e-learning by artificial neural network. In Fourth international conference on information, communications and signal processing, 2003 and the fourth pacific rim conference on multimedia. Proceedings of the 2003 joint (Vol. 3, pp. 1757\u20131761).","DOI":"10.1109\/ICICS.2003.1292768"},{"key":"10.1007\/s40593-023-00362-1_bib25","doi-asserted-by":"crossref","unstructured":"Felice, M., & Buttery, P. (2019). Entropy as a proxy for gap complexity in open cloze tests. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019) (pp. 323\u2013327).","DOI":"10.26615\/978-954-452-056-4_037"},{"issue":"3","key":"10.1007\/s40593-023-00362-1_bib26","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1037\/h0057532","article-title":"A new readability yardstick","volume":"32","author":"Flesch","year":"1948","journal-title":"Journal of Applied Psychology"},{"key":"10.1007\/s40593-023-00362-1_bib27","doi-asserted-by":"crossref","unstructured":"Franzen, M. (2011). Item difficulty. Encyclopedia of Clinical Neuropsychology, 100\u2013100.","DOI":"10.1007\/978-0-387-79948-3_1208"},{"key":"10.1007\/s40593-023-00362-1_bib28","doi-asserted-by":"crossref","unstructured":"Gao, Y., Bing, L., Chen, W., Lyu, M.R., King, I. (2018). Difficulty controllable generation of reading comprehension questions. arXivpreprint arXiv:1807.03586","DOI":"10.24963\/ijcai.2019\/690"},{"issue":"3","key":"10.1007\/s40593-023-00362-1_bib29","doi-asserted-by":"crossref","first-page":"347","DOI":"10.1007\/s10462-013-9417-8","article-title":"Teaching assistance and automatic difficulty estimation in converting first order logic to clause form","volume":"42","author":"Grivokostopoulou","year":"2014","journal-title":"Artificial Intelligence Review"},{"key":"10.1007\/s40593-023-00362-1_bib30","doi-asserted-by":"crossref","unstructured":"Grivokostopoulou, F., Perikos, I., & Hatzilygeroudis, I. (2015). Estimating the difficulty of exercises on search algorithms using a neuro-fuzzy approach. In 2015 IEEE 27th International Conference on Tools with Artificial Intelligence (ICTAI) (pp. 866\u2013872).","DOI":"10.1109\/ICTAI.2015.126"},{"key":"10.1007\/s40593-023-00362-1_bib31","doi-asserted-by":"crossref","unstructured":"Grivokostopoulou, F., Perikos, I., & Hatzilygeroudis, I. (2017). Difficulty estimation of exercises on tree-based search algorithms using neuro-fuzzy and neuro-symbolic approaches. In Advances in combining intelligent methods (pp. 75\u201391). Springer.","DOI":"10.1007\/978-3-319-46200-4_4"},{"key":"10.1007\/s40593-023-00362-1_bib32","doi-asserted-by":"crossref","unstructured":"Ha, V., Baldwin, P., Mee, J., et al. (2019). Predicting the difficulty of multiple choice questions in a high-stakes medical exam. In Proceedings of the 14th workshop on innovative use of NLP for building educational applications (pp. 11\u201320).","DOI":"10.18653\/v1\/W19-4402"},{"issue":"3","key":"10.1007\/s40593-023-00362-1_bib33","doi-asserted-by":"crossref","first-page":"38","DOI":"10.1111\/j.1745-3992.1993.tb00543.x","article-title":"Comparison of classical test theory and item response theory and their applications to test development","volume":"12","author":"Hambleton","year":"1993","journal-title":"Educational Measurement: Issues and Practice"},{"key":"10.1007\/s40593-023-00362-1_bib34","doi-asserted-by":"crossref","unstructured":"He, J., Peng, L., Sun, B., Yu, L., & Zhang, Y. (2021). Automatically predict question difficulty for reading comprehension exercises. In 2021 IEEE 33rd International Conference on Tools with Artificial Intelligence (ICTAI) (pp. 1398\u20131402).","DOI":"10.1109\/ICTAI52525.2021.00222"},{"key":"10.1007\/s40593-023-00362-1_bib35","unstructured":"Hoshino, A., & Nakagawa, H. (2010). Predicting the difficulty of multiple-choice close questions for computer-adaptive testing. In Proceedings of the 11th International Conference on Intelligent Text Processing and Computational Linguistics, 46 (pp. 279\u2013292)."},{"key":"10.1007\/s40593-023-00362-1_bib36","doi-asserted-by":"crossref","unstructured":"Hou, J., Koppatz, M., Hoya Quecedo, J. M., Stoyanova, N., Kopotev, M., & Yangarber, R. (2019). Modeling language learning using specialized Elo ratings. Innovative use of NLP for Building Educational Applications, 494\u2013506.","DOI":"10.18653\/v1\/W19-4451"},{"issue":"6","key":"10.1007\/s40593-023-00362-1_bib37","doi-asserted-by":"crossref","first-page":"969","DOI":"10.1016\/j.ipm.2018.06.007","article-title":"Automated estimation of item difficulty for multiple-choice tests: An application of word embedding techniques","volume":"54","author":"Hsu","year":"2018","journal-title":"Information Processing & Management"},{"key":"10.1007\/s40593-023-00362-1_bib38","doi-asserted-by":"crossref","unstructured":"Huang, Z., Liu, Q., Chen, E., Zhao, H., Gao, M., Wei, S., Hu, G. (2017). Question difficulty prediction for reading problems in standard tests. Proceedings of the AAAI Conference on Artificial Intelligence, 31(1), 1352\u20131359 AAAI.","DOI":"10.1609\/aaai.v31i1.10740"},{"key":"10.1007\/s40593-023-00362-1_bib39","doi-asserted-by":"crossref","unstructured":"Hutzler, D., David, E., Avigal, M., & Azoulay, R. (2014). Learning methods for rating the difficulty of reading comprehension questions. In 2014 IEEE International Conference on Software Science (pp. 54\u201362). Technology and Engineering.","DOI":"10.1109\/SWSTE.2014.16"},{"key":"10.1007\/s40593-023-00362-1_bib40","unstructured":"Keele, S. (2007). Guidelines for performing systematic literature reviews in software engineering. In Technical Report Technical report, Ver. 2.3 EBSE Technical Report."},{"issue":"3","key":"10.1007\/s40593-023-00362-1_bib41","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1108\/IJILT-09-2017-0085","article-title":"Generating story problems via controlled parameters in a web-based intelligent tutoring system","volume":"35","author":"Khodeir","year":"2018","journal-title":"The International Journal of Information and Learning Technology"},{"key":"10.1007\/s40593-023-00362-1_bib42","doi-asserted-by":"crossref","unstructured":"Kincaid, J. P., Fishburne, R. P., Jr., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count and Flesch reading ease formula) for navy enlisted personnel (Tech. Rep.), CNTECHTRA Research Branch Report (pp. 8\u201375).","DOI":"10.21236\/ADA006655"},{"key":"10.1007\/s40593-023-00362-1_bib43","doi-asserted-by":"crossref","unstructured":"Kurdi, G., Leo, J., Matentzoglu, N., Parsia, B., Sattler, U., Forge, S., \u2026 Dowling, W. (2021). A comparative study of methods for a priori prediction of MCQ difficulty. Semantic Web, 12(3), 449\u2013465","DOI":"10.3233\/SW-200390"},{"key":"10.1007\/s40593-023-00362-1_bib44","doi-asserted-by":"crossref","unstructured":"Lai, G., Xie, Q., Liu, H., Yang, Y., Hovy, E. (2017). Race: Large-scale reading comprehension dataset from examinations. arXivpreprint arXiv:1704.04683","DOI":"10.18653\/v1\/D17-1082"},{"key":"10.1007\/s40593-023-00362-1_bib45","doi-asserted-by":"crossref","unstructured":"Lee, J.-U., Schwan, E., Meyer, C. M. (2019). Manipulating the difficulty of c-tests. arXivpreprint arXiv:1906.06905","DOI":"10.18653\/v1\/P19-1035"},{"key":"10.1007\/s40593-023-00362-1_bib46","doi-asserted-by":"crossref","unstructured":"Leo, J., Kurdi, G., Matentzoglu, N., Parsia, B., Sattler, U., Forge, S., \u2026 Dowling, W. (2019). Ontology-based generation of medical, multiterm mcqs. International Journal of Artificial Intelligence in Education, 29(2), 145\u2013188","DOI":"10.1007\/s40593-018-00172-w"},{"key":"10.1007\/s40593-023-00362-1_bib47","doi-asserted-by":"crossref","unstructured":"Lin, C., Liu, D., Pang, W., & Apeh, E. (2015). Automatically predicting quiz difficulty level using similarity measures. In Proceedings of the 8th international conference on knowledge capture (pp. 1\u20138).","DOI":"10.1145\/2815833.2815842"},{"key":"10.1007\/s40593-023-00362-1_bib48","doi-asserted-by":"crossref","unstructured":"Lin, L.-H., Chang, T.-H., & Hsu, F.-Y. (2019). Automated prediction of item difficulty in reading comprehension using long short-term memory. In 2019 International Conference on Asian Language Processing (IALP) (pp. 132\u2013135). IEEE.","DOI":"10.1109\/IALP48816.2019.9037716"},{"key":"10.1007\/s40593-023-00362-1_bib49","doi-asserted-by":"crossref","unstructured":"Ling, T., Kang, B. H., Johns, D. P., Walls, J., & Bindoff, I. (2008). Expert-driven knowledge discovery. In Fifth International Conference on Information Technology: New Generations (ITNG 2008) (pp. 174\u2013178). IEEE.","DOI":"10.1109\/ITNG.2008.194"},{"key":"10.1007\/s40593-023-00362-1_bib50","doi-asserted-by":"crossref","unstructured":"Loginova, E., Benedetto, L., Benoit, D., & Cremonesi, P. (2021). Towards the application of calibrated transformers to the unsupervised estimation of question difficulty from text. In RANLP 2021 (pp. 846\u2013855). INCOMA.","DOI":"10.26615\/978-954-452-072-4_097"},{"key":"10.1007\/s40593-023-00362-1_bib51","unstructured":"Loukina, A., Yoon, S.-Y., Sakano, J., Wei, Y., & Sheehan, K. (2016). Textual complexity as a predictor of difficulty of listening items in language proficiency tests. In Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: TECHNICAL papers (pp. 3245\u20133253)."},{"key":"10.1007\/s40593-023-00362-1_bib52","doi-asserted-by":"crossref","unstructured":"Manning, C. D., Surdeanu, M., Bauer, J., Finkel, J. R., Bethard, S., & McClosky, D. (2014). The Stanford CoreNLP natural language processing toolkit. In Proceedings of 52nd annual meeting of the association for computational linguistics: system demonstrations (pp. 55\u201360).","DOI":"10.3115\/v1\/P14-5010"},{"key":"10.1007\/s40593-023-00362-1_bib53","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J. (2013a). Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781"},{"key":"10.1007\/s40593-023-00362-1_bib54","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G., & Dean, J. (2013b). Distributed representations of words and phrases and their compositionality. In Proceedings of the 26th international conference on neural information processing systems (Vol. 2, pp. 3111\u20133119). Curran Associates Inc."},{"key":"10.1007\/s40593-023-00362-1_bib55","doi-asserted-by":"crossref","unstructured":"Mou, L., Li, G., Zhang, L., Wang, T., & Jin, Z. (2016). Convolutional neural networks over tree structures for programming language processing. Thirtieth AAAI Conference on Artificial Intelligence, 30(1), 1287\u20131293.","DOI":"10.1609\/aaai.v30i1.10139"},{"issue":"3","key":"10.1007\/s40593-023-00362-1_bib56","doi-asserted-by":"crossref","first-page":"342","DOI":"10.1007\/s40593-019-00180-4","article-title":"Predicting the difficulty of exercise items for dynamic difficulty adaptation in adaptive language tutoring","volume":"29","author":"Pandarova","year":"2019","journal-title":"International Journal of Artificial Intelligence in Education"},{"key":"10.1007\/s40593-023-00362-1_bib57","doi-asserted-by":"crossref","unstructured":"P\u00e9rez, E. V., Santos, L. M. R., P\u00e9rez, M. J. V., de Castro Fern\u00e1ndez, J. P., & Mart\u00edn, R. G. (2012). Automatic classification of question difficulty level: Teachers\u2019 estimation vs. students\u2019 perception. In 2012 Frontiers in Education Conference Proceedings (pp. 1\u20135). IEEE.","DOI":"10.1109\/FIE.2012.6462398"},{"key":"10.1007\/s40593-023-00362-1_bib58","doi-asserted-by":"crossref","unstructured":"Perikos, I., Grivokostopoulou, F., Hatzilygeroudis, I., & Kovas, K. (2011). Difficulty estimator for converting natural language into first order logic. In Intelligent Decision Technologies (pp. 135\u2013144).","DOI":"10.1007\/978-3-642-22194-1_14"},{"issue":"6","key":"10.1007\/s40593-023-00362-1_bib59","doi-asserted-by":"crossref","first-page":"569","DOI":"10.1111\/exsy.12182","article-title":"Automatic estimation of exercises\u2019 difficulty levels in a tutoring system for teaching the conversion of natural language into first-order logic","volume":"33","author":"Perikos","year":"2016","journal-title":"Expert Systems"},{"issue":"1","key":"10.1007\/s40593-023-00362-1_bib60","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1177\/026553229501200103","article-title":"Predicting item difficulty in a reading comprehension test with an artificial neural network","volume":"12","author":"Perkins","year":"1995","journal-title":"Language Testing"},{"key":"10.1007\/s40593-023-00362-1_bib61","doi-asserted-by":"crossref","unstructured":"Peters, M.E., Ammar, W., Bhagavatula, C., Power, R. (2017). Semi-supervised sequence tagging with bidirectional language models. arXiv preprint arXiv:1705.00108","DOI":"10.18653\/v1\/P17-1161"},{"key":"10.1007\/s40593-023-00362-1_bib62","doi-asserted-by":"crossref","unstructured":"Qiu, Z., Wu, X., & Fan, W. (2019). Question difficulty prediction for multiple choice problems in medical exams. In Proceedings of the 28th ACM International Conference on Information and Knowledge Management (pp. 139\u2013148).","DOI":"10.1145\/3357384.3358013"},{"key":"10.1007\/s40593-023-00362-1_bib63","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P., Zhang, J., Lopyrev, K., Liang, P. (2016). SQuAD: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250","DOI":"10.18653\/v1\/D16-1264"},{"key":"10.1007\/s40593-023-00362-1_bib64","series-title":"Modern psychometrics: The science of psychological assessment","author":"Rust","year":"2014"},{"key":"10.1007\/s40593-023-00362-1_bib65","series-title":"Introduction to modern information retrieval","author":"Salton","year":"1986"},{"key":"10.1007\/s40593-023-00362-1_bib66","unstructured":"Sano, M. (2015). Automated capturing of psycho-linguistic features in reading assessment text. Annual Meeting of the National Council on Measurement in Education."},{"key":"10.1007\/s40593-023-00362-1_bib67","unstructured":"Scott, M. (2008). Wordsmith tools (version 6) [computer software]. Oxford University Press."},{"key":"10.1007\/s40593-023-00362-1_bib68","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1162\/tacl_a_00310","article-title":"Machine learning\u2013driven language assessment","volume":"8","author":"Settles","year":"2020","journal-title":"Transactions of the Association for computational Linguistics"},{"key":"10.1007\/s40593-023-00362-1_bib69","doi-asserted-by":"crossref","unstructured":"Seyler, D., Yahya, M., & Berberich, K. (2017). Knowledge questions from knowledge graphs. In Proceedings of the ACM SIGIR international conference on theory of information retrieval (pp. 11\u201318).","DOI":"10.1145\/3121050.3121073"},{"key":"10.1007\/s40593-023-00362-1_bib70","unstructured":"Sheehan, K. M., Flor, M., & Napolitano, D. (2013). A two-stage approach for generating unbiased estimates of text complexity. In Proceedings of the workshop on natural language processing for improving textual accessibility (pp. 49\u201358)."},{"key":"10.1007\/s40593-023-00362-1_bib71","doi-asserted-by":"crossref","unstructured":"Susanti, Y., Nishikawa, H., Tokunaga, T., Obari, H., et al. (2016). Item difficulty analysis of English vocabulary questions. CSEDU, 1, 267\u2013274.","DOI":"10.5220\/0005775502670274"},{"issue":"1","key":"10.1007\/s40593-023-00362-1_bib72","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s41039-020-00132-w","article-title":"Integrating automatic question generation with computerised adaptive test","volume":"15","author":"Susanti","year":"2020","journal-title":"Research and Practice in Technology Enhanced Learning"},{"issue":"1","key":"10.1007\/s40593-023-00362-1_bib73","first-page":"1","article-title":"Controlling item difficulty for automatic vocabulary question generation","volume":"12","author":"Susanti","year":"2017","journal-title":"Research and Practice in Technology Enhanced Learning"},{"key":"10.1007\/s40593-023-00362-1_bib74","unstructured":"Thorndike, R. (1982). Item and score conversion by pooled judgment. Test Equating, 309\u2013317."},{"issue":"2","key":"10.1007\/s40593-023-00362-1_bib75","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1177\/0265532215623581","article-title":"Determining cloze item difficulty from item and passage characteristics across different learner backgrounds","volume":"34","author":"Trace","year":"2017","journal-title":"Language Testing"},{"key":"10.1007\/s40593-023-00362-1_bib76","unstructured":"Vinu, E., Alsubait, T., Kumar, P. (2016). Modeling of item-difficulty for ontology-based MCQs. arXiv preprint arXiv:1607.00869"},{"issue":"6","key":"10.1007\/s40593-023-00362-1_bib77","first-page":"1023","article-title":"Difficulty-level modeling of ontology-based factual questions","volume":"11","author":"Vinu","year":"2020","journal-title":"Semantic Web"},{"key":"10.1007\/s40593-023-00362-1_bib78","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1016\/j.websem.2015.05.005","article-title":"A novel approach to generate MCQs from domain ontology: Considering DL semantics and open-world assumption","volume":"34","author":"Vinu","year":"2015","journal-title":"Journal of Web Semantics"},{"issue":"6","key":"10.1007\/s40593-023-00362-1_bib79","first-page":"1023","article-title":"Automated generation of assessment tests from domain ontologies","volume":"8","author":"Vinu","year":"2017","journal-title":"Semantic Web"},{"issue":"4","key":"10.1007\/s40593-023-00362-1_bib80","doi-asserted-by":"crossref","first-page":"1183","DOI":"10.1016\/j.compedu.2011.11.020","article-title":"Item difficulty estimation: An auspicious collaboration between data and judgment","volume":"58","author":"Wauters","year":"2012","journal-title":"Computers & Education"},{"key":"10.1007\/s40593-023-00362-1_bib81","doi-asserted-by":"crossref","unstructured":"Wohlin, C. (2014). Guidelines for snowballing in systematic literature studies and a replication in software engineering. In Proceedings of the 18th International Conference on Evaluation and Assessment in Software Engineering (pp. 1\u201310).","DOI":"10.1145\/2601248.2601268"},{"key":"10.1007\/s40593-023-00362-1_bib82","unstructured":"Xu, J., Wei, T., & Lv, P. (2022). SQL-DP: A novel difficulty prediction framework for SQL programming problems. In Proceedings of the 15th International Conference on Educational Data Mining (p. 86)."},{"key":"10.1007\/s40593-023-00362-1_bib83","doi-asserted-by":"crossref","unstructured":"Xue, K., Yaneva, V., Runyon, C., & Baldwin, P. (2020). Predicting the difficulty and response time of multiple choice questions using transfer learning. In Proceedings of the fifteenth workshop on innovative use of NLP for building educational applications (pp. 193\u2013197).","DOI":"10.18653\/v1\/2020.bea-1.20"},{"key":"10.1007\/s40593-023-00362-1_bib84","doi-asserted-by":"crossref","unstructured":"Yaneva, V., Baldwin, P., Mee, J., et al. (2019). Predicting the difficulty of multiple choice questions in a high-stakes medical exam. In Proceedings of the fourteenth workshop on innovative use of NLP for building educational applications (pp. 11\u201320).","DOI":"10.18653\/v1\/W19-4402"},{"key":"10.1007\/s40593-023-00362-1_bib85","unstructured":"Yaneva, V., Baldwin, P., Mee, J., et al. (2020). Predicting item survival for multiple choice questions in a high-stakes medical exam. In Proceedings of the 12th language resources and evaluation conference (pp. 6812\u20136818)."},{"key":"10.1007\/s40593-023-00362-1_bib86","doi-asserted-by":"crossref","unstructured":"Yang, Z., Qi, P., Zhang, S., Bengio, Y., Cohen, W. W., Salakhutdinov, R., Manning, C. D. (2018). HotpotQA: A dataset for diverse, explainable multi-hop question answering. arXiv preprint arXiv:1809.09600","DOI":"10.18653\/v1\/D18-1259"},{"key":"10.1007\/s40593-023-00362-1_bib87","unstructured":"Yeung, C. Y., Lee, J. S., & Tsou, B. K. (2019). Difficulty-aware distractor generation for gap-fill items. In Proceedings of the 17th annual workshop of the Australasian language technology association (pp. 159\u2013164)."},{"key":"10.1007\/s40593-023-00362-1_bib88","doi-asserted-by":"crossref","unstructured":"Zhou, Y., & Tao, C. (2020). Multi-task BERT for problem difficulty prediction. In 2020 International Conference on Communications, Information System and Computer Engineering (CISCE) (pp. 213\u2013216).","DOI":"10.1109\/CISCE50729.2020.00048"}],"container-title":["International Journal of Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40593-023-00362-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40593-023-00362-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156042922600315X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156042922600315X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40593-023-00362-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,18]],"date-time":"2026-05-18T06:20:49Z","timestamp":1779085249000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S156042922600315X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":88,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["S156042922600315X"],"URL":"https:\/\/doi.org\/10.1007\/s40593-023-00362-1","relation":{},"ISSN":["1560-4292"],"issn-type":[{"value":"1560-4292","type":"print"}],"subject":[],"published":{"date-parts":[[2024,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Text-based Question Difficulty Prediction: A Systematic Review of Automatic Approaches","name":"articletitle","label":"Article Title"},{"value":"International Journal of Artificial Intelligence in Education","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1007\/s40593-023-00362-1","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"Copyright \u00a9 2023 The Author(s). Published by Elsevier Ltd","name":"copyright","label":"Copyright"}]}}