{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T16:03:01Z","timestamp":1769529781756,"version":"3.49.0"},"reference-count":192,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T00:00:00Z","timestamp":1764028800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T00:00:00Z","timestamp":1764028800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","award":["RGPIN-2022-05193"],"award-info":[{"award-number":["RGPIN-2022-05193"]}],"id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","award":["RGPIN-2022-04197"],"award-info":[{"award-number":["RGPIN-2022-04197"]}],"id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"DOI":"10.1007\/s10462-025-11421-5","type":"journal-article","created":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T11:11:51Z","timestamp":1764069111000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Exploring unanswerability in machine reading comprehension: approaches, benchmarks, and open challenges"],"prefix":"10.1007","volume":"59","author":[{"given":"Hadiseh","family":"Moradisani","sequence":"first","affiliation":[]},{"given":"Fattane","family":"Zarrinkalam","sequence":"additional","affiliation":[]},{"given":"Zeinab","family":"Noorian","sequence":"additional","affiliation":[]},{"given":"Faezeh","family":"Ensan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,25]]},"reference":[{"issue":"4","key":"11421_CR1","doi-asserted-by":"publisher","first-page":"1399","DOI":"10.1007\/s10115-022-01783-5","volume":"65","author":"H Abdel-Nabi","year":"2023","unstructured":"Abdel-Nabi H, Awajan A, Ali MZ (2023) Deep learning-based question answering: a survey. Knowl Inf Syst 65(4):1399\u20131485","journal-title":"Knowl Inf Syst"},{"key":"11421_CR2","doi-asserted-by":"crossref","unstructured":"Abnar S, Zuidema W (2020) Quantifying attention flow in transformers. arXiv preprint arXiv:2005.00928","DOI":"10.18653\/v1\/2020.acl-main.385"},{"key":"11421_CR3","unstructured":"Agarwal A, Patel N, Varshney N, Parmar M, Mallina P, Shah A, Sangaraju SR, Patel T, Thakkar N, Baral C (2023) Can nlp models\u2019 identify\u2019, \u2019distinguish\u2019, and \u2019justify\u2019 questions that don\u2019t have a definitive answer? In: The 61st annual meeting of the association for computational linguistics"},{"key":"11421_CR4","doi-asserted-by":"crossref","unstructured":"Aken BV, Winter B, L\u00f6ser A, Gers FA (2020) Visbert: Hidden-state visualizations for transformers. In: companion proceedings of the web conference 2020, pp 207\u2013211","DOI":"10.1145\/3366424.3383542"},{"key":"11421_CR5","doi-asserted-by":"crossref","unstructured":"Alammar J (2021) Ecco: An open source library for the explainability of transformer language models. In: Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing: system demonstrations, pp 249\u2013257","DOI":"10.18653\/v1\/2021.acl-demo.30"},{"issue":"3","key":"11421_CR6","volume":"12","author":"SS Alanazi","year":"2021","unstructured":"Alanazi SS, Elfadil N, Jarajreh M, Algarni S (2021) Question answering systems: a systematic literature review. Int J Adv Comput Sci Appl 12(3):1","journal-title":"Int J Adv Comput Sci Appl"},{"key":"11421_CR7","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.2664","volume":"11","author":"A Allahim","year":"2025","unstructured":"Allahim A, Cherif A, Imine A (2025) Semantic approaches for query expansion: taxonomy, challenges, and future research directions. PeerJ Comput Sci 11:2664","journal-title":"PeerJ Comput Sci"},{"issue":"3","key":"11421_CR8","volume":"2","author":"AMN Allam","year":"2012","unstructured":"Allam AMN, Haggag MH (2012) The question answering systems: a survey. Int J Res Rev Inf Sci (IJRRIS) 2(3):1","journal-title":"Int J Res Rev Inf Sci (IJRRIS)"},{"key":"11421_CR9","doi-asserted-by":"crossref","unstructured":"Amayuelas A, Wong K, Pan L, Chen W, Wang W (2023) Knowledge of knowledge: exploring known-unknowns uncertainty with large language models. arXiv preprint arXiv:2305.13712","DOI":"10.18653\/v1\/2024.findings-acl.383"},{"key":"11421_CR10","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/j.inffus.2019.12.012","volume":"58","author":"AB Arrieta","year":"2020","unstructured":"Arrieta AB, D\u00edaz-Rodr\u00edguez N, Del Ser J, Bennetot A, Tabik S, Barbado A, Garc\u00eda S, Gil-L\u00f3pez S, Molina D, Benjamins R (2020) Explainable artificial intelligence (XAI): concepts, taxonomies, opportunities and challenges toward responsible AI. Inf Fusion 58:82\u2013115","journal-title":"Inf Fusion"},{"key":"11421_CR11","doi-asserted-by":"crossref","unstructured":"Asai A, Choi E (2021) Challenges in information-seeking qa: unanswerable questions and paragraph retrieval. In: Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing (Volume 1: Long Papers), pp 1492\u20131504","DOI":"10.18653\/v1\/2021.acl-long.118"},{"key":"11421_CR12","doi-asserted-by":"crossref","unstructured":"Asai A, Kasai J, Clark JH, Lee K, Choi E, Hajishirzi H (2021) Xor qa: Cross-lingual open-retrieval question answering. In: Proceedings of the 2021 conference of the North American chapter of the association for computational linguistics: human language technologies, pp 547\u2013564","DOI":"10.18653\/v1\/2021.naacl-main.46"},{"key":"11421_CR13","unstructured":"Aubet F-X, Danks D, Zhu Y (2019) Equant (enhanced question answer network). arXiv preprint arXiv:1907.00708"},{"key":"11421_CR14","unstructured":"Bacco L, Cimino A, Dell\u2019Orletta F, Merone M (2021) Extractive summarization for explainable sentiment analysis using transformers. In: Joint 2nd international workshop on deep learning meets ontologies and natural language processing and 6th international workshop on explainable sentiment mining and emotion detection, DeepOntoNLP and X-SENTIMENT 2021, 2918, pp 62\u201373"},{"key":"11421_CR15","unstructured":"Back S, Chinthakindi SC, Kedia A, Lee H, Choo J (2020) Neurquri: Neural question requirement inspector for answerability prediction in machine reading comprehension. In: International conference on learning representations"},{"issue":"2","key":"11421_CR16","first-page":"97","volume":"11","author":"H Bahak","year":"2024","unstructured":"Bahak H, Taheri F, Zojaji Z, Kazemi A (2024) Evaluating chatgpt as a question answering system: a comprehensive analysis and comparison with existing models. J Comput Secur 11(2):97\u2013116","journal-title":"J Comput Secur"},{"key":"11421_CR17","unstructured":"Bai Y, Wang DZ (2021) More than reading comprehension: a survey on datasets and metrics of textual question answering. arXiv preprint arXiv:2109.12264"},{"key":"11421_CR18","unstructured":"Bai T, Yang L, Wong ZH, Peng J, Zhuang X, Zhang C, Wu L, Qiu J, Zhang W, Yuan B, et al (2024) Multi-agent collaborative data selection for efficient llm pretraining. CoRR"},{"issue":"6","key":"11421_CR19","doi-asserted-by":"publisher","first-page":"683","DOI":"10.1017\/S1351324921000395","volume":"28","author":"R Baradaran","year":"2022","unstructured":"Baradaran R, Ghiasi R, Amirkhani H (2022) A survey on machine reading comprehension systems. Nat Lang Eng 28(6):683\u2013732","journal-title":"Nat Lang Eng"},{"key":"11421_CR20","doi-asserted-by":"crossref","unstructured":"Barres V, McFate CJ, Kalyanpur A, Saravanakumar KK, Moon L, Seifu N, Bautista-Castillo A (2025) From generating answers to building explanations: integrating multi-round rag and causal modeling for scientific qa. In: Proceedings of the 2025 conference of the nations of the Americas chapter of the association for computational linguistics: human language technologies (Volume 3: Industry Track), pp 515\u2013522","DOI":"10.18653\/v1\/2025.naacl-industry.42"},{"key":"11421_CR21","doi-asserted-by":"crossref","unstructured":"Bennetot A, Donadello I, Haouari El Qadi El, A, Dragoni M, Frossard T, Wagner B, Sarranti A, Tulli S, Trocan M, Chatila R, (2024) A practical tutorial on explainable Ai techniques. ACM Comput Surv 57(2):1\u201344","DOI":"10.1145\/3670685"},{"key":"11421_CR22","doi-asserted-by":"publisher","first-page":"366","DOI":"10.1016\/j.procs.2015.12.005","volume":"73","author":"A Bouziane","year":"2015","unstructured":"Bouziane A, Bouchiha D, Doumi N, Malki M (2015) Question answering systems: survey and trends. Procedia Comput Sci 73:366\u2013375","journal-title":"Procedia Comput Sci"},{"key":"11421_CR23","unstructured":"Buck C, Bulian J, Ciaramita M, Gajewski W, Gesmundo A, Houlsby N, Wang W (2018) Ask the right questions: active question reformulation with reinforcement learning. In: International conference on learning representations"},{"key":"11421_CR24","doi-asserted-by":"crossref","unstructured":"Cambazoglu BB, Sanderson M, Scholer F, Croft B (2021) A review of public datasets in question answering research. In: ACM SIGIR Forum, vol. 54, pp 1\u201323. ACM New York, NY, USA","DOI":"10.1145\/3483382.3483389"},{"issue":"1","key":"11421_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103111","volume":"60","author":"E Cambria","year":"2023","unstructured":"Cambria E, Malandri L, Mercorio F, Mezzanzanica M, Nobani N (2023) A survey on XAI and natural language explanations. Inf Process Manag 60(1):103111","journal-title":"Inf Process Manag"},{"key":"11421_CR26","doi-asserted-by":"crossref","unstructured":"Carvalho\u00a0Souza ME, Souza MEDC, Weigang L (2025) Unveiling the black box: The significance of xai in making llms transparent. Authorea Preprints","DOI":"10.22541\/au.173991264.46233479\/v1"},{"key":"11421_CR27","doi-asserted-by":"crossref","unstructured":"Chen D, Fisch A, Weston J, Bordes A (2017) Reading wikipedia to answer open-domain questions. In: 55th Annual meeting of the association for computational linguistics, ACL 2017. Association for Computational Linguistics (ACL), pp 1870\u20131879","DOI":"10.18653\/v1\/P17-1171"},{"key":"11421_CR28","doi-asserted-by":"crossref","unstructured":"Choi E, He H, Iyyer M, Yatskar M, Yih W-t, Choi Y, Liang P, Zettlemoyer L (2018) Quac: Question answering in context. In: Proceedings of the 2018 conference on empirical methods in natural language processing, pp 2174\u20132184","DOI":"10.18653\/v1\/D18-1241"},{"key":"11421_CR29","unstructured":"Clark K (2020) Electra: Pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555"},{"key":"11421_CR30","doi-asserted-by":"crossref","unstructured":"Clark C, Gardner M (2018) Simple and effective multi-paragraph reading comprehension. In: Proceedings of the 56th annual meeting of the association for computational linguistics (Volume 1: Long Papers). Association for Computational Linguistics","DOI":"10.18653\/v1\/P18-1078"},{"key":"11421_CR31","doi-asserted-by":"publisher","first-page":"454","DOI":"10.1162\/tacl_a_00317","volume":"8","author":"JH Clark","year":"2020","unstructured":"Clark JH, Choi E, Collins M, Garrette D, Kwiatkowski T, Nikolaev V, Palomaki J (2020) Tydi qa: A benchmark for information-seeking question answering in ty pologically di verse languages. Trans Assoc Comput Linguist 8:454\u2013470","journal-title":"Trans Assoc Comput Linguist"},{"key":"11421_CR32","doi-asserted-by":"crossref","unstructured":"Dalvi B, Jansen P, Tafjord O, Xie Z, Smith H, Pipatanangkura L, Clark P (2021) Explaining answers with entailment trees. In: Proceedings of the 2021 conference on empirical methods in natural language processing, pp 7358\u20137370","DOI":"10.18653\/v1\/2021.emnlp-main.585"},{"key":"11421_CR33","unstructured":"Das R, Godbole A, Naik A, Tower E, Zaheer M, Hajishirzi H, Jia R, McCallum A (2022) Knowledge base question answering by case-based reasoning over subgraphs. In: International conference on machine learning. PMLR, pp 4777\u20134793"},{"key":"11421_CR34","doi-asserted-by":"crossref","unstructured":"Dastin J (2022) Amazon scraps secret ai recruiting tool that showed bias against women. In: Ethics of data and analytics. Auerbach Publications, pp 296\u2013299","DOI":"10.1201\/9781003278290-44"},{"key":"11421_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.119898","volume":"655","author":"J Del Ser","year":"2024","unstructured":"Del Ser J, Barredo-Arrieta A, D\u00edaz-Rodr\u00edguez N, Herrera F, Saranti A, Holzinger A (2024) On generating trustworthy counterfactual explanations. Inf Sci 655:119898","journal-title":"Inf Sci"},{"issue":"3","key":"11421_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3570640","volume":"41","author":"Y Deng","year":"2023","unstructured":"Deng Y, Zhang W, Xu W, Lei W, Chua T-S, Lam W (2023) A unified multi-task learning framework for multi-goal conversational recommender systems. ACM Trans Inf Syst 41(3):1\u201325","journal-title":"ACM Trans Inf Syst"},{"key":"11421_CR37","doi-asserted-by":"crossref","unstructured":"Deng Y, Zhao Y, Li M, Ng SK, Chua T-S (2024) Don t just say i don t know! Self-aligning large language models for responding to unknown questions with explanations. In: Proceedings of the 2024 conference on empirical methods in natural language processing, pp 13652\u201313673","DOI":"10.18653\/v1\/2024.emnlp-main.757"},{"key":"11421_CR38","unstructured":"Devlin J (2018) Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"11421_CR39","unstructured":"Ding C, Hong Y, Yao J (2024) Sgcm: Salience-guided context modeling for question generation. In: Proceedings of the 2024 joint international conference on computational linguistics, language resources and evaluation (LREC-COLING 2024), pp 14755\u201314762"},{"key":"11421_CR40","doi-asserted-by":"crossref","unstructured":"Dong L, Mallinson J, Reddy S, Lapata M (2017) Learning to paraphrase for question answering. In: Proceedings of the 2017 conference on empirical methods in natural language processing, pp 875\u2013886","DOI":"10.18653\/v1\/D17-1091"},{"key":"11421_CR41","doi-asserted-by":"crossref","unstructured":"Dong G, Zhu Y, Zhang C, Wang Z, Wen J-R, Dou Z (2025) Understand what llm needs: dual preference alignment for retrieval-augmented generation. In: Proceedings of the ACM on web conference 2025, pp 4206\u20134225","DOI":"10.1145\/3696410.3714717"},{"key":"11421_CR42","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2024.121763","volume":"697","author":"Y Fan","year":"2025","unstructured":"Fan Y, Zhang H, Li R, Wang Y, Zhang G, Tan H, Liang J (2025) Weakly-supervised explainable question answering via question aware contrastive learning and adaptive gate mechanism. Inf Sci 697:121763","journal-title":"Inf Sci"},{"issue":"4","key":"11421_CR43","doi-asserted-by":"publisher","DOI":"10.3390\/computers13040092","volume":"13","author":"P Fantozzi","year":"2024","unstructured":"Fantozzi P, Naldi M (2024) The explainability of transformers: current status and directions. Computers 13(4):92","journal-title":"Computers"},{"issue":"8017","key":"11421_CR44","doi-asserted-by":"publisher","first-page":"625","DOI":"10.1038\/s41586-024-07421-0","volume":"630","author":"S Farquhar","year":"2024","unstructured":"Farquhar S, Kossen J, Kuhn L, Gal Y (2024) Detecting hallucinations in large language models using semantic entropy. Nature 630(8017):625\u2013630","journal-title":"Nature"},{"key":"11421_CR45","doi-asserted-by":"crossref","unstructured":"Ferguson J, Gardner M, Hajishirzi H, Khot T, Dasigi P (2020) Iirc: A dataset of incomplete information reading comprehension questions. In: Proceedings of the 2020 conference on empirical methods in natural language processing (EMNLP), pp 1137\u20131147","DOI":"10.18653\/v1\/2020.emnlp-main.86"},{"key":"11421_CR46","doi-asserted-by":"crossref","unstructured":"Fischer K, F\u00fcrst D, Steindl S, Lindner J, Sch\u00e4fer U (2024) Question: How do large language models perform on the question answering tasks? Answer. arXiv preprint arXiv:2412.12893","DOI":"10.1007\/978-3-031-92605-1_12"},{"issue":"1","key":"11421_CR47","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1007\/s10115-023-01966-8","volume":"66","author":"A Formica","year":"2024","unstructured":"Formica A, Mele I, Taglino F (2024) A template-based approach for question answering over knowledge bases. Knowl Inf Syst 66(1):453\u2013479","journal-title":"Knowl Inf Syst"},{"key":"11421_CR48","doi-asserted-by":"crossref","unstructured":"Gao G, Chen H-T, Artzi Y, Choi E (2023) Continually improving extractive qa via human feedback. In: Proceedings of the 2023 conference on empirical methods in natural language processing, pp 406\u2013423","DOI":"10.18653\/v1\/2023.emnlp-main.27"},{"key":"11421_CR49","doi-asserted-by":"crossref","unstructured":"Gautam V, Zhang M, Klakow D (2023) A lightweight method to generate unanswerable questions in English. In: Findings of the association for computational linguistics: EMNLP 2023, pp 7349\u20137360","DOI":"10.18653\/v1\/2023.findings-emnlp.491"},{"key":"11421_CR50","doi-asserted-by":"crossref","unstructured":"Godin F, Kumar A, Mittal A (2019) Learning when not to answer: a ternary reward structure for reinforcement learning based question answering. In: 2019 Conference of the North American chapter of the association for computational linguistics: human language technologies. Association for Computational Linguistics (ACL), pp 122\u2013129","DOI":"10.18653\/v1\/N19-2016"},{"issue":"11","key":"11421_CR51","doi-asserted-by":"publisher","first-page":"2849","DOI":"10.1007\/s10115-022-01737-x","volume":"64","author":"JA Gomes","year":"2022","unstructured":"Gomes JA, Mello RC, Str\u00f6ele V, Souza JF (2022) A study of approaches to answering complex questions over knowledge bases. Knowl Inf Syst 64(11):2849\u20132881","journal-title":"Knowl Inf Syst"},{"key":"11421_CR52","doi-asserted-by":"crossref","unstructured":"Gu Y, Kase S, Vanni M, Sadler B, Liang P, Yan X, Su Y (2021) Beyond iid: three levels of generalization for question answering on knowledge bases. In: Proceedings of the web conference 2021, pp 3477\u20133488","DOI":"10.1145\/3442381.3449992"},{"key":"11421_CR53","unstructured":"Guo C, Pleiss G, Sun Y, Weinberger KQ (2017) On calibration of modern neural networks. In: International conference on machine learning. PMLR, pp 1321\u20131330"},{"issue":"1","key":"11421_CR54","first-page":"94","volume":"16","author":"V Gupta","year":"2023","unstructured":"Gupta V, Dixit A (2023) Recent query reformulation approaches for information retrieval system-a survey. Recent Adv Comput Sci Commun (Formerly: Recent Patents on Computer Science) 16(1):94\u2013107","journal-title":"Recent Adv Comput Sci Commun (Formerly: Recent Patents on Computer Science)"},{"key":"11421_CR55","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2023.1225093","volume":"6","author":"S Gurrapu","year":"2023","unstructured":"Gurrapu S, Kulkarni A, Huang L, Lourentzou I, Batarseh FA (2023) Rationalization for explainable nlp: a survey. Front Artif Intell 6:1225093","journal-title":"Front Artif Intell"},{"key":"11421_CR56","doi-asserted-by":"crossref","unstructured":"Hao T, Li X, He Y, Wang FL, Qu Y (2022) Recent progress in leveraging deep learning methods for question answering. Neural Comput Appl, 1\u201319","DOI":"10.1007\/s00521-021-06748-3"},{"key":"11421_CR57","doi-asserted-by":"crossref","unstructured":"He W, Liu K, Liu J, Lyu Y, Zhao S, Xiao X, Liu Y, Wang Y, Wu H, She Q (2018) Dureader: a Chinese machine reading comprehension dataset from real-world applications. In: Proceedings of the workshop on machine reading for question answering, pp 37\u201346","DOI":"10.18653\/v1\/W18-2605"},{"key":"11421_CR58","doi-asserted-by":"crossref","unstructured":"He J, Yu L, Li C, Yang R, Chen F, Li K, Zhang M, Lei S, Zhang X, Beigi M, et al (2025) Survey of uncertainty estimation in large language models-sources, methods, applications, and challenge","DOI":"10.1016\/j.inffus.2025.104057"},{"key":"11421_CR59","doi-asserted-by":"crossref","unstructured":"Huang K, Tang Y, Huang J, He X, Zhou B (2019) Relation module for non-answerable predictions on reading comprehension. In: Proceedings of the 23rd conference on computational natural language learning (CoNLL), pp 747\u2013756","DOI":"10.18653\/v1\/K19-1070"},{"key":"11421_CR60","doi-asserted-by":"crossref","unstructured":"Huang J, Lu X, Liang J, Bao Q, Huang C, Xiao Y, Liu B, Chen Y (2022) Knowing what i don t know: A generation assisted rejection framework in knowledge base question answering. In: International conference on database systems for advanced applications. Springer, pp 238\u2013246","DOI":"10.1007\/978-3-031-00129-1_17"},{"key":"11421_CR61","doi-asserted-by":"crossref","unstructured":"Hu M, Peng Y, Huang Z, Qiu X, Wei F, Zhou M (2018) Reinforced mnemonic reader for machine reading comprehension. In: Proceedings of the 27th international joint conference on artificial intelligence, pp 4099\u20134106","DOI":"10.24963\/ijcai.2018\/570"},{"key":"11421_CR62","doi-asserted-by":"crossref","unstructured":"Hu M, Wei F, Peng Y, Huang Z, Yang N, Li D (2019) Read+ verify: machine reading comprehension with unanswerable questions. In: Proceedings of the AAAI conference on artificial intelligence, 33, pp 6529\u20136537","DOI":"10.1609\/aaai.v33i01.33016529"},{"key":"11421_CR63","doi-asserted-by":"crossref","unstructured":"Hu S, Luo Y, Wang H, Cheng X, Liu Z, Sun M (2023) Won t get fooled again: answering questions with false premises. In: Proceedings of the 61st annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 5626\u20135643","DOI":"10.18653\/v1\/2023.acl-long.309"},{"key":"11421_CR64","doi-asserted-by":"crossref","unstructured":"Jacovi A, Goldberg Y (2020) Towards faithfully interpretable nlp systems: how should we define and evaluate faithfulness? In: Proceedings of the 58th annual meeting of the association for computational linguistics, pp 4198\u20134205","DOI":"10.18653\/v1\/2020.acl-main.386"},{"key":"11421_CR65","doi-asserted-by":"crossref","unstructured":"Jia R, Liang P (2017) Adversarial examples for evaluating reading comprehension systems. In: Proceedings of the 2017 conference on empirical methods in natural language processing, pp 2021\u20132031","DOI":"10.18653\/v1\/D17-1215"},{"key":"11421_CR66","doi-asserted-by":"crossref","unstructured":"Ji Y, Chen L-Y, Dou C, Ma B, Li X (2022)To answer or not to answer? Improving machine reading comprehension model with span-based contrastive learning. In: Findings of the association for computational linguistics: NAACL 2022, pp 1292\u20131300","DOI":"10.18653\/v1\/2022.findings-naacl.96"},{"key":"11421_CR67","doi-asserted-by":"crossref","unstructured":"Jin Q, Dhingra B, Liu Z, Cohen W, Lu X (2016) Pubmedqa: A dataset for biomedical research question answering. In: Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (EMNLP-IJCNLP), pp 2567\u20132577","DOI":"10.18653\/v1\/D19-1259"},{"key":"11421_CR68","doi-asserted-by":"crossref","unstructured":"Kaiser M, Saha\u00a0Roy R, Weikum G (2021) Reinforcement learning from reformulations in conversational question answering over knowledge graphs. In: Proceedings of the 44th international ACM SIGIR conference on research and development in information retrieval, pp 459\u2013469","DOI":"10.1145\/3404835.3462859"},{"key":"11421_CR69","doi-asserted-by":"crossref","unstructured":"Kaiser M, Saha\u00a0Roy R, Weikum G (2024) Robust training for conversational question answering models with reinforced reformulation generation. In: Proceedings of the 17th ACM international conference on web search and data mining, pp 322\u2013331","DOI":"10.1145\/3616855.3635822"},{"issue":"Suppl 2","key":"11421_CR70","doi-asserted-by":"publisher","first-page":"2509","DOI":"10.1007\/s10462-023-10583-4","volume":"56","author":"S Kazi","year":"2023","unstructured":"Kazi S, Khoja S, Daud A (2023) A survey of deep learning techniques for machine reading comprehension. Artif Intell Rev 56(Suppl 2):2509\u20132569","journal-title":"Artif Intell Rev"},{"issue":"6","key":"11421_CR71","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3534965","volume":"55","author":"K Keyvan","year":"2022","unstructured":"Keyvan K, Huang JX (2022) How to approach ambiguous queries in conversational search: a survey of techniques, approaches, tools, and challenges. ACM Comput Surv 55(6):1\u201340","journal-title":"ACM Comput Surv"},{"key":"11421_CR72","doi-asserted-by":"crossref","unstructured":"Kim N, Pavlick E, Karagol-Ayan B, Ramachandran D (2021) Which linguist invented the lightbulb? Presupposition verification for question-answering. In: Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing (Volume 1: Long Papers), pp 3932\u20133945","DOI":"10.18653\/v1\/2021.acl-long.304"},{"key":"11421_CR73","doi-asserted-by":"crossref","unstructured":"Kim N, Htut PM, Bowman SR, Petty J (2023) (QA)2: Question answering with questionable assumptions. In: The 61st annual meeting of the association for computational linguistics","DOI":"10.18653\/v1\/2023.acl-long.472"},{"key":"11421_CR74","doi-asserted-by":"crossref","unstructured":"Kim H, Bibi A, Torr P, Gal Y (2024) Detecting llm hallucination through layer-wise information deficiency: analysis of unanswerable questions and ambiguous prompts. arXiv preprint arXiv:2412.10246","DOI":"10.18653\/v1\/2025.emnlp-main.1644"},{"key":"11421_CR75","unstructured":"Kirichenko P, Ibrahim M, Chaudhuri K, Bell SJ (2025) Abstentionbench: Reasoning llms fail on unanswerable questions. In: ICML 2025 workshop on reliable and responsible foundation models"},{"key":"11421_CR76","doi-asserted-by":"crossref","unstructured":"Kongyoung S, Macdonald C, Ounis I (2023) Multi-task learning of query generation and classification for generative conversational question rewriting. In: Findings of the association for computational linguistics: EMNLP 2023, pp 13667\u201313678","DOI":"10.18653\/v1\/2023.findings-emnlp.913"},{"key":"11421_CR77","doi-asserted-by":"crossref","unstructured":"Kundu S, Ng HT (2018a) A nil-aware answer extraction framework for question answering. In: Proceedings of the 2018 conference on empirical methods in natural language processing, pp 4243\u20134252","DOI":"10.18653\/v1\/D18-1456"},{"key":"11421_CR78","doi-asserted-by":"crossref","unstructured":"Kundu S, Ng HT (2018b) A question-focused multi-factor attention network for question answering. In: Proceedings of the AAAI conference on artificial intelligence, vol. 32","DOI":"10.1609\/aaai.v32i1.12065"},{"key":"11421_CR79","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1162\/tacl_a_00276","volume":"7","author":"T Kwiatkowski","year":"2019","unstructured":"Kwiatkowski T, Palomaki J, Redfield O, Collins M, Parikh A, Alberti C, Epstein D, Polosukhin I, Devlin J, Lee K (2019) Natural questions: a benchmark for question answering research. Trans Assoc Comput Linguist 7:453\u2013466","journal-title":"Trans Assoc Comput Linguist"},{"key":"11421_CR80","unstructured":"Lai Y, Wu J, Zhang C, Sun H, Zhou D (2025) Adacqr: Enhancing query reformulation for conversational search via sparse and dense retrieval alignment. In: Proceedings of the 31st international conference on computational linguistics, pp 7698\u20137720"},{"key":"11421_CR81","doi-asserted-by":"crossref","unstructured":"\u0141ajewska W, Balog K (2024) Towards reliable and factual response generation: Detecting unanswerable questions in information-seeking conversations. In: European conference on information retrieval. Springer, pp 336\u2013344","DOI":"10.1007\/978-3-031-56063-7_25"},{"key":"11421_CR82","unstructured":"Lan Z (2019) Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942"},{"issue":"6","key":"11421_CR83","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3732938","volume":"24","author":"H Le","year":"2025","unstructured":"Le H, Luu N, Nguyen T, Dao T, Dinh S (2025) Optimizing answer generator in Vietnamese legal question answering systems using language models. ACM Trans Asian Low-Resour Lang Inf Process 24(6):1\u201317","journal-title":"ACM Trans Asian Low-Resour Lang Inf Process"},{"key":"11421_CR84","doi-asserted-by":"publisher","first-page":"9861","DOI":"10.1609\/aaai.v33i01.33019861","volume":"33","author":"G Lee","year":"2019","unstructured":"Lee G, Kim S, Hwang S-W (2019) Qadiver: interactive framework for diagnosing qa models. Proceedings of the AAAI Conference on Artificial Intelligence 33:9861\u20139862","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"11421_CR85","unstructured":"Lee G, Hwang S-w, Cho H (2020) Squad2-cr: Semi-supervised annotation for cause and rationales for unanswerability in squad 2.0. In: Proceedings of the twelfth language resources and evaluation conference, pp 5425\u20135432"},{"key":"11421_CR86","unstructured":"Lee B, Padhi I, Ramamurthy KN, Miehling E, Dognin P, Nagireddy M, Dhurandhar A (2025) Programming refusal with conditional activation steering. In: International conference on learning representations"},{"key":"11421_CR87","doi-asserted-by":"crossref","unstructured":"Levy O, Seo M, Choi E, Zettlemoyer L (2017) Zero-shot relation extraction via reading comprehension. In: Proceedings of the 21st conference on computational natural language learning (CoNLL 2017). Association for Computational Linguistics","DOI":"10.18653\/v1\/K17-1034"},{"key":"11421_CR88","doi-asserted-by":"crossref","unstructured":"Liao J, Zhao X, Zheng J, Li X, Cai F, Tang J (2022) Ptau: Prompt tuning for attributing unanswerable questions. In: Proceedings of the 45th international ACM SIGIR conference on research and development in information retrieval, pp 1219\u20131229","DOI":"10.1145\/3477495.3532048"},{"key":"11421_CR89","unstructured":"Liu Y (2019) Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692"},{"key":"11421_CR90","unstructured":"Lin C-Y (2004) Rouge: A package for automatic evaluation of summaries. In: Text summarization branches out, pp 74\u201381"},{"key":"11421_CR91","unstructured":"Liu X, Li W, Fang Y, Kim A, Duh K, Gao J (2018a) Stochastic answer networks for squad 2.0. arXiv preprint arXiv:1809.09194"},{"key":"11421_CR92","doi-asserted-by":"crossref","unstructured":"Liu X, Shen Y, Duh K, Gao J (2018b) Stochastic answer networks for machine reading comprehension. In: Proceedings of the 56th annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 1694\u20131704","DOI":"10.18653\/v1\/P18-1157"},{"issue":"18","key":"11421_CR93","doi-asserted-by":"publisher","DOI":"10.3390\/app9183698","volume":"9","author":"S Liu","year":"2019","unstructured":"Liu S, Zhang X, Zhang S, Wang H, Zhang W (2019) Neural machine reading comprehension: methods and trends. Appl Sci 9(18):3698","journal-title":"Appl Sci"},{"key":"11421_CR94","doi-asserted-by":"crossref","unstructured":"Liu D, Gong Y, Fu J, Yan Y, Chen J, Lv J, Duan N, Zhou M (2020) Tell me how to ask again: question data augmentation with controllable rewriting in continuous space. In: Proceedings of the 2020 conference on empirical methods in natural language processing (EMNLP), pp 5798\u20135810","DOI":"10.18653\/v1\/2020.emnlp-main.467"},{"key":"11421_CR95","doi-asserted-by":"crossref","unstructured":"Liu T, Guo Q, Hu X, Zhang Y, Qiu X, Zhang Z (2022) Rlet: A reinforcement learning based approach for explainable qa with entailment trees. In: Proceedings of the 2022 conference on empirical methods in natural language processing, pp 7177\u20137189","DOI":"10.18653\/v1\/2022.emnlp-main.483"},{"key":"11421_CR96","unstructured":"Li S, Yang C, Wu T, Shi C, Zhang Y, Zhu X, Cheng Z, Cai D, Yu M, Liu L, et al (2024) A survey on the honesty of large language models. arXiv preprint arXiv:2409.18786"},{"key":"11421_CR97","unstructured":"Lundberg SM, Lee S-I (2017) A unified approach to interpreting model predictions. Adv Neural Inf Process Syst 30"},{"issue":"2","key":"11421_CR98","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1001\/amajethics.2019.131","volume":"21","author":"DD Luxton","year":"2019","unstructured":"Luxton DD (2019) Should Watson be consulted for a second opinion? AMA J Ethics 21(2):131\u2013137","journal-title":"AMA J Ethics"},{"key":"11421_CR99","doi-asserted-by":"crossref","unstructured":"Malin B, Kalganova T, Boulgouris N (2025) A review of faithfulness metrics for hallucination assessment in large language models. IEEE J Select Top Signal Process","DOI":"10.1109\/JSTSP.2025.3579203"},{"key":"11421_CR100","unstructured":"Mason M (2018) How To Set the Optimal Confidence Threshold for Your Assistant. Medium . https:\/\/medium.com\/ibm-watson\/how-to-set-the-optimal-confidence-threshold-for-your-assistant-bbb1be6f294f"},{"key":"11421_CR101","unstructured":"McGregor B (2021) Getting Started with Watson Assistant: Finishing Touches. Medium. https:\/\/medium.com\/ibm-watson\/finishing-touches-5471b8f46ff"},{"key":"11421_CR102","unstructured":"Microsoft: The confidence score of an answer. Microsoft (2025). https:\/\/learn.microsoft.com\/en-us\/azure\/ai-services\/qnamaker\/concepts\/confidence-score"},{"key":"11421_CR103","doi-asserted-by":"crossref","unstructured":"Mitra S, Ramnani R, Sengupta S (2022) Constraint-based multi-hop question answering with knowledge graph. In: Proceedings of the 2022 conference of the North American chapter of the association for computational linguistics: human language technologies: industry track, pp 280\u2013288","DOI":"10.18653\/v1\/2022.naacl-industry.31"},{"key":"11421_CR104","unstructured":"Mohammadi H, Bagheri A, Giachanou A, Oberski DL (2025) Explainability in practice: a survey of explainable nlp across various domains. CoRR"},{"key":"11421_CR105","doi-asserted-by":"crossref","unstructured":"Moradisani H, Zarrinkalam F, Serbanescu J, Noorian Z (2024) Unanswgen: A systematic approach for generating unanswerable questions in machine reading comprehension. In: Proceedings of the 2024 annual international ACM SIGIR conference on research and development in information retrieval in the Asia Pacific region, pp 280\u2013286","DOI":"10.1145\/3673791.3698413"},{"key":"11421_CR106","unstructured":"Mumuni F, Mumuni A (2025) Explainable artificial intelligence (xai): from inherent explainability to large language models. arXiv preprint arXiv:2501.09967"},{"issue":"9","key":"11421_CR107","doi-asserted-by":"publisher","first-page":"10602","DOI":"10.1007\/s10489-022-04052-8","volume":"53","author":"K Nassiri","year":"2023","unstructured":"Nassiri K, Akhloufi M (2023) Transformer models used for text-based question answering systems. Appl Intell 53(9):10602\u201310635","journal-title":"Appl Intell"},{"key":"11421_CR108","doi-asserted-by":"crossref","unstructured":"Neeman E, Aharoni R, Honovich O, Choshen L, Szpektor I, Abend O (2023) Disentqa: Disentangling parametric and contextual knowledge with counterfactual question answering. In: Proceedings of the 61st annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 10056\u201310070","DOI":"10.18653\/v1\/2023.acl-long.559"},{"key":"11421_CR109","unstructured":"Nguyen T, Rosenberg M, Song X, Gao J, Tiwary S, Majumder R, Deng L (2016) Ms marco: A human-generated machine reading comprehension dataset"},{"key":"11421_CR110","doi-asserted-by":"crossref","unstructured":"Nian J, Peng Z, Wang Q, Fang Y (2025) W-rag: Weakly supervised dense retrieval in rag for open-domain question answering. In: Proceedings of the 2025 International ACM SIGIR conference on innovative concepts and theories in information retrieval (ICTIR), pp 136\u2013146","DOI":"10.1145\/3731120.3744578"},{"key":"11421_CR111","doi-asserted-by":"crossref","unstructured":"Ni A, Gardner M, Dasigi P (2021) Mitigating false-negative contexts in multi-document question answering with retrieval marginalization. In: Proceedings of the 2021 conference on empirical methods in natural language processing, pp 6149\u20136161","DOI":"10.18653\/v1\/2021.emnlp-main.497"},{"issue":"8","key":"11421_CR112","doi-asserted-by":"publisher","first-page":"717","DOI":"10.13052\/jwe1540-9589.1785","volume":"17","author":"B Ojokoh","year":"2018","unstructured":"Ojokoh B, Adebisi E (2018) A review of question answering systems. J Web Eng 17(8):717\u2013758","journal-title":"J Web Eng"},{"key":"11421_CR113","doi-asserted-by":"crossref","unstructured":"Pandya HA, Bhatt BS (2021) Question answering survey: directions, challenges, datasets, evaluation matrices. arXiv preprint arxiv:org\/abs\/2112.03572","DOI":"10.20944\/preprints202112.0136.v1"},{"key":"11421_CR114","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, Zhu W-J (2002) Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting of the association for computational linguistics, pp 311\u2013318","DOI":"10.3115\/1073083.1073135"},{"key":"11421_CR115","doi-asserted-by":"crossref","unstructured":"Park J, Lee H (2024) Conversational query reformulation with the guidance of retrieved documents. arXiv preprint arXiv:2407.12363","DOI":"10.2139\/ssrn.5760729"},{"key":"11421_CR116","doi-asserted-by":"crossref","unstructured":"Patidar M, Faldu P, Singh A, Vig L, Bhattacharya I, et al (2022) Do i have the knowledge to answer? Investigating answerability of knowledge base questions. arXiv preprint arXiv:2212.10189","DOI":"10.18653\/v1\/2023.acl-long.576"},{"key":"11421_CR117","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.107364","volume":"229","author":"W Peng","year":"2021","unstructured":"Peng W, Hu Y, Yu J, Xing L, Xie Y (2021) Aper: adaptive evidence-driven reasoning network for machine reading comprehension with unanswerable questions. Knowledge-Based Systems 229:107364","journal-title":"Knowledge-Based Systems"},{"key":"11421_CR118","doi-asserted-by":"crossref","unstructured":"Putri RA, Oh A (2022) Idk-mrc: Unanswerable questions for Indonesian machine reading comprehension. In: Proceedings of the 2022 conference on empirical methods in natural language processing, pp 6918\u20136933","DOI":"10.18653\/v1\/2022.emnlp-main.465"},{"issue":"4","key":"11421_CR119","doi-asserted-by":"publisher","first-page":"2235","DOI":"10.1007\/s10115-023-02019-w","volume":"66","author":"F Qamar","year":"2024","unstructured":"Qamar F, Latif S, Shah A (2024) Techniques, datasets, evaluation metrics and future directions of a question answering system. Knowl Inf Syst 66(4):2235\u20132268","journal-title":"Knowl Inf Syst"},{"key":"11421_CR120","doi-asserted-by":"crossref","unstructured":"Rajpurkar P, Zhang J, Lopyrev K, Liang P (2016) Squad: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250","DOI":"10.18653\/v1\/D16-1264"},{"key":"11421_CR121","doi-asserted-by":"crossref","unstructured":"Rajpurkar P, Jia R, Liang P (2018) Know what you don\u2019t know: unanswerable questions for squad. arXiv preprint arXiv:1806.03822","DOI":"10.18653\/v1\/P18-2124"},{"key":"11421_CR122","unstructured":"Ramos-Varela S, Bellver-Soler J, Estecha-Garitagoitia M, D Haro LF (2025) Context or retrieval? Evaluating rag methods for art and museum qa system. In: Proceedings of the 15th international workshop on spoken dialogue systems technology, pp 129\u2013136"},{"key":"11421_CR123","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1162\/tacl_a_00266","volume":"7","author":"S Reddy","year":"2019","unstructured":"Reddy S, Chen D, Manning CD (2019) Coqa: a conversational question answering challenge. Trans Assoc Comput Linguist 7:249\u2013266","journal-title":"Trans Assoc Comput Linguist"},{"key":"11421_CR124","unstructured":"Renz K, Chitta K, Mercea O-B, Koepke A, Akata Z, Geiger A (2022) Plant: explainable planning transformers via object-level representations. In: 6th annual conference on robot learning. MLResearchPress, pp 459\u2013470"},{"key":"11421_CR125","unstructured":"Reyes\u00a0Montesinos J (2022) None of the above: comparing scenarios for answerability detection in question answering systems"},{"key":"11421_CR126","doi-asserted-by":"crossref","unstructured":"Ribeiro MT, Singh S, Guestrin C (2016) \"Why should i trust you?\" Explaining the predictions of any classifier. In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, pp 1135\u20131144","DOI":"10.1145\/2939672.2939778"},{"key":"11421_CR127","doi-asserted-by":"crossref","unstructured":"Ribeiro MT, Guestrin C, Singh S (2019) Are red roses red? Evaluating consistency of question-answering models. In: Proceedings of the 57th annual meeting of the association for computational linguistics, pp 6174\u20136184","DOI":"10.18653\/v1\/P19-1621"},{"issue":"10","key":"11421_CR128","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3560260","volume":"55","author":"A Rogers","year":"2023","unstructured":"Rogers A, Gardner M, Augenstein I (2023) Qa dataset explosion: a taxonomy of nlp resources for question answering and reading comprehension. ACM Comput Surv 55(10):1\u201345","journal-title":"ACM Comput Surv"},{"key":"11421_CR129","unstructured":"Saadat A, Sogir TB, Chowdhury MTA, Aziz S (2024) When not to answer: evaluating prompts on gpt models for effective abstention in unanswerable math word problems. arXiv preprint arXiv:2410.13029"},{"key":"11421_CR130","doi-asserted-by":"crossref","unstructured":"Saha A, Aralikatte R, Khapra MM, Sankaranarayanan K (2018) Duorc: Towards complex language understanding with paraphrased reading comprehension. In: Proceedings of the 56th annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 1683\u20131693","DOI":"10.18653\/v1\/P18-1156"},{"key":"11421_CR131","doi-asserted-by":"publisher","DOI":"10.1016\/j.nlp.2024.100088","author":"A Sarkar","year":"2024","unstructured":"Sarkar A, Agarwal R, Dhiman S, Upadhyay P, Chaturvedi S (2024) A comprehensive survey on answer generation methods using NLP. Natural Language Processing Journal. https:\/\/doi.org\/10.1016\/j.nlp.2024.100088","journal-title":"Natural Language Processing Journal"},{"key":"11421_CR132","doi-asserted-by":"crossref","unstructured":"Saxena A, Tripathi A, Talukdar P (2020) Improving multi-hop question answering over knowledge graphs using knowledge base embeddings. In: Proceedings of the 58th Annual meeting of the association for computational linguistics, pp 4498\u20134507","DOI":"10.18653\/v1\/2020.acl-main.412"},{"key":"11421_CR133","doi-asserted-by":"crossref","unstructured":"Sen P, Saffari A (2020) What do models learn from question answering datasets? In: Proceedings of the 2020 conference on empirical methods in natural language processing (EMNLP), pp 2429\u20132438","DOI":"10.18653\/v1\/2020.emnlp-main.190"},{"key":"11421_CR134","unstructured":"Seo M, Kembhavi A, Farhadi A, Hajishirzi H (2017) Bi-directional attention flow for machine comprehension. In: 5th International conference on learning representations, ICLR 2017"},{"key":"11421_CR135","unstructured":"Shekarpour S, Alshargi F, Shekarpour M (2020) Towards explainable question answering (xqa). In: AI4SG@ AAAI fall symposium"},{"issue":"6","key":"11421_CR136","doi-asserted-by":"publisher","first-page":"635","DOI":"10.1016\/j.jksuci.2018.08.005","volume":"32","author":"MAC Soares","year":"2020","unstructured":"Soares MAC, Parreiras FS (2020) A literature review on question answering techniques, paradigms and systems. J King Saud Univ 32(6):635\u2013646","journal-title":"J King Saud Univ"},{"key":"11421_CR137","unstructured":"Soni S, Gudala M, Pajouhi A, Roberts K (2022) Radqa: A question answering dataset to improve comprehension of radiology reports. In: Proceedings of the thirteenth language resources and evaluation conference, pp 6250\u20136259"},{"key":"11421_CR138","doi-asserted-by":"crossref","unstructured":"Sulem E, Hay J, Roth D (2021) Do we know what we dont know? Studying unanswerable questions beyond squad 2.0. In: Findings of the association for computational linguistics: EMNLP 2021, pp 4543\u20134548","DOI":"10.18653\/v1\/2021.findings-emnlp.385"},{"key":"11421_CR139","unstructured":"Sun F, Li L, Qiu X, Liu Y (2018) U-net: Machine reading comprehension with unanswerable questions. arXiv preprint arXiv:1810.06638"},{"key":"11421_CR140","doi-asserted-by":"crossref","unstructured":"Sun Y, Zuo A, Gao W, Ma J (2025) Causalabstain: Enhancing multilingual llms with causal reasoning for trustworthy abstention. arXiv preprint arXiv:2506.00519","DOI":"10.18653\/v1\/2025.findings-acl.723"},{"key":"11421_CR141","doi-asserted-by":"crossref","unstructured":"Tan C, Wei F, Zhou Q, Yang N, Lv W, Zhou M (2018) I know there is no answer: modeling answer validation for machine reading comprehension. In: Natural language processing and Chinese computing: 7th CCF international conference, NLPCC 2018, Hohhot, China, August 26\u201330, 2018, Proceedings, Part I 7. Springer, pp 85\u201397","DOI":"10.1007\/978-3-319-99495-6_8"},{"key":"11421_CR142","doi-asserted-by":"crossref","unstructured":"Taubenfeld A, Sheffer T, Ofek E, Feder A, Goldstein A, Gekhman Z, Yona G (2025) Confidence improves self-consistency in llms. arXiv preprint arXiv:2502.06233","DOI":"10.18653\/v1\/2025.findings-acl.1030"},{"key":"11421_CR143","unstructured":"Thai D, Ravishankar S, Abdelaziz I, Chaudhary M, Mihindukulasooriya N, Naseem T, Das R, Kapanipathi P, Fokoue A, McCallum A (2022) Cbr-ikb: A case-based reasoning approach for question answering over incomplete knowledge bases. arXiv preprint arXiv:2204.08554"},{"key":"11421_CR144","unstructured":"Thayaparan M, Valentino M, Freitas A (2020) A survey on explainability in machine reading comprehension. arXiv preprint arXiv:2010.00389"},{"key":"11421_CR145","doi-asserted-by":"crossref","unstructured":"Tommasel A, Assent I (2024) Semantic grounding of llms using knowledge graphs for query reformulation in medical information retrieval. In: 2024 IEEE international conference on big data (BigData). IEEE, pp 4048\u20134057","DOI":"10.1109\/BigData62323.2024.10826117"},{"key":"11421_CR146","unstructured":"Tran SQ, Do G-H, Do PN-T, Kretchmar M, Du X (2023a) Agent: A novel pipeline for automatically creating unanswerable questions. arXiv preprint arXiv:2309.05103"},{"key":"11421_CR147","doi-asserted-by":"crossref","unstructured":"Tran SQ, Do PN-T, Le U, Kretchmar M (2023b) The impacts of unanswerable questions on the robustness of machine reading comprehension models. In: Proceedings of the 17th conference of the european chapter of the association for computational linguistics, pp 1543\u20131557","DOI":"10.18653\/v1\/2023.eacl-main.113"},{"key":"11421_CR148","doi-asserted-by":"crossref","unstructured":"Tran S, Kretchmar M (2024) Towards robust extractive question answering models: rethinking the training methodology. In: Findings of the association for computational linguistics: EMNLP 2024, pp 2222\u20132236","DOI":"10.18653\/v1\/2024.findings-emnlp.121"},{"key":"11421_CR149","doi-asserted-by":"crossref","unstructured":"Trautmann D, Ostapuk N., Grail Q, Pol A, Bonifazi G, Gao S, Gajek M (2024) Measuring the groundedness of legal question-answering systems. In: Proceedings of the natural legal language processing workshop 2024, pp 176\u2013186","DOI":"10.18653\/v1\/2024.nllp-1.14"},{"key":"11421_CR150","doi-asserted-by":"crossref","unstructured":"Trischler A, Wang T, Yuan X, Harris J, Sordoni A, Bachman P, Suleman K (2017) Newsqa: A machine comprehension dataset. In: Proceedings of the 2nd workshop on representation learning for NLP, pp 191\u2013200","DOI":"10.18653\/v1\/W17-2623"},{"key":"11421_CR151","doi-asserted-by":"crossref","unstructured":"Tu M, Huang K, Wang G, Huang J, He X, Zhou B (2020) Select, answer and explain: Interpretable multi-hop reading comprehension over multiple documents. In: Proceedings of the AAAI conference on artificial intelligence, vol. 34, pp 9073\u20139080","DOI":"10.1609\/aaai.v34i05.6441"},{"key":"11421_CR152","doi-asserted-by":"crossref","unstructured":"Vakulenko S, Longpre S, Tu Z, Anantha R (2021) Question rewriting for conversational question answering. In: Proceedings of the 14th ACM international conference on web search and data mining, pp 355\u2013363","DOI":"10.1145\/3437963.3441748"},{"key":"11421_CR153","first-page":"7","volume":"2019","author":"E Wallace","year":"2019","unstructured":"Wallace E, Tuyls J, Wang J, Subramanian S, Gardner M, Singh S (2019a) Allennlp interpret: a framework for explaining predictions of nlp models. EMNLP-IJCNLP 2019:7","journal-title":"EMNLP-IJCNLP"},{"key":"11421_CR154","doi-asserted-by":"crossref","unstructured":"Wallace E, Feng S, Kandpal N, Gardner M, Singh S (2019b) Universal adversarial triggers for attacking and analyzing nlp. In: Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (EMNLP-IJCNLP), pp 2153\u20132162","DOI":"10.18653\/v1\/D19-1221"},{"key":"11421_CR155","doi-asserted-by":"crossref","unstructured":"Wang S, Jiang J (2016) Learning natural language inference with lstm. In: Proceedings of the 2016 conference of the North American chapter of the association for computational linguistics: human language technologies, pp 1442\u20131451","DOI":"10.18653\/v1\/N16-1170"},{"key":"11421_CR156","doi-asserted-by":"crossref","unstructured":"Wang W, Yang N, Wei F, Chang B, Zhou M (2017) Gated self-matching networks for reading comprehension and question answering. In: Proceedings of the 55th annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 189\u2013198","DOI":"10.18653\/v1\/P17-1018"},{"key":"11421_CR158","unstructured":"Wang X, Wei J, Schuurmans D, Le QV, Chi EH, Narang S, Chowdhery A, Zhou D (2022) Self-consistency improves chain of thought reasoning in language models. In: The eleventh international conference on learning representations"},{"key":"11421_CR159","doi-asserted-by":"crossref","unstructured":"Wan G, Wu Y, Chen J, Li S (2025) Reasoning aware self-consistency: Leveraging reasoning paths for efficient llm sampling. In: Proceedings of the 2025 conference of the nations of the Americas chapter of the association for computational linguistics: human language technologies (Volume 1: Long Papers), pp 3613\u20133635","DOI":"10.18653\/v1\/2025.naacl-long.184"},{"key":"11421_CR160","doi-asserted-by":"crossref","unstructured":"Weissenborn D, Wiese G, Seiffe L (2017) Making neural qa as simple as possible but not simpler. In: Proceedings of the 21st conference on computational natural language learning (CoNLL 2017), pp 271\u2013280","DOI":"10.18653\/v1\/K17-1028"},{"key":"11421_CR161","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1162\/tacl_a_00754","volume":"13","author":"B Wen","year":"2025","unstructured":"Wen B, Yao J, Feng S, Xu C, Tsvetkov Y, Howe B, Wang LL (2025) Know your limits: a survey of abstention in large language models. Transactions of the Association for Computational Linguistics 13:529\u2013556","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"11421_CR162","doi-asserted-by":"crossref","unstructured":"Wu T, Ribeiro MT, Heer J, Weld DS (2019) Errudite: Scalable, reproducible, and testable error analysis. In: Proceedings of the 57th annual meeting of the association for computational linguistics, pp 747\u2013763","DOI":"10.18653\/v1\/P19-1073"},{"key":"11421_CR163","doi-asserted-by":"crossref","unstructured":"Wu C-S, Madotto A, Liu W, Fung P, Xiong C (2022) Qaconv: Question answering on informative conversations. In: Proceedings of the 60th annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 5389\u20135411","DOI":"10.18653\/v1\/2022.acl-long.370"},{"key":"11421_CR164","unstructured":"Xie Q, Li Q, Yu Z, Zhang Y, Zhang Y, Yang L (2025) An empirical analysis of uncertainty in large language model evaluations. In: The thirteenth international conference on learning representations"},{"key":"11421_CR165","doi-asserted-by":"crossref","unstructured":"Xiong W, Yu M, Chang S, Guo X, Wang WY (2019) Improving question answering over incomplete kbs with knowledge-aware reader. In: Proceedings of the 57th annual meeting of the association for computational linguistics, pp 4258\u20134264","DOI":"10.18653\/v1\/P19-1417"},{"key":"11421_CR166","doi-asserted-by":"crossref","unstructured":"Xu W, Zhang H, Cai D, Lam W (2021) Dynamic semantic graph construction and reasoning for explainable multi-hop science question answering. In: Findings of the association for computational linguistics: ACL-IJCNLP 2021, pp 1044\u20131056","DOI":"10.18653\/v1\/2021.findings-acl.90"},{"key":"11421_CR167","doi-asserted-by":"crossref","unstructured":"Yahya M, Barbosa D, Berberich K, Wang Q, Weikum G (2016) Relationship queries on extended knowledge graphs. In: Proceedings of the ninth ACM international conference on web search and data mining, pp 605\u2013614","DOI":"10.1145\/2835776.2835795"},{"key":"11421_CR168","doi-asserted-by":"crossref","unstructured":"Yamada M, Arase Y (2025) Light-weight hallucination detection using contrastive learning for conditional text generation. In: Proceedings of the 63rd annual meeting of the association for computational linguistics (Volume 4: Student Research Workshop), pp 687\u2013694","DOI":"10.18653\/v1\/2025.acl-srw.44"},{"key":"11421_CR169","doi-asserted-by":"crossref","unstructured":"Yang Z, Qi P, Zhang S, Bengio Y, Cohen W, Salakhutdinov R, Manning CD (2018) Hotpotqa: A dataset for diverse, explainable multi-hop question answering. In: Proceedings of the 2018 conference on empirical methods in natural language processing. association for computational linguistics","DOI":"10.18653\/v1\/D18-1259"},{"key":"11421_CR170","doi-asserted-by":"crossref","unstructured":"Yang Y, Yih W-T, Meek C (2015) Wikiqa: A challenge dataset for open-domain question answering. In: Proceedings of the 2015 conference on empirical methods in natural language processing, pp 2013\u20132018","DOI":"10.18653\/v1\/D15-1237"},{"key":"11421_CR171","unstructured":"Yatskar M (2019) A qualitative comparison of coqa, squad 2.0 and quac. In: Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, Volume 1 (Long and Short Papers), pp 2318\u20132323"},{"key":"11421_CR172","doi-asserted-by":"crossref","unstructured":"Yen A-Z, Huang H-H, Chen H-H (2021) Unanswerable question correction in question answering over personal knowledge base. In: Proceedings of the AAAI conference on artificial intelligence, vol. 35, pp 14266\u201314275","DOI":"10.1609\/aaai.v35i16.17678"},{"key":"11421_CR173","doi-asserted-by":"crossref","unstructured":"Yen A-Z, Huang H-H, Chen H-H (2022) Unanswerable question correction and explanation over personal knowledge base. In: Proceedings of the 31st ACM International conference on information & knowledge management, pp 4645\u20134649","DOI":"10.1145\/3511808.3557717"},{"key":"11421_CR174","unstructured":"Yih SW-T., Chang M-W, Meek C, Pastusiak A (2013) Question answering using enhanced lexical semantic models. In: Proceedings of the 51st annual meeting of the association for computational linguistics"},{"key":"11421_CR175","doi-asserted-by":"crossref","unstructured":"Yin Z, Sun Q, Guo Q, Wu J, Qiu X, Huang X-J (2023) Do large language models know what they dont know? In: Findings of the association for computational linguistics: ACL 2023, pp 8653\u20138665","DOI":"10.18653\/v1\/2023.findings-acl.551"},{"key":"11421_CR176","doi-asserted-by":"crossref","unstructured":"Yoon C, Kim G, Jeon B, Kim S, Jo Y, Kang J (2025) Ask optimal questions: aligning large language models with retrievers preference in conversation. In: Findings of the association for computational linguistics: NAACL 2025, pp 5899\u20135921","DOI":"10.18653\/v1\/2025.findings-naacl.328"},{"key":"11421_CR177","unstructured":"Yu AW, Dohan D, Luong M-T, Zhao R, Chen K, Norouzi M, Le QV (2018) Qanet: Combining local convolution with global self-attention for reading comprehension. In: International conference on learning representations"},{"key":"11421_CR178","unstructured":"Yu Z, He L, Wu Z, Dai X, Chen J (2023a) Towards better chain-of-thought prompting strategies: a survey. arXiv preprint arXiv:2310.04959"},{"key":"11421_CR179","doi-asserted-by":"crossref","unstructured":"Yu X, Min S, Zettlemoyer L, Hajishirzi H (2023b) Crepe: Open-domain question answering with false presuppositions. In: Proceedings of the 61st annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 10457\u201310480","DOI":"10.18653\/v1\/2023.acl-long.583"},{"issue":"12","key":"11421_CR180","doi-asserted-by":"publisher","first-page":"3151","DOI":"10.1007\/s10115-022-01744-y","volume":"64","author":"M Zaib","year":"2022","unstructured":"Zaib M, Zhang WE, Sheng QZ, Mahmood A, Zhang Y (2022) Conversational question answering: a survey. Knowl Inf Syst 64(12):3151\u20133195","journal-title":"Knowl Inf Syst"},{"issue":"21","key":"11421_CR181","doi-asserted-by":"publisher","DOI":"10.3390\/app10217640","volume":"10","author":"C Zeng","year":"2020","unstructured":"Zeng C, Li S, Li Q, Hu J, Hu J (2020) A survey on machine reading comprehension tasks, evaluation metrics and benchmark datasets. Appl Sci 10(21):7640","journal-title":"Appl Sci"},{"key":"11421_CR182","doi-asserted-by":"crossref","unstructured":"Zhang Z, Wu Y, Zhou J, Duan S, Zhao H, Wang R (2020) Sg-net: Syntax-guided machine reading comprehension. In: Proceedings of the AAAI conference on artificial intelligence, vol. 34, pp 9636\u20139643","DOI":"10.1609\/aaai.v34i05.6511"},{"key":"11421_CR183","doi-asserted-by":"crossref","unstructured":"Zhang Z, Yang J, Zhao H (2021) Retrospective reader for machine reading comprehension. In: Proceedings of the AAAI conference on artificial intelligence, vol. 35, pp 14506\u201314514","DOI":"10.1609\/aaai.v35i16.17705"},{"key":"11421_CR184","doi-asserted-by":"crossref","unstructured":"Zhang J, Zhang X, Yu J, Tang J, Tang J, Li C, Chen H (2022) Subgraph retrieval enhanced model for multi-hop knowledge base question answering. In: Proceedings of the 60th annual meeting of the association for computational linguistics (Volume 1: Long Papers), pp 5773\u20135784","DOI":"10.18653\/v1\/2022.acl-long.396"},{"key":"11421_CR185","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.aiopen.2022.12.003","volume":"4","author":"L Zhang","year":"2023","unstructured":"Zhang L, Zhang J, Ke X, Li H, Huang X, Shao Z, Cao S, Lv X (2023) A survey on complex factual question answering. AI Open 4:1\u201312","journal-title":"AI Open"},{"key":"11421_CR186","unstructured":"Zhang Q-W, Li F, Wang J, Qiao L, Yu Y, Yin D, Sun X (2025a) Factguard: Leveraging multi-agent systems to generate answerable and unanswerable questions for enhanced long-context llm extraction. arXiv e-prints, 2504"},{"issue":"8","key":"11421_CR187","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3719341","volume":"57","author":"Z Zhang","year":"2025","unstructured":"Zhang Z, Yao Y, Zhang A, Tang X, Ma X, He Z, Wang Y, Gerstein M, Wang R, Liu G (2025b) Igniting language intelligence: the hitchhikers guide from chain-of-thought reasoning to language agents. ACM Comput Surv 57(8):1\u201339","journal-title":"ACM Comput Surv"},{"key":"11421_CR188","doi-asserted-by":"crossref","unstructured":"Zhao W, Gao G, Cardie C, Rush AM (2024) I could ve asked that: reformulating unanswerable questions. In: Proceedings of the 2024 conference on empirical methods in natural language processing, pp 4207\u20134220","DOI":"10.18653\/v1\/2024.emnlp-main.242"},{"key":"11421_CR189","unstructured":"Zhong Y, Wang H, Xing EP (2022) Mrclens: an mrc dataset bias detection toolkit. arXiv preprint arXiv:2207.08943"},{"issue":"5","key":"11421_CR190","doi-asserted-by":"publisher","DOI":"10.3390\/electronics10050593","volume":"10","author":"J Zhou","year":"2021","unstructured":"Zhou J, Gandomi AH, Chen F, Holzinger A (2021) Evaluating the quality of machine learning explanations: a survey on methods and metrics. Electronics 10(5):593","journal-title":"Electronics"},{"key":"11421_CR191","unstructured":"Zhou W, Jiang YE, Li L, Wu J, Wang T, Wang S, Chen J, Zhang J, Chen J, Tang X, et al (2024) Agents: An open-source framework for autonomous language agents. In: ICLR 2024 workshop on large language model (LLM) agents"},{"key":"11421_CR192","doi-asserted-by":"crossref","unstructured":"Zhu H, Dong L, Wei F, Wang W, Qin B, Liu T (2019) Learning to ask unanswerable questions for machine reading comprehension. In: Proceedings of the 57th annual meeting of the association for computational linguistics, pp 4238\u20134248","DOI":"10.18653\/v1\/P19-1415"},{"key":"11421_CR193","unstructured":"Zhu F, Lei W, Wang C, Zheng J, Poria S, Chua T-S (2021) Retrieving and reading: a comprehensive survey on open-domain question answering. arXiv preprint arXiv:2101.00774"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11421-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-025-11421-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11421-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T03:12:26Z","timestamp":1769483546000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-025-11421-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,25]]},"references-count":192,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,1]]}},"alternative-id":["11421"],"URL":"https:\/\/doi.org\/10.1007\/s10462-025-11421-5","relation":{},"ISSN":["1573-7462"],"issn-type":[{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,25]]},"assertion":[{"value":"3 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"23"}}