{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T04:10:32Z","timestamp":1743826232060,"version":"3.40.3"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031887079","type":"print"},{"value":"9783031887086","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-88708-6_20","type":"book-chapter","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T11:51:57Z","timestamp":1743767517000},"page":"310-325","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing FEVER-Style Claim Fact-Checking Against Wikipedia: A Diagnostic Taxonomy and\u00a0a\u00a0Generative Framework"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-4839-4269","authenticated-orcid":false,"given":"Anton","family":"Chernyavskiy","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5484-372X","authenticated-orcid":false,"given":"Dmitry","family":"Ilvovsky","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3600-1510","authenticated-orcid":false,"given":"Preslav","family":"Nakov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,4,3]]},"reference":[{"key":"20_CR1","unstructured":"Chen, J., Chen, L., Huang, H., Zhou, T.: When do you need chain-of-thought prompting for ChatGPT?. CoRR, abs\/2304.03262 (2023). https:\/\/doi.org\/10.48550\/arXiv.2304.03262"},{"key":"20_CR2","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol. 33, pp. 1877\u20131901 (2020)"},{"key":"20_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1007\/978-3-030-72240-1_75","volume-title":"Advances in Information Retrieval","author":"P Nakov","year":"2021","unstructured":"Nakov, P., et al.: The CLEF-2021 CheckThat! Lab on detecting check-worthy claims, previously fact-checked claims, and fake news. In: Hiemstra, D., Moens, M.-F., Mothe, J., Perego, R., Potthast, M., Sebastiani, F. (eds.) ECIR 2021. LNCS, vol. 12657, pp. 639\u2013649. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-72240-1_75"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Gad-Elrab, M., Stepanova, D., Urbani, J., Weikum, G.: Tracy: tracing facts over knowledge graphs and text. IN: Proceedings of the World Wide Web Conference, pp. 3516\u20133520 (2019)","DOI":"10.1145\/3308558.3314126"},{"key":"20_CR5","doi-asserted-by":"crossref","unstructured":"Nie, Y., Chen, H., Bansal, M.: Combining fact extraction and verification with neural semantic matching networks. In: Proceedings Of The AAAI Conference On Artificial Intelligence, pp. 6859\u20136866 (2019)","DOI":"10.1609\/aaai.v33i01.33016859"},{"key":"20_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3485127","volume":"55","author":"G Bekoulis","year":"2020","unstructured":"Bekoulis, G., Papagiannopoulou, C., Deligiannis, N.: A review on fact extraction and verification. ACM Comput. Surv. (CSUR) 55, 1\u201335 (2020)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Guo, Z., Schlichtkrull, M., Vlachos, A.: A survey on automated fact-checking. Trans. Assoc. Comput. Linguist. 10, 178\u2013206 (2022). https:\/\/doi.org\/10.1162\/tacl%5C_a%5C_00454","DOI":"10.1162\/tacl_a_00454"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Morris, J., et al. TextAttack: a framework for adversarial attacks, data augmentation, and adversarial training in NLP. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 119\u2013126 (2020)","DOI":"10.18653\/v1\/2020.emnlp-demos.16"},{"key":"20_CR9","unstructured":"Wang, A., et al.: SuperGLUE: a stickier benchmark for general-purpose language understanding systems. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"20_CR10","unstructured":"Wang, B., et al.: Adversarial GLUE: a multi-task benchmark for robustness evaluation of language models. In: Advances in Neural Information Processing Systems (2021)"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Meyer, S., Elsweiler, D., Ludwig, B., Fernandez-Pichel, M., Losada, D.: Do we still need human assessors? Prompt-based GPT-3 user simulation in conversational AI. In: Proceedings of the 4th Conference on Conversational User Interfaces (2022). https:\/\/doi.org\/10.1145\/3543829.3544529","DOI":"10.1145\/3543829.3544529"},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Wang, S., Liu, Y., Xu, Y., Zhu, C., Zeng, M.: Want to reduce labeling cost? GPT-3 can help. arXiv abs\/2108.13487 (2021)","DOI":"10.18653\/v1\/2021.findings-emnlp.354"},{"key":"20_CR13","doi-asserted-by":"crossref","unstructured":"Bonifacio, L., Abonizio, H., Fadaee, M., Nogueira, R.: InPars: unsupervised dataset generation for information retrieval. In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2387\u20132392 (2022). https:\/\/doi.org\/10.1145\/3477495.3531863","DOI":"10.1145\/3477495.3531863"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Nakov, P., et al.: Automated fact-checking for assisting human fact-checkers. In: Proceedings of the 30th International Joint Conference on Artificial Intelligence, pp. 4551\u20134558 (2021)","DOI":"10.24963\/ijcai.2021\/619"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Gu, Z., Fan, J., Tang, N., Nakov, P., Zhao, X. , Du, X.: PASTA: table-operations aware fact verification via sentence-table cloze pre-training. In: Proceedings of the 2022 Conference on Empirical Methods In Natural Language Processing (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.331"},{"key":"20_CR16","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1023\/B:BTTJ.0000047600.45421.6d","volume":"22","author":"H Liu","year":"2004","unstructured":"Liu, H., Singh, P.: ConceptNet - a practical commonsense reasoning tool-kit. BT Technol. J. 22, 211\u2013226 (2004)","journal-title":"BT Technol. J."},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Speer, R., Chin, J., Havasi, C.: ConceptNet 5.5: an open multilingual graph of general knowledge. In: AAAI, pp. 4444\u20134451 (2017)","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"20_CR18","unstructured":"Jackendoff, R. Semantic Structures. MIT Press, Cambridge (1990)"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Guo, Z., Schlichtkrull, M., Vlachos, A.: A survey on automated fact-checking. Trans. Assoc. Comput. Linguist. 10, 178\u2013206 (2022). https:\/\/aclanthology.org\/2022.tacl-1.11","DOI":"10.1162\/tacl_a_00454"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Thorne, J., Vlachos, A., Cocarascu, O., Christodoulopoulos, C., Mittal, A.: The Fact Extraction and VERification (FEVER) shared task. In: Proceedings of the First Workshop on Fact Extraction and VERification (FEVER), pp. 1\u20139 (2018). https:\/\/aclanthology.org\/W18-5501","DOI":"10.18653\/v1\/W18-5501"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"Thorne, J., Vlachos, A., Cocarascu, O., Christodoulopoulos, C., Mittal, A.: The FEVER2.0 shared task. In: Proceedings of the Second Workshop on Fact Extraction And VERification (FEVER). pp. 1\u20136 (2019). https:\/\/aclanthology.org\/D19-6601","DOI":"10.18653\/v1\/D19-6601"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Nie, Y., Williams, A., Dinan, E., Bansal, M., Weston, J., Kiela, D.: Adversarial NLI: a new benchmark for natural language understanding. In: Proceedings of the 58th Annual Meeting of the Association For Computational Linguistics, pp. 4885\u20134901 (2020). https:\/\/aclanthology.org\/2020.acl-main.441","DOI":"10.18653\/v1\/2020.acl-main.441"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"R\u00f6ttger, P., Vidgen, B., Nguyen, D., Waseem, Z., Margetts, H., Pierrehumbert, J.: HateCheck: functional tests for hate speech detection models. In: Proceedings of the 59th Annual Meeting of the Association For Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 41\u201358 (2021). https:\/\/aclanthology.org\/2021.acl-long.4","DOI":"10.18653\/v1\/2021.acl-long.4"},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Wallace, E., Feng, S., Kandpal, N., Gardner, M., Singh, S.: Universal adversarial triggers for attacking and analyzing NLP. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 2153\u20132162 (2019). https:\/\/aclanthology.org\/D19-1221","DOI":"10.18653\/v1\/D19-1221"},{"key":"20_CR25","doi-asserted-by":"crossref","unstructured":"Reddy, A., Rocha, G., Esteves, D.: DeFactoNLP: fact verification using entity recognition, TFIDF vector comparison and decomposable attention. In: Proceedings of the First Workshop on Fact Extraction and VERification (FEVER), pp. 132\u2013137 (2018). https:\/\/aclanthology.org\/W18-5522","DOI":"10.18653\/v1\/W18-5522"},{"key":"20_CR26","doi-asserted-by":"crossref","unstructured":"Hidey, C., Diab, M.: Team SWEEPer: joint sentence extraction and fact checking with pointer networks. In: Proceedings of the First Workshop on fact Extraction and VERification (FEVER), pp. 150\u2013155 (2018). https:\/\/aclanthology.org\/W18-5525","DOI":"10.18653\/v1\/W18-5525"},{"key":"20_CR27","doi-asserted-by":"crossref","unstructured":"Stammbach, D., Neumann, G.: Team DOMLIN: exploiting evidence enhancement for the FEVER shared task. In: Proceedings of the Second Workshop on fact Extraction and VERification (FEVER), pp. 105\u2013109 (2019). https:\/\/aclanthology.org\/D19-6616","DOI":"10.18653\/v1\/D19-6616"},{"key":"20_CR28","doi-asserted-by":"crossref","unstructured":"Chernyavskiy, A., Ilvovsky, D.: Extract and aggregate: a novel domain-independent approach to factual data verification. In: Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER), pp. 69\u201378 (2019). https:\/\/aclanthology.org\/D19-6612","DOI":"10.18653\/v1\/D19-6612"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Subramanian, S., Lee, K.: Hierarchical evidence set modeling for automated fact extraction and verification. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 7798\u20137809 (2020). https:\/\/aclanthology.org\/2020.emnlp-main.627","DOI":"10.18653\/v1\/2020.emnlp-main.627"},{"key":"20_CR30","doi-asserted-by":"crossref","unstructured":"Thorne, J., Vlachos, A., Christodoulopoulos, C., Mittal, A.: FEVER: a large-scale dataset for fact extraction and verification. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), pp. 809\u2013819 (2018). https:\/\/aclanthology.org\/N18-1074","DOI":"10.18653\/v1\/N18-1074"},{"key":"20_CR31","doi-asserted-by":"crossref","unstructured":"Thorne, J., Vlachos, A., Christodoulopoulos, C., Mittal, A.: Evaluating adversarial attacks against multiple fact verification systems. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp. 2944\u20132953 (2019). https:\/\/aclanthology.org\/D19-1292","DOI":"10.18653\/v1\/D19-1292"},{"key":"20_CR32","doi-asserted-by":"crossref","unstructured":"Niewinski, P., Pszona, M., Janicka, M.: GEM: generative enhanced model for adversarial attacks. In: Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER), pp. 20\u201326 (2019). https:\/\/aclanthology.org\/D19-6604","DOI":"10.18653\/v1\/D19-6604"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"Kim, Y., Allan, J.: FEVER breaker\u2019s run of team NbAuzDrLqg. In: Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER), pp. 99\u2013104 (2019). https:\/\/aclanthology.org\/D19-6615","DOI":"10.18653\/v1\/D19-6615"},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.: GLUE: a multi-task benchmark and analysis platform for natural language understanding. In: Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP, pp. 353\u2013355 (2018). https:\/\/aclanthology.org\/W18-5446","DOI":"10.18653\/v1\/W18-5446"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Joshi, P., Aditya, S., Sathe, A., Choudhury, M.: TaxiNLI: taking a ride up the NLU hill. In: Proceedings of the 24th Conference on Computational Natural Language Learning, pp. 41\u201355 (2020). https:\/\/aclanthology.org\/2020.conll-1.4","DOI":"10.18653\/v1\/2020.conll-1.4"},{"key":"20_CR36","doi-asserted-by":"crossref","unstructured":"Ribeiro, M., Wu, T., Guestrin, C., Singh, S.: Beyond accuracy: behavioral testing of NLP models with checklist. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 4902\u20134912 (2020). https:\/\/aclanthology.org\/2020.acl-main.442","DOI":"10.18653\/v1\/2020.acl-main.442"},{"key":"20_CR37","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: PromDA: prompt-based data augmentation for low-resource NLU tasks. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 4242\u20134255 (2022). https:\/\/aclanthology.org\/2022.acl-long.292","DOI":"10.18653\/v1\/2022.acl-long.292"},{"key":"20_CR38","doi-asserted-by":"crossref","unstructured":"Park, J., Min, S., Kang, J., Zettlemoyer, L., Hajishirzi, H.: FaVIQ: FAct verification from information-seeking questions. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 5154\u20135166 (2022). https:\/\/aclanthology.org\/2022.acl-long.354","DOI":"10.18653\/v1\/2022.acl-long.354"},{"key":"20_CR39","doi-asserted-by":"crossref","unstructured":"Wadden, D., et al.: Fact or fiction: verifying scientific claims. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 7534\u20137550 (2020). https:\/\/aclanthology.org\/2020.emnlp-main.609","DOI":"10.18653\/v1\/2020.emnlp-main.609"},{"key":"20_CR40","doi-asserted-by":"crossref","unstructured":"Miller, G.: WordNet: a lexical database for English. In: Human Language Technology: Proceedings of a Workshop Held at Plainsboro, New Jersey, 8\u201311 March 1994 (1994). https:\/\/aclanthology.org\/H94-1111","DOI":"10.3115\/1075812.1075938"},{"key":"20_CR41","doi-asserted-by":"crossref","unstructured":"Aly, R., et al.: The Fact Extraction and VERification Over Unstructured and Structured information (FEVEROUS) shared task. In: Proceedings of the Fourth Workshop on Fact Extraction and VERification (FEVER), pp. 1\u201313 (2021)","DOI":"10.18653\/v1\/2021.fever-1.1"},{"key":"20_CR42","doi-asserted-by":"crossref","unstructured":"DeHaven, M., Scott, S.: BEVERS: a general, simple, and performant framework for automatic fact verification. In: Proceedings of the Sixth Fact Extraction and VERification Workshop (FEVER), pp. 58\u201365. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2023.fever-1.6","DOI":"10.18653\/v1\/2023.fever-1.6"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-88708-6_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T11:52:26Z","timestamp":1743767546000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-88708-6_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031887079","9783031887086"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-88708-6_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"3 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Our dataset is relatively small, but it is typical for diagnostic datasets. Note also that this is a test dataset: since we have many classes, its total size is not that small. There is a need for a more comprehensive taxonomy and also for a larger number of examples. Moreover, we focus on English only; there is a need for such research for other languages.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Limitations"}},{"value":"Our dataset can be misused to unfairly moderate content. Thus, we ask researchers and potential users to exercise caution. The intended use is to help improve research systems by means of providing a way to analyze their limitations.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics and Broader Impact"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lucca","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"47","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecir2025.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}