{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T16:19:27Z","timestamp":1773937167944,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T00:00:00Z","timestamp":1773878400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T00:00:00Z","timestamp":1773878400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Research Organization of Information and Systems, Japan","award":["2024-ISMCRP-4307"],"award-info":[{"award-number":["2024-ISMCRP-4307"]}]},{"name":"Research Organization of Information and Systems, Japan","award":["2024-ISMCRP-4307"],"award-info":[{"award-number":["2024-ISMCRP-4307"]}]},{"name":"Research Organization of Information and Systems, Japan","award":["2024-ISMCRP-4307"],"award-info":[{"award-number":["2024-ISMCRP-4307"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Data Sci Anal"],"published-print":{"date-parts":[[2026,12]]},"DOI":"10.1007\/s41060-025-00998-3","type":"journal-article","created":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T14:36:08Z","timestamp":1773930968000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Boosting large-language models for fact-checking: leveraging verbalized tabular data as evidence"],"prefix":"10.1007","volume":"22","author":[{"given":"Son T.","family":"Luu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Trung","family":"Vo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vu","family":"Tran","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tomoko","family":"Matsui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Le-Minh","family":"Nguyen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,3,19]]},"reference":[{"key":"998_CR1","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1162\/tacl_a_00454","volume":"10","author":"Z Guo","year":"2022","unstructured":"Guo, Z., Schlichtkrull, M., Vlachos, A.: A survey on automated fact-checking. Transactions of the Association for Computational Linguistics 10, 178\u2013206 (2022). https:\/\/doi.org\/10.1162\/tacl_a_00454","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"998_CR2","doi-asserted-by":"crossref","unstructured":"Hassan, N., Li, C., Tremayne, M.: Detecting check-worthy factual claims in presidential debates. In: Proceedings of the 24th Acm International on Conference on Information and Knowledge Management, pp. 1835\u20131838 (2015)","DOI":"10.1145\/2806416.2806652"},{"key":"998_CR3","doi-asserted-by":"publisher","unstructured":"Akhtar, M., Schlichtkrull, M., Guo, Z., Cocarascu, O., Simperl, E., Vlachos, A.: Multimodal automated fact-checking: A survey. In: Bouamor, H., Pino, J., Bali, K. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2023, pp. 5430\u20135448. Association for Computational Linguistics, Singapore (2023). https:\/\/doi.org\/10.18653\/v1\/2023.findings-emnlp.361. https:\/\/aclanthology.org\/2023.findings-emnlp.361\/","DOI":"10.18653\/v1\/2023.findings-emnlp.361"},{"key":"998_CR4","doi-asserted-by":"crossref","unstructured":"Jin, N., Siebert, J., Li, D., Chen, Q.: A survey on table question answering: recent advances. In: China Conference on Knowledge Graph and Semantic Computing, pp. 174\u2013186. Springer (2022)","DOI":"10.1007\/978-981-19-7596-7_14"},{"key":"998_CR5","doi-asserted-by":"crossref","unstructured":"Luu, S.T., Nguyen, H., Vo, T., Nguyen, L.-M.: Zefav: Boosting large language models for zero-shot fact verification. In: Pacific Rim International Conference on Artificial Intelligence, pp. 288\u2013295. Springer (2024)","DOI":"10.1007\/978-981-96-0119-6_28"},{"key":"998_CR6","doi-asserted-by":"publisher","unstructured":"Pan, L., Wu, X., Lu, X., Luu, A.T., Wang, W.Y., Kan, M.-Y., Nakov, P.: Fact-checking complex claims with program-guided reasoning. In: Rogers, A., Boyd-Graber, J., Okazaki, N. (eds.) Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 6981\u20137004. Association for Computational Linguistics, Toronto, Canada (2023). https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.386. https:\/\/aclanthology.org\/2023.acl-long.386\/","DOI":"10.18653\/v1\/2023.acl-long.386"},{"key":"998_CR7","unstructured":"Qin, L., Chen, Q., Feng, X., Wu, Y., Zhang, Y., Li, Y., Li, M., Che, W., Yu, P.S.: Large language models meet nlp: A survey. arXiv preprint arXiv:2405.12819 (2024)"},{"key":"998_CR8","unstructured":"Vykopal, I., Pikuliak, M., Ostermann, S., \u0160imko, M.: Generative large language models in automated fact-checking: A survey. arXiv preprint arXiv:2407.02351 (2024)"},{"key":"998_CR9","doi-asserted-by":"publisher","unstructured":"Sui, Y., Zhou, M., Zhou, M., Han, S., Zhang, D.: Table meets llm: Can large language models understand structured table data? a benchmark and empirical study. In: Proceedings of the 17th ACM International Conference on Web Search and Data Mining. WSDM \u201924, pp. 645\u2013654. Association for Computing Machinery, New York, NY, USA (2024). https:\/\/doi.org\/10.1145\/3616855.3635752","DOI":"10.1145\/3616855.3635752"},{"key":"998_CR10","doi-asserted-by":"publisher","unstructured":"Deng, N., Sun, Z., He, R., Sikka, A., Chen, Y., Ma, L., Zhang, Y., Mihalcea, R.: Tables as texts or images: Evaluating the table reasoning ability of LLMs and MLLMs. In: Ku, L.-W., Martins, A., Srikumar, V. (eds.) Findings of the Association for Computational Linguistics: ACL 2024, pp. 407\u2013426. Association for Computational Linguistics, Bangkok, Thailand (2024). https:\/\/doi.org\/10.18653\/v1\/2024.findings-acl.23. https:\/\/aclanthology.org\/2024.findings-acl.23\/","DOI":"10.18653\/v1\/2024.findings-acl.23"},{"key":"998_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.datak.2024.102313","volume":"152","author":"A Remadi","year":"2024","unstructured":"Remadi, A., El Hage, K., Hobeika, Y., Bugiotti, F.: To prompt or not to prompt: Navigating the use of large language models for integrating and modeling heterogeneous data. Data & Knowledge Engineering 152, 102313 (2024)","journal-title":"Data & Knowledge Engineering"},{"key":"998_CR12","doi-asserted-by":"publisher","unstructured":"Zhao, Y., Qi, Z., Nan, L., Mi, B., Liu, Y., Zou, W., Han, S., Chen, R., Tang, X., Xu, Y., Radev, D., Cohan, A.: QTSumm: Query-focused summarization over tabular data. In: Bouamor, H., Pino, J., Bali, K. (eds.) Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 1157\u20131172. Association for Computational Linguistics, Singapore (2023). https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.74. https:\/\/aclanthology.org\/2023.emnlp-main.74\/","DOI":"10.18653\/v1\/2023.emnlp-main.74"},{"key":"998_CR13","unstructured":"Moosavi, N.S., R\u00fcckl\u00e9, A., Roth, D., Gurevych, I.: Scigen: a dataset for reasoning-aware text generation from scientific tables. In: Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2) (2021)"},{"key":"998_CR14","unstructured":"Grattafiori, A., Dubey, A., Jauhri, A., Pandey, A., Kadian, A., Al-Dahle, A., Letman, A., Mathur, A., Schelten, A., Vaughan, A., et al.: The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)"},{"key":"998_CR15","unstructured":"Yang, A., Yang, B., Zhang, B., Hui, B., Zheng, B., Yu, B., Li, C., Liu, D., Huang, F., Wei, H., et al.: Qwen2. 5 technical report. arXiv preprint arXiv:2412.15115 (2024)"},{"key":"998_CR16","unstructured":"Guo, D., Yang, D., Zhang, H., Song, J., Zhang, R., Xu, R., Zhu, Q., Ma, S., Wang, P., Bi, X., et al.: Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)"},{"key":"998_CR17","unstructured":"Chen, W., Wang, H., Chen, J., Zhang, Y., Wang, H., Li, S., Zhou, X., Wang, W.Y.: Tabfact: A large-scale dataset for table-based fact verification. arXiv preprint arXiv:1909.02164 (2019)"},{"key":"998_CR18","doi-asserted-by":"publisher","unstructured":"Lu, X., Pan, L., Liu, Q., Nakov, P., Kan, M.-Y.: SCITAB: A challenging benchmark for compositional reasoning and claim verification on scientific tables. In: Bouamor, H., Pino, J., Bali, K. (eds.) Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 7787\u20137813. Association for Computational Linguistics, Singapore (2023). https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.483. https:\/\/aclanthology.org\/2023.emnlp-main.483\/","DOI":"10.18653\/v1\/2023.emnlp-main.483"},{"key":"998_CR19","doi-asserted-by":"publisher","unstructured":"Akhtar, M., Cocarascu, O., Simperl, E.: PubHealthTab: A public health table-based dataset for evidence-based fact checking. In: Carpuat, M., Marneffe, M.-C., Meza\u00a0Ruiz, I.V. (eds.) Findings of the Association for Computational Linguistics: NAACL 2022, pp. 1\u201316. Association for Computational Linguistics, Seattle, United States (2022). https:\/\/doi.org\/10.18653\/v1\/2022.findings-naacl.1. https:\/\/aclanthology.org\/2022.findings-naacl.1\/","DOI":"10.18653\/v1\/2022.findings-naacl.1"},{"key":"998_CR20","doi-asserted-by":"publisher","unstructured":"Kotonya, N., Toni, F.: Explainable automated fact-checking: A survey. In: Scott, D., Bel, N., Zong, C. (eds.) Proceedings of the 28th International Conference on Computational Linguistics, pp. 5430\u20135443. International Committee on Computational Linguistics, Barcelona, Spain (Online) (2020). https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.474. https:\/\/aclanthology.org\/2020.coling-main.474\/","DOI":"10.18653\/v1\/2020.coling-main.474"},{"key":"998_CR21","doi-asserted-by":"publisher","unstructured":"Thorne, J., Vlachos, A., Christodoulopoulos, C., Mittal, A.: FEVER: a large-scale dataset for fact extraction and VERification. In: Walker, M., Ji, H., Stent, A. (eds.) Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), pp. 809\u2013819. Association for Computational Linguistics, New Orleans, Louisiana (2018). https:\/\/doi.org\/10.18653\/v1\/N18-1074. https:\/\/aclanthology.org\/N18-1074\/","DOI":"10.18653\/v1\/N18-1074"},{"key":"998_CR22","doi-asserted-by":"publisher","unstructured":"Wang, W.Y.: \u201cliar, liar pants on fire\u201d: A new benchmark dataset for fake news detection. In: Barzilay, R., Kan, M.-Y. (eds.) Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp. 422\u2013426. Association for Computational Linguistics, Vancouver, Canada (2017). https:\/\/doi.org\/10.18653\/v1\/P17-2067. https:\/\/aclanthology.org\/P17-2067\/","DOI":"10.18653\/v1\/P17-2067"},{"key":"998_CR23","doi-asserted-by":"publisher","unstructured":"Jiang, Y., Bordia, S., Zhong, Z., Dognin, C., Singh, M., Bansal, M.: HoVer: A dataset for many-hop fact extraction and claim verification. In: Cohn, T., He, Y., Liu, Y. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 3441\u20133460. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.309. https:\/\/aclanthology.org\/2020.findings-emnlp.309\/","DOI":"10.18653\/v1\/2020.findings-emnlp.309"},{"key":"998_CR24","unstructured":"Somvanshi, S., Das, S., Javed, S.A., Antariksa, G., Hossain, A.: A survey on deep tabular learning. arXiv preprint arXiv:2410.12034 (2024)"},{"key":"998_CR25","doi-asserted-by":"crossref","unstructured":"Aly, R., Guo, Z., Schlichtkrull, M., Thorne, J., Vlachos, A., Christodoulopoulos, C., Cocarascu, O., Mittal, A.: Feverous: Fact extraction and verification over unstructured and structured information. arXiv preprint arXiv:2106.05707 (2021)","DOI":"10.18653\/v1\/2021.fever-1.1"},{"key":"998_CR26","doi-asserted-by":"publisher","unstructured":"Gupta, V., Mehta, M., Nokhiz, P., Srikumar, V.: INFOTABS: Inference on tables as semi-structured data. In: Jurafsky, D., Chai, J., Schluter, N., Tetreault, J. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 2309\u20132324. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.210. https:\/\/aclanthology.org\/2020.acl-main.210\/","DOI":"10.18653\/v1\/2020.acl-main.210"},{"key":"998_CR27","doi-asserted-by":"publisher","unstructured":"Wang, N.X.R., Mahajan, D., Danilevsky, M., Rosenthal, S.: SemEval-2021 task 9: Fact verification and evidence finding for tabular data in scientific documents (SEM-TAB-FACTS). In: Palmer, A., Schneider, N., Schluter, N., Emerson, G., Herbelot, A., Zhu, X. (eds.) Proceedings of the 15th International Workshop on Semantic Evaluation (SemEval-2021), pp. 317\u2013326. Association for Computational Linguistics, Online (2021). https:\/\/doi.org\/10.18653\/v1\/2021.semeval-1.39. https:\/\/aclanthology.org\/2021.semeval-1.39\/","DOI":"10.18653\/v1\/2021.semeval-1.39"},{"key":"998_CR28","doi-asserted-by":"publisher","unstructured":"Herzig, J., Nowak, P.K., M\u00fcller, T., Piccinno, F., Eisenschlos, J.: TaPas: Weakly supervised table parsing via pre-training. In: Jurafsky, D., Chai, J., Schluter, N., Tetreault, J. (eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 4320\u20134333. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.398. https:\/\/aclanthology.org\/2020.acl-main.398\/","DOI":"10.18653\/v1\/2020.acl-main.398"},{"key":"998_CR29","doi-asserted-by":"publisher","unstructured":"Jiang, Z., Mao, Y., He, P., Neubig, G., Chen, W.: OmniTab: Pretraining with natural and synthetic data for few-shot table-based question answering. In: Carpuat, M., Marneffe, M.-C., Meza\u00a0Ruiz, I.V. (eds.) Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 932\u2013942. Association for Computational Linguistics, Seattle, United States (2022). https:\/\/doi.org\/10.18653\/v1\/2022.naacl-main.68. https:\/\/aclanthology.org\/2022.naacl-main.68\/","DOI":"10.18653\/v1\/2022.naacl-main.68"},{"key":"998_CR30","unstructured":"Liu, Q., Chen, B., Guo, J., Ziyadi, M., Lin, Z., Chen, W., Lou, J.-G.: Tapex: Table pre-training via learning a neural sql executor. arXiv preprint arXiv:2107.07653 (2021)"},{"key":"998_CR31","doi-asserted-by":"publisher","unstructured":"Zhao, Y., Nan, L., Qi, Z., Zhang, R., Radev, D.: ReasTAP: Injecting table reasoning skills during pre-training via synthetic reasoning examples. In: Goldberg, Y., Kozareva, Z., Zhang, Y. (eds.) Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 9006\u20139018. Association for Computational Linguistics, Abu Dhabi, United Arab Emirates (2022). https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.615. https:\/\/aclanthology.org\/2022.emnlp-main.615\/","DOI":"10.18653\/v1\/2022.emnlp-main.615"},{"key":"998_CR32","doi-asserted-by":"publisher","unstructured":"Zhao, Y., Zhang, H., Si, S., Nan, L., Tang, X., Cohan, A.: Investigating table-to-text generation capabilities of large language models in real-world information seeking scenarios. In: Wang, M., Zitouni, I. (eds.) Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track, pp. 160\u2013175. Association for Computational Linguistics, Singapore (2023). https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-industry.17. https:\/\/aclanthology.org\/2023.emnlp-industry.17\/","DOI":"10.18653\/v1\/2023.emnlp-industry.17"},{"key":"998_CR33","doi-asserted-by":"publisher","unstructured":"Min, S., Krishna, K., Lyu, X., Lewis, M., Yih, W.-t., Koh, P., Iyyer, M., Zettlemoyer, L., Hajishirzi, H.: FActScore: Fine-grained atomic evaluation of factual precision in long form text generation. In: Bouamor, H., Pino, J., Bali, K. (eds.) Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 12076\u201312100. Association for Computational Linguistics, Singapore (2023). https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.741. https:\/\/aclanthology.org\/2023.emnlp-main.741\/","DOI":"10.18653\/v1\/2023.emnlp-main.741"},{"key":"998_CR34","doi-asserted-by":"publisher","unstructured":"Wu, Z., Feng, Y.: ProTrix: Building models for planning and reasoning over tables with sentence context. In: Al-Onaizan, Y., Bansal, M., Chen, Y.-N. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2024, pp. 4378\u20134406. Association for Computational Linguistics, Miami, Florida, USA (2024). https:\/\/doi.org\/10.18653\/v1\/2024.findings-emnlp.253. https:\/\/aclanthology.org\/2024.findings-emnlp.253\/","DOI":"10.18653\/v1\/2024.findings-emnlp.253"},{"key":"998_CR35","doi-asserted-by":"crossref","unstructured":"Lu, X., Pan, L., Ma, Y., Nakov, P., Kan, M.-Y.: Tart: An open-source tool-augmented framework for explainable table-based reasoning. arXiv preprint arXiv:2409.11724 (2024)","DOI":"10.18653\/v1\/2025.findings-naacl.244"},{"key":"998_CR36","doi-asserted-by":"publisher","unstructured":"Liu, T., Wang, F., Chen, M.: Rethinking tabular data understanding with large language models. In: Duh, K., Gomez, H., Bethard, S. (eds.) Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 450\u2013482. Association for Computational Linguistics, Mexico City, Mexico (2024). https:\/\/doi.org\/10.18653\/v1\/2024.naacl-long.26. https:\/\/aclanthology.org\/2024.naacl-long.26\/","DOI":"10.18653\/v1\/2024.naacl-long.26"},{"key":"998_CR37","unstructured":"Fang, X., Xu, W., Tan, F.A., Zhang, J., Hu, Z., Qi, Y., Nickleach, S., Socolinsky, D., Sengamedu, S., Faloutsos, C.: Large language models (llms) on tabular data: Prediction, generation, and understanding\u2013a survey. arXiv preprint arXiv:2402.17944 (2024)"},{"key":"998_CR38","unstructured":"Taniguchi, S., Harada, K., Minegishi, G., Oshima, Y., Jeong, S.C., Nagahara, G., Iiyama, T., Suzuki, M., Iwasawa, Y., Matsuo, Y.: Adopt: Modified adam can converge with any $${\\beta }_{2}$$ with the optimal rate. In: Globerson, A., Mackey, L., Belgrave, D., Fan, A., Paquet, U., Tomczak, J., Zhang, C. (eds.) Advances in Neural Information Processing Systems, pp. 72438\u201372474"},{"key":"998_CR39","doi-asserted-by":"publisher","unstructured":"Qiao, S., Ou, Y., Zhang, N., Chen, X., Yao, Y., Deng, S., Tan, C., Huang, F., Chen, H.: Reasoning with language model prompting: A survey. In: Rogers, A., Boyd-Graber, J., Okazaki, N. (eds.) Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 5368\u20135393. Association for Computational Linguistics, Toronto, Canada (2023). https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.294. https:\/\/aclanthology.org\/2023.acl-long.294\/","DOI":"10.18653\/v1\/2023.acl-long.294"},{"key":"998_CR40","doi-asserted-by":"publisher","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.-J.: Bleu: a method for automatic evaluation of machine translation. In: Isabelle, P., Charniak, E., Lin, D. (eds.) Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318. Association for Computational Linguistics, Philadelphia, Pennsylvania, USA (2002). https:\/\/doi.org\/10.3115\/1073083.1073135. https:\/\/aclanthology.org\/P02-1040\/","DOI":"10.3115\/1073083.1073135"},{"key":"998_CR41","unstructured":"Banerjee, S., Lavie, A.: METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In: Goldstein, J., Lavie, A., Lin, C.-Y., Voss, C. (eds.) Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation And\/or Summarization, pp. 65\u201372. Association for Computational Linguistics, Ann Arbor, Michigan (2005). https:\/\/aclanthology.org\/W05-0909\/"},{"key":"998_CR42","unstructured":"Lin, C.-Y.: ROUGE: A package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381. Association for Computational Linguistics, Barcelona, Spain (2004). https:\/\/aclanthology.org\/W04-1013\/"},{"key":"998_CR43","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675 (2019)"},{"key":"998_CR44","doi-asserted-by":"crossref","unstructured":"Saadany, H., Orasan, C.: BLEU, METEOR, BERTScore: Evaluation of metrics performance in assessing critical translation errors in sentiment-oriented text. In: Mitkov, R., Sosoni, V., Gigu\u00e8re, J.C., Murgolo, E., Deysel, E. (eds.) Proceedings of the Translation and Interpreting Technology Online Conference, pp. 48\u201356. INCOMA Ltd., Held Online (2021). https:\/\/aclanthology.org\/2021.triton-1.6\/","DOI":"10.26615\/978-954-452-071-7_006"},{"key":"998_CR45","unstructured":"Li, H., Dong, Q., Chen, J., Su, H., Zhou, Y., Ai, Q., Ye, Z., Liu, Y.: Llms-as-judges: a comprehensive survey on llm-based evaluation methods. arXiv preprint arXiv:2412.05579 (2024)"},{"key":"998_CR46","doi-asserted-by":"publisher","unstructured":"Li, J., Wang, J., Zhang, Z., Zhao, H.: Self-prompting large language models for zero-shot open-domain QA. In: Duh, K., Gomez, H., Bethard, S. (eds.) Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 296\u2013310. Association for Computational Linguistics, Mexico City, Mexico (2024). https:\/\/doi.org\/10.18653\/v1\/2024.naacl-long.17. https:\/\/aclanthology.org\/2024.naacl-long.17\/","DOI":"10.18653\/v1\/2024.naacl-long.17"}],"container-title":["International Journal of Data Science and Analytics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-025-00998-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41060-025-00998-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41060-025-00998-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T14:36:18Z","timestamp":1773930978000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s41060-025-00998-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,19]]},"references-count":46,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,12]]}},"alternative-id":["998"],"URL":"https:\/\/doi.org\/10.1007\/s41060-025-00998-3","relation":{},"ISSN":["2364-415X","2364-4168"],"issn-type":[{"value":"2364-415X","type":"print"},{"value":"2364-4168","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3,19]]},"assertion":[{"value":"26 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 March 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}],"article-number":"109"}}