{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T04:35:09Z","timestamp":1758083709738,"version":"3.44.0"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032046260"},{"type":"electronic","value":"9783032046277"}],"license":[{"start":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T00:00:00Z","timestamp":1757980800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T00:00:00Z","timestamp":1757980800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04627-7_34","type":"book-chapter","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T02:06:42Z","timestamp":1757988402000},"page":"587-604","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["EviFiVQA: A Benchmark for\u00a0Evidence-Grounded Multi-hop Reasoning in\u00a0Financial VQA"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-0385-7437","authenticated-orcid":false,"given":"Sachin","family":"Raja","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4808-8860","authenticated-orcid":false,"given":"Ajoy","family":"Mondal","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6767-7057","authenticated-orcid":false,"given":"C. V.","family":"Jawahar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,16]]},"reference":[{"key":"34_CR1","doi-asserted-by":"crossref","unstructured":"Ding, C.: Cyber security intelligence and analytics. In: The 5th International Conference on Cyber Security Intelligence and Analytics (CSIA 2023), vol. 1, pp. 160\u2013169 (2023)","DOI":"10.1007\/978-3-031-31860-3_17"},{"key":"34_CR2","doi-asserted-by":"crossref","unstructured":"Liu, R.: Applications of new technologies to recognition of financial statement fraud. In: 2020 International Conference on Computer Communication and Network Security (CCNS), pp. 22\u201325 (2020)","DOI":"10.1109\/CCNS50731.2020.00013"},{"key":"34_CR3","unstructured":"Financial Accounting, Reporting & Analysis (2017)"},{"key":"34_CR4","volume-title":"and C","author":"S Raja","year":"2020","unstructured":"Raja, S., Mondal, A.: and C, vol. Jawahar. Table structure recognition using top-down and bottom-up cues, In ECCV (2020)"},{"key":"34_CR5","doi-asserted-by":"crossref","unstructured":"Xing, H., et al.: LORE: Logical location regression network for table structure recognition. arXiv preprint arXiv:2303.03730 (2023)","DOI":"10.1609\/aaai.v37i3.25402"},{"key":"34_CR6","unstructured":"Wang, P., et\u00a0al.: Qwen2-VL: Enhancing vision-language model\u2019s perception of the world at any resolution. arXiv preprint arXiv:2409.12191 (2024)"},{"key":"34_CR7","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. Adv. Neural Inf. Process. Syst. 36, 34892\u201334916 (2023)"},{"key":"34_CR8","unstructured":"Agrawal, P., et\u00a0al.: Pixtral 12B. arXiv preprint arXiv:2410.07073 (2024)"},{"key":"34_CR9","unstructured":"Lu, H., et\u00a0al.: DeepSeek-VL: towards real-world vision-language understanding. arXiv preprint arXiv:2403.05525 (2024)"},{"key":"34_CR10","unstructured":"Dubey, A., et\u00a0al.: The Llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)"},{"key":"34_CR11","doi-asserted-by":"crossref","unstructured":"Zheng, X., Burdick, D., Popa, L., Zhong, X., Wang, N.X.R.: Global table extractor (GTE): a framework for joint table identification and cell structure recognition using visual context. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 697\u2013706 (2021)","DOI":"10.1109\/WACV48630.2021.00074"},{"key":"34_CR12","doi-asserted-by":"crossref","unstructured":"Pasupat, P., Liang, P.: Compositional semantic parsing on semi-structured tables. arXiv preprint arXiv:1508.00305 (2015)","DOI":"10.3115\/v1\/P15-1142"},{"key":"34_CR13","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1162\/tacl_a_00276","volume":"7","author":"T Kwiatkowski","year":"2019","unstructured":"Kwiatkowski, T., et al.: Natural questions: a benchmark for question answering research. Trans. Assoc. Comput. Linguist. 7, 453\u2013466 (2019)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"34_CR14","doi-asserted-by":"crossref","unstructured":"Zhong, X., ShafieiBavani, E., Yepes, A.J.: Image-based table recognition: data, model, and evaluation. arXiv (2019)","DOI":"10.1007\/978-3-030-58589-1_34"},{"key":"34_CR15","unstructured":"Kim, Y., Yim, M., Song, K.Y.: A Visual Question Answering Benchmark on Multiple Table Domains. arXiv, TableVQA-Bench (2024)"},{"key":"34_CR16","doi-asserted-by":"crossref","unstructured":"Zhu, F., et al.: TAT-QA: a question answering benchmark on a hybrid of tabular and textual content in finance. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 3277\u20133287 (2021)","DOI":"10.18653\/v1\/2021.acl-long.254"},{"key":"34_CR17","doi-asserted-by":"crossref","unstructured":"Nan, L.: FeTaQA: free-form table question answering. Trans. Assoc. Comput. Linguist. 10, 35\u201349 (2022)","DOI":"10.1162\/tacl_a_00446"},{"key":"34_CR18","unstructured":"Xianjie, W., et al. TableBench: A Comprehensive and Complex Benchmark for Table Question Answering. arXiv (2024)"},{"key":"34_CR19","doi-asserted-by":"crossref","unstructured":"Raja, S., Mondal, A., Jawahar, C.V.: ICDAR 2023 competition on visual question answering on business document images. In: International Conference on Document Analysis and Recognition, pp. 454\u2013470. Springer (2023)","DOI":"10.1007\/978-3-031-41679-8_26"},{"key":"34_CR20","unstructured":"Qiu, Z., Peng, Y., He, G., Yuan, B., Wang, C.: TQA-Bench: Evaluating LLMs for Multi-Table Question Answering with Scalable Context and Symbolic Extension. arXiv (2024)"},{"key":"34_CR21","doi-asserted-by":"crossref","unstructured":"Chen, Z., et\u00a0al.: FinQA: A dataset of numerical reasoning over financial data. arXiv preprint arXiv:2109.00122 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.300"},{"key":"34_CR22","doi-asserted-by":"crossref","unstructured":"Herzig, J., et al.: TAPAS: Weakly supervised table parsing via pre-training. arXiv preprint arXiv:2004.02349 (2020)","DOI":"10.18653\/v1\/2020.acl-main.398"},{"key":"34_CR23","doi-asserted-by":"crossref","unstructured":"Eisenschlos, J.M., Krichene, S., M\u00fcller, T.: Understanding tables with intermediate pre-training. arXiv preprint arXiv:2010.00571 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.27"},{"key":"34_CR24","doi-asserted-by":"crossref","unstructured":"Smock, B., Pesala, R., Abraham, R.: PubTables-1M: towards comprehensive table extraction from unstructured documents. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4634\u20134642 (2022)","DOI":"10.1109\/CVPR52688.2022.00459"},{"key":"34_CR25","doi-asserted-by":"crossref","unstructured":"Prasad, D., Gadpal, A., Kapadni, K., Visave, M., Sultanpure, K.: CascadeTabNet: an approach for end to end table detection and structure recognition from image-based documents. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern recognition workshops, pp. 572\u2013573 (2020)","DOI":"10.1109\/CVPRW50498.2020.00294"},{"key":"34_CR26","unstructured":"Nguyen, P., Ly, N.T., Takeda, H., Takasu, A.: Table Questions Answering on Business Document Images. arXiv, TabIQA (2023)"},{"key":"34_CR27","doi-asserted-by":"crossref","unstructured":"Xue, W., et al.: A question answering system for unstructured table images. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 2783\u20132785 (2021)","DOI":"10.1145\/3474085.3478558"},{"key":"34_CR28","doi-asserted-by":"crossref","unstructured":"Zeng, J., et al.: M-TBQA: multimodal table-based question answering. In: 2023 4th International Conference on Machine Learning and Computer Application, pp. 227\u2013231 (2023)","DOI":"10.1145\/3650215.3650255"},{"key":"34_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, T., Liu, Y., Wu, J., Liao, J.: Answering questions over tables based on TAPAS and graph attention model. In: Proceedings of the 2022 5th International Conference on Machine Learning and Natural Language Processing, pp. 8\u201313 (2022)","DOI":"10.1145\/3578741.3578744"},{"key":"34_CR30","unstructured":"Chen, P., et al.: Hypergraph-enhanced Tabular Data Representation Learning. arXiv, HYTREL (2023)"},{"key":"34_CR31","unstructured":"Liu, Q., et al.: Table Pre-training via Learning a Neural SQL Executor. arXiv, TAPEX (2021)"},{"key":"34_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: ReAcTable: enhancing ReAct for table question answering. Proc. VLDB Endowment 17(8), 1981\u20131994 (2024)","DOI":"10.14778\/3659437.3659452"},{"issue":"12","key":"34_CR33","doi-asserted-by":"publisher","first-page":"3920","DOI":"10.14778\/3685800.3685816","volume":"17","author":"J-P Zhu","year":"2024","unstructured":"Zhu, J.-P., et al.: AutoTQA: towards autonomous tabular question answering through multi-agent large language models. Proc. VLDB Endowment 17(12), 3920\u20133933 (2024)","journal-title":"Proc. VLDB Endowment"},{"key":"34_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, T., Yue, X., Li, Y., Sun, H.: TableLlama: towards open large generalist models for tables. In: Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pp. 6024\u20136044 (2024)","DOI":"10.18653\/v1\/2024.naacl-long.335"},{"key":"34_CR35","doi-asserted-by":"crossref","unstructured":"Jiang, Z., Mao, Y., He, P., Neubig, G., Chen, W.: OmniTab: pretraining with natural and synthetic data for few-shot table-based question answering. In: Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 932\u2013942 (2022)","DOI":"10.18653\/v1\/2022.naacl-main.68"},{"key":"34_CR36","unstructured":"Kang, D., Jung, B., Kim, Y., Lee, G.G.: Denoising Table-Text Retrieval for Open-Domain Question Answering. arXiv (2024)"},{"key":"34_CR37","unstructured":"Roychowdhury, S., Soman, S., Ranjani, H.G., Sharma, A., Gunda, N., Bala, S.K.: Evaluation of Table Representations to Answer Questions from Tables in Documents: A Case Study using 3GPP Specifications. arXiv (2024)"},{"key":"34_CR38","doi-asserted-by":"crossref","unstructured":"Zhou, W., Mesgar, M., Adel, H., Friedrich, A.: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. arXiv, FREB-TQA (2024)","DOI":"10.18653\/v1\/2024.naacl-long.137"},{"key":"34_CR39","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: Sentence embeddings using Siamese BERT-networks. arXiv preprint arXiv:1908.10084 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"34_CR40","doi-asserted-by":"crossref","unstructured":"Biten, A.F., et al.: ICDAR 2019 competition on scene text visual question answering. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1563\u20131570. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00251"},{"key":"34_CR41","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"635","DOI":"10.1007\/978-3-030-86337-1_42","volume-title":"Document Analysis and Recognition - ICDAR 2021","author":"R Tito","year":"2021","unstructured":"Tito, R., Mathew, M., Jawahar, C.V., Valveny, E., Karatzas, D.: ICDAR 2021 competition on document visual question answering. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12824, pp. 635\u2013649. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86337-1_42"},{"key":"34_CR42","unstructured":"Hannun, A., Digani, J., Katharopoulos, A., Collobert, R.: Efficient and flexible machine learning on apple silicon, MLX (2023)"},{"key":"34_CR43","unstructured":"Canuma, P.: MLX-VLM: MLX-VLM is a package for inference and fine-tuning of vision language models (VLMs) on Mac using MLX (2024)"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04627-7_34","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T02:07:02Z","timestamp":1757988422000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04627-7_34"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,16]]},"ISBN":["9783032046260","9783032046277"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04627-7_34","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,16]]},"assertion":[{"value":"16 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wuhan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iapr.org\/icdar2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}