{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,8]],"date-time":"2026-06-08T15:00:35Z","timestamp":1780930835552,"version":"3.54.1"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031746420","type":"print"},{"value":"9783031746437","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-74643-7_23","type":"book-chapter","created":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T23:20:00Z","timestamp":1735687200000},"page":"307-322","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["ViBERTgrid BiLSTM-CRF: Multimodal Key Information Extraction from\u00a0Unstructured Financial Documents"],"prefix":"10.1007","author":[{"given":"Furkan","family":"Pala","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mehmet Yasin","family":"Akp\u0131nar","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Onur","family":"Deniz","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"G\u00fcl\u015fen","family":"Eryi\u011fit","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,1,1]]},"reference":[{"key":"23_CR1","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol.\u00a033, pp. 1877\u20131901. Curran Associates, Inc. (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf"},{"key":"23_CR2","unstructured":"Calusha, N.T.P.: Icdar 2019 robust reading challenge on scanned receipts ocr and information extraction (2021). https:\/\/github.com\/BlackStar1313\/ICDAR-2019-RRC-SROIE"},{"key":"23_CR3","unstructured":"Chung, J., Gulcehre, C., Cho, K., Bengio, Y.: Empirical evaluation of gated recurrent neural networks on sequence modeling. In: NIPS 2014 Workshop on Deep Learning, December 2014 (2014)"},{"key":"23_CR4","doi-asserted-by":"publisher","unstructured":"Cristani, M., Bertolaso, A., Scannapieco, S., Tomazzoli, C.: Future paradigms of automated processing of business documents. Int. J. Inf. Manag. 40, 67\u201375 (2018). https:\/\/doi.org\/10.1016\/j.ijinfomgt.2018.01.010. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0268401217309994","DOI":"10.1016\/j.ijinfomgt.2018.01.010"},{"key":"23_CR5","unstructured":"Delplace, A.: Chargrid model: extraction of meaningful instances from document images (2020). https:\/\/github.com\/antoinedelplace\/Chargrid"},{"key":"23_CR6","unstructured":"Denk, T.I., Reisswig, C.: Bertgrid: contextualized embedding for 2d document representation and understanding. arXiv preprint arXiv:1909.04948 (2019)"},{"key":"23_CR7","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"23_CR8","doi-asserted-by":"publisher","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 2980\u20132988 (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.322","DOI":"10.1109\/ICCV.2017.322"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"23_CR10","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9, 1735\u20131780 (1997). https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","journal-title":"Neural Comput."},{"key":"23_CR11","doi-asserted-by":"publisher","unstructured":"Huang, Z., et al.: Icdar2019 competition on scanned receipt ocr and information extraction. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1516\u20131520 (2019). https:\/\/doi.org\/10.1109\/ICDAR.2019.00244","DOI":"10.1109\/ICDAR.2019.00244"},{"key":"23_CR12","doi-asserted-by":"crossref","unstructured":"Huang, Z., et al.: Icdar2019 competition on scanned receipt ocr and information extraction. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 1516\u20131520. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00244"},{"key":"23_CR13","unstructured":"Huang, Z., Xu, W., Yu, K.: Bidirectional lstm-crf models for sequence tagging. arXiv preprint arXiv:1508.01991 (2015)"},{"key":"23_CR14","doi-asserted-by":"publisher","unstructured":"Jaume, G., Kemal\u00a0Ekenel, H., Thiran, J.P.: Funsd: a dataset for form understanding in noisy scanned documents. In: 2019 International Conference on Document Analysis and Recognition Workshops (ICDARW), vol.\u00a02, pp.\u00a01\u20136 (2019). https:\/\/doi.org\/10.1109\/ICDARW.2019.10029","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Jia, R., Wong, C., Poon, H.: Document-level n-ary relation extraction with multiscale representation learning. In: Proceedings of NAACL-HLT, pp. 3693\u20133704 (2019)","DOI":"10.18653\/v1\/N19-1370"},{"key":"23_CR16","doi-asserted-by":"crossref","unstructured":"Katti, A.R., et al.: Chargrid: towards understanding 2d documents. arXiv preprint arXiv:1809.08799 (2018)","DOI":"10.18653\/v1\/D18-1476"},{"key":"23_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1007\/978-3-030-86159-9_28","volume-title":"Document Analysis and Recognition \u2013 ICDAR 2021 Workshops","author":"M Kerroumi","year":"2021","unstructured":"Kerroumi, M., Sayem, O., Shabou, A.: VisualWordGrid: information extraction from scanned documents using a multimodal approach. In: Barney Smith, E.H., Pal, U. (eds.) ICDAR 2021. LNCS, vol. 12917, pp. 389\u2013402. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86159-9_28"},{"key":"23_CR18","unstructured":"Lafferty, J.D., McCallum, A., Pereira, F.: Conditional random fields: probabilistic models for segmenting and labeling sequence data. In: International Conference on Machine Learning (2001)"},{"key":"23_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"548","DOI":"10.1007\/978-3-030-86549-8_35","volume-title":"Document Analysis and Recognition \u2013 ICDAR 2021","author":"W Lin","year":"2021","unstructured":"Lin, W., et al.: ViBERTgrid: a jointly trained multi-modal 2D document representation for key information extraction from documents. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12821, pp. 548\u2013563. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86549-8_35"},{"key":"23_CR20","unstructured":"Lin, Z.: Vibertgrid pytorch (2021). https:\/\/github.com\/ZeningLin\/ViBERTgrid-PyTorch"},{"key":"23_CR21","first-page":"1","volume":"2019","author":"B Oral","year":"2019","unstructured":"Oral, B., Emekligil, E., Arslan, S., Eryigit, G.: Extracting complex relations from banking documents. EMNLP-IJCNLP 2019, 1 (2019)","journal-title":"EMNLP-IJCNLP"},{"key":"23_CR22","doi-asserted-by":"crossref","unstructured":"Oral, B., Emekligil, E., Arslan, S., Eryi\u01e7it, G.: Information extraction from text intensive and visually rich banking documents. Inf. Process. Manag. 57(6), 102361 (2020)","DOI":"10.1016\/j.ipm.2020.102361"},{"key":"23_CR23","doi-asserted-by":"crossref","unstructured":"Oral, B., Eryi\u011fit, G.: Fusion of visual representations for multimodal information extraction from unstructured transactional documents. Int. J. Doc. Anal. Recogn. (IJDAR) 1\u201319 (2022)","DOI":"10.1007\/s10032-022-00399-3"},{"key":"23_CR24","unstructured":"Park, S., et al.: Cord: a consolidated receipt dataset for post-ocr parsing (2019)"},{"key":"23_CR25","unstructured":"Paszke, A., Chaurasia, A., Kim, S., Culurciello, E.: Enet: a deep neural network architecture for real-time semantic segmentation. arXiv preprint arXiv:1606.02147 (2016)"},{"key":"23_CR26","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1162\/tacl_a_00049","volume":"5","author":"N Peng","year":"2017","unstructured":"Peng, N., Poon, H., Quirk, C., Toutanova, K., Yih, W.T.: Cross-sentence n-ary relation extraction with graph lstms. Trans. Assoc. Comput. Linguist. 5, 101\u2013115 (2017)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"23_CR27","doi-asserted-by":"publisher","unstructured":"Peters, M.E., et al.: Deep contextualized word representations. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, vol. 1 (Long Papers), pp. 2227\u20132237. Association for Computational Linguistics, New Orleans (2018). https:\/\/doi.org\/10.18653\/v1\/N18-1202. https:\/\/aclanthology.org\/N18-1202","DOI":"10.18653\/v1\/N18-1202"},{"key":"23_CR28","doi-asserted-by":"crossref","unstructured":"Riba, P., Dutta, A., Goldmann, L., Forn\u00e9s, A., Ramos, O., Llad\u00f3s, J.: Table detection in invoice documents by graph neural networks. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 122\u2013127. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00028"},{"key":"23_CR29","doi-asserted-by":"crossref","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning internal representations by error propagation (1986)","DOI":"10.21236\/ADA164453"},{"key":"23_CR30","doi-asserted-by":"crossref","unstructured":"\u015eahin, G.G., Emekligil, E., Arslan, S., A\u011f\u0131n, O., Eryi\u011fit, G.: Relation extraction via one-shot dependency parsing on intersentential, higher-order, and nested relations. Turk. J. Electr. Eng. Comput. Sci. 26(2), 830\u2013843 (2018)","DOI":"10.3906\/elk-1703-108"},{"key":"23_CR31","doi-asserted-by":"crossref","unstructured":"Settles, B.: Biomedical named entity recognition using conditional random fields and rich feature sets. In: Proceedings of the International Joint Workshop on Natural Language Processing in Biomedicine and its Applications (NLPBA\/BioNLP), pp. 107\u2013110. COLING, Geneva (2004). https:\/\/aclanthology.org\/W04-1221","DOI":"10.3115\/1567594.1567618"},{"key":"23_CR32","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15(56), 1929\u20131958 (2014). http:\/\/jmlr.org\/papers\/v15\/srivastava14a.html"},{"key":"23_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"564","DOI":"10.1007\/978-3-030-86549-8_36","volume-title":"Document Analysis and Recognition \u2013 ICDAR 2021","author":"T Stanis\u0142awek","year":"2021","unstructured":"Stanis\u0142awek, T., et al.: Kleister: key information extraction datasets involving long documents with complex layouts. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12821, pp. 564\u2013579. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86549-8_36"},{"issue":"2","key":"23_CR34","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1109\/TIT.1967.1054010","volume":"13","author":"A Viterbi","year":"1967","unstructured":"Viterbi, A.: Error bounds for convolutional codes and an asymptotically optimum decoding algorithm. IEEE Trans. Inf. Theory 13(2), 260\u2013269 (1967). https:\/\/doi.org\/10.1109\/TIT.1967.1054010","journal-title":"IEEE Trans. Inf. Theory"},{"key":"23_CR35","unstructured":"Wyner, A., Casini, G.: A deep learning approach to contract element extraction. Legal Knowl. Inf. Syst., 155 (2017)"},{"key":"23_CR36","doi-asserted-by":"crossref","unstructured":"Yu, W., Lu, N., Qi, X., Gong, P., Xiao, R.: Pick: processing key information extraction from documents using improved graph learning-convolutional networks. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 4363\u20134370. IEEE (2021)","DOI":"10.1109\/ICPR48806.2021.9412927"}],"container-title":["Communications in Computer and Information Science","Machine Learning and Principles and Practice of Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-74643-7_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T02:36:00Z","timestamp":1735698960000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-74643-7_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031746420","9783031746437"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-74643-7_23","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"1 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"In conducting this research and analyzing the in-house financial and banking data, we are committed to upholding ethical standards and ensuring the privacy and confidentiality of our customers\u2019 information. First of all, customers give open consent by sending their documents to the bank. In an everyday scenario, customers willingly send various documents such as invoices, transactions, and other relevant paperwork to the bank for the purpose of processing these documents. Staff and employees of the bank are well-trained in data privacy and protection issues. It is strictly forbidden to remove data from the secure banking network. The objective of this project is not to create recommendation systems or similar tools to alter or manipulate customer preferences. We respect the autonomy and individual choices of our customers and aim to eliminate the manual human effort of data entry as much as possible by automating the information extraction process. This will result in a significant speed-up in the services provided to customers.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}