{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T03:06:47Z","timestamp":1771297607512,"version":"3.50.1"},"publisher-location":"Cham","reference-count":91,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031416781","type":"print"},{"value":"9783031416798","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-41679-8_9","type":"book-chapter","created":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T07:02:59Z","timestamp":1692342179000},"page":"147-166","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["DocILE Benchmark for\u00a0Document Information Localization and\u00a0Extraction"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6687-1210","authenticated-orcid":false,"given":"\u0160t\u011bp\u00e1n","family":"\u0160imsa","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6321-0131","authenticated-orcid":false,"given":"Milan","family":"\u0160ulc","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2606-4470","authenticated-orcid":false,"given":"Michal","family":"U\u0159i\u010d\u00e1\u0159","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9373-529X","authenticated-orcid":false,"given":"Yash","family":"Patel","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8964-2135","authenticated-orcid":false,"given":"Ahmed","family":"Hamdi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0124-9348","authenticated-orcid":false,"given":"Mat\u011bj","family":"Koci\u00e1n","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0197-7134","authenticated-orcid":false,"given":"Maty\u00e1\u0161","family":"Skalick\u00fd","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0863-4844","authenticated-orcid":false,"given":"Ji\u0159\u00ed","family":"Matas","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6160-3356","authenticated-orcid":false,"given":"Antoine","family":"Doucet","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0123-439X","authenticated-orcid":false,"given":"Micka\u00ebl","family":"Coustaty","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8762-4454","authenticated-orcid":false,"given":"Dimosthenis","family":"Karatzas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,19]]},"reference":[{"key":"9_CR1","doi-asserted-by":"crossref","unstructured":"Appalaraju, S., Jasani, B., Kota, B.U., Xie, Y., Manmatha, R.: Docformer: end-to-end transformer for document understanding. 
In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00103"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Baek, Y., et al.: Cleval: character-level evaluation for text detection and recognition tasks. In: CVPR workshops (2020)","DOI":"10.1109\/CVPRW50498.2020.00290"},{"key":"9_CR3","unstructured":"Bensch, O., Popa, M., Spille, C.: Key information extraction from documents: evaluation and generator. In: Abb\u00e8s, S.B., et al. (eds.) Proceedings of DeepOntoNLP and X-SENTIMENT (2021)"},{"key":"9_CR4","doi-asserted-by":"publisher","unstructured":"Biten, A.F., Tito, R., Gomez, L., Valveny, E., Karatzas, D.: OCR-IDL: OCR annotations for industry document library dataset. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) Computer Vision \u2013 ECCV 2022 Workshops. ECCV 2022. LNCS, vol. 13804, pp. 241\u2013252. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-25069-9_16","DOI":"10.1007\/978-3-031-25069-9_16"},{"key":"9_CR5","unstructured":"Borchmann, \u0141., et al.: DUE: end-to-end document understanding benchmark. In: NeurIPS (2021)"},{"key":"9_CR6","doi-asserted-by":"crossref","unstructured":"Bu\u0161ta, M., Patel, Y., Matas, J.: E2E-MLT - an unconstrained end-to-end method for multi-language scene text. In: ACCV workshops (2019)","DOI":"10.1007\/978-3-030-21074-8_11"},{"key":"9_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Davis, B., Morse, B., Cohen, S., Price, B., Tensmeyer, C.: Deep visual template-free form parsing. In: ICDAR (2019)","DOI":"10.1109\/ICDAR.2019.00030"},{"key":"9_CR9","unstructured":"Denk, T.I., Reisswig, C.: BERTgrid: contextualized embedding for 2d document representation and understanding. arXiv (2019)"},{"key":"9_CR10","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv (2018)"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Dhakal, P., Munikar, M., Dahal, B.: One-shot template matching for automatic document data capture. In: Artificial Intelligence for Transforming Business and Society (AITB) (2019)","DOI":"10.1109\/AITB48515.2019.8947440"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., et al.: Flownet: learning optical flow with convolutional networks. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.316"},{"key":"9_CR13","unstructured":"Du, Y., et al.: PP-OCR: a practical ultra lightweight OCR system. arXiv (2020)"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Fang, J., Tao, X., Tang, Z., Qiu, R., Liu, Y.: Dataset, ground-truth and performance metrics for table detection evaluation. In: Blumenstein, M., Pal, U., Uchida, S. (eds.) DAS (2012)","DOI":"10.1109\/DAS.2012.29"},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Garncarek, \u0141., et al.: Lambert: layout-aware language modeling for information extraction. 
In: ICDAR (2021)","DOI":"10.1007\/978-3-030-86549-8_34"},{"key":"9_CR16","unstructured":"Geimfari, L.: Mimesis: the fake data generator (2022). http:\/\/github.com\/lk-geimfari\/mimesis"},{"key":"9_CR17","unstructured":"Gu, J., et al.: Unidoc: Unified pretraining framework for document understanding. In: NeurIPS (2021)"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Gu, Z., et al.: XYLayoutLM: towards layout-aware multimodal networks for visually-rich document understanding. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00454"},{"key":"9_CR19","doi-asserted-by":"crossref","unstructured":"Gupta, A., Vedaldi, A., Zisserman, A.: Synthetic data for text localisation in natural images. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.254"},{"key":"9_CR20","doi-asserted-by":"publisher","first-page":"244","DOI":"10.18100\/ijamec.270374","volume":"2016","author":"KA Hamad","year":"2016","unstructured":"Hamad, K.A., Mehmet, K.: A detailed analysis of optical character recognition technology. Int. J. Appl. Math. Electron. Comput. 2016, 244\u2013249 (2016)","journal-title":"Int. J. Appl. Math. Electron. Comput."},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Carel, E., Joseph, A., Coustaty, M., Doucet, A.: Information extraction from invoices. In: ICDAR (2021)","DOI":"10.1007\/978-3-030-86331-9_45"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Hammami, M., H\u00e9roux, P., Adam, S., d\u2019Andecy, V.P.: One-shot field spotting on colored forms using subgraph isomorphism. In: ICDAR (2015)","DOI":"10.1109\/ICDAR.2015.7333829"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Harley, A.W., Ufkes, A., Derpanis, K.G.: Evaluation of deep convolutional nets for document image classification and retrieval. In: ICDAR (2015)","DOI":"10.1109\/ICDAR.2015.7333910"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Herzig, J., Nowak, P.K., M\u00fcller, T., Piccinno, F., Eisenschlos, J.M.: Tapas: weakly supervised table parsing via pre-training. arXiv (2020)","DOI":"10.18653\/v1\/2020.acl-main.398"},{"key":"9_CR25","doi-asserted-by":"crossref","unstructured":"Hole\u010dek, M., Hoskovec, A., Baudi\u0161, P., Klinger, P.: Table understanding in structured documents. In: ICDAR Workshops (2019)","DOI":"10.1109\/ICDARW.2019.40098"},{"key":"9_CR26","unstructured":"Holt, X., Chisholm, A.: Extracting structured data from invoices. In: Proceedings of the Australasian Language Technology Association Workshop 2018, pp. 53\u201359 (2018)"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Hong, T., Kim, D., Ji, M., Hwang, W., Nam, D., Park, S.: Bros: a pre-trained language model focusing on text and layout for better key information extraction from documents. In: AAAI (2022)","DOI":"10.1609\/aaai.v36i10.21322"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Huang, Y., Lv, T., Cui, L., Lu, Y., Wei, F.: Layoutlmv3: pre-training for document AI with unified text and image masking. In: ACM-MM (2022)","DOI":"10.1145\/3503161.3548112"},{"key":"9_CR29","doi-asserted-by":"crossref","unstructured":"Huang, Z., et al.: ICDAR2019 competition on scanned receipt OCR and information extraction. In: ICDAR (2019)","DOI":"10.1109\/ICDAR.2019.00244"},{"key":"9_CR30","doi-asserted-by":"crossref","unstructured":"Hwang, W., Yim, J., Park, S., Yang, S., Seo, M.: Spatial dependency parsing for semi-structured document information extraction. 
arXiv (2020)","DOI":"10.18653\/v1\/2021.findings-acl.28"},{"key":"9_CR31","unstructured":"Islam, N., Islam, Z., Noor, N.: A survey on optical character recognition system. arXiv (2017)"},{"key":"9_CR32","doi-asserted-by":"crossref","unstructured":"Jaume, G., Ekenel, H.K., Thiran, J.P.: FUNSD: a dataset for form understanding in noisy scanned documents. In: ICDAR (2019)","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"9_CR33","doi-asserted-by":"crossref","unstructured":"Katti, A.R., et al.: Chargrid: towards understanding 2d documents. In: EMNLP (2018)","DOI":"10.18653\/v1\/D18-1476"},{"key":"9_CR34","doi-asserted-by":"crossref","unstructured":"Kil, J., Chao, W.L.: Revisiting document representations for large-scale zero-shot learning. arXiv (2021)","DOI":"10.18653\/v1\/2021.naacl-main.250"},{"key":"9_CR35","doi-asserted-by":"crossref","unstructured":"Krieger, F., Drews, P., Funk, B., Wobbe, T.: Information extraction from invoices: a graph neural network approach for datasets with high layout variety. In: Innovation Through Information Systems: Volume II: A Collection of Latest Research on Technology Issues (2021)","DOI":"10.1007\/978-3-030-86797-3_1"},{"key":"9_CR36","doi-asserted-by":"crossref","unstructured":"Lee, C.Y., et al.: FormNet: structural encoding beyond sequential modeling in form document information extraction. In: ACL (2022)","DOI":"10.18653\/v1\/2022.acl-long.260"},{"key":"9_CR37","doi-asserted-by":"crossref","unstructured":"Lewis, D., Agam, G., Argamon, S., Frieder, O., Grossman, D., Heard, J.: Building a test collection for complex document information processing. In: SIGIR (2006)","DOI":"10.1145\/1148170.1148307"},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Li, C., et al.: StructuralLM: structural pre-training for form understanding. In: ACL (2021)","DOI":"10.18653\/v1\/2021.acl-long.493"},{"key":"9_CR39","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1109\/TKDE.2020.2981314","volume":"34","author":"J Li","year":"2020","unstructured":"Li, J., Sun, A., Han, J., Li, C.: A survey on deep learning for named entity recognition. IEEE Trans. Knowl. Data Eng. 34, 50\u201370 (2020)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"9_CR40","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: Structext: structured text understanding with multi-modal transformers. In: ACM-MM (2021)","DOI":"10.1145\/3474085.3475345"},{"key":"9_CR41","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"9_CR42","doi-asserted-by":"crossref","unstructured":"Lin, W., et al.: VibertGrid: a jointly trained multi-modal 2d document representation for key information extraction from documents. In: ICDAR (2021)","DOI":"10.1007\/978-3-030-86549-8_35"},{"key":"9_CR43","unstructured":"Liu, W., Zhang, Y., Wan, B.: Unstructured document recognition on business invoice. Technical report (2016)"},{"key":"9_CR44","unstructured":"Liu, Y., et al.: RoBERTa: A Robustly Optimized BERT Pretraining Approach. 
arXiv (2019)"},{"key":"9_CR45","doi-asserted-by":"crossref","unstructured":"Lohani, D., Bela\u00efd, A., Bela\u00efd, Y.: An invoice reading system using a graph convolutional network. In: ACCV workshops (2018)","DOI":"10.1007\/978-3-030-21074-8_12"},{"key":"9_CR46","doi-asserted-by":"crossref","unstructured":"Majumder, B.P., Potti, N., Tata, S., Wendt, J.B., Zhao, Q., Najork, M.: Representation learning for information extraction from form-like documents. In: ACL (2020)","DOI":"10.18653\/v1\/2020.acl-main.580"},{"key":"9_CR47","doi-asserted-by":"crossref","unstructured":"Mathew, M., Bagal, V., Tito, R., Karatzas, D., Valveny, E., Jawahar, C.: InfographicVQA. In: WACV (2022)","DOI":"10.1109\/WACV51458.2022.00264"},{"key":"9_CR48","doi-asserted-by":"crossref","unstructured":"Mathew, M., Karatzas, D., Jawahar, C.: DocVQA: a dataset for VQA on document images. In: WACV (2021)","DOI":"10.1109\/WACV48630.2021.00225"},{"key":"9_CR49","doi-asserted-by":"crossref","unstructured":"Medvet, E., Bartoli, A., Davanzo, G.: A probabilistic approach to printed document understanding. In: ICDAR (2011)","DOI":"10.1007\/s10032-010-0137-1"},{"key":"9_CR50","doi-asserted-by":"publisher","first-page":"142642","DOI":"10.1109\/ACCESS.2020.3012542","volume":"8","author":"J Memon","year":"2020","unstructured":"Memon, J., Sami, M., Khan, R.A., Uddin, M.: Handwritten optical character recognition (OCR): a comprehensive systematic literature review (SLR). IEEE Access. 8, 142642\u2013142668 (2020)","journal-title":"IEEE Access."},{"key":"9_CR51","unstructured":"Mindee: docTR: Document text recognition (2021). http:\/\/github.com\/mindee\/doctr"},{"key":"9_CR52","doi-asserted-by":"crossref","unstructured":"Nassar, A., Livathinos, N., Lysak, M., Staar, P.W.J.: TableFormer: table structure understanding with transformers. arXiv (2022)","DOI":"10.1109\/CVPR52688.2022.00457"},{"key":"9_CR53","doi-asserted-by":"crossref","unstructured":"Nayef, N., et al.: ICDAR 2019 robust reading challenge on multi-lingual scene text detection and recognition-RRC-MLT-2019. In: ICDAR (2019)","DOI":"10.1109\/ICDAR.2019.00254"},{"key":"9_CR54","unstructured":"Olejniczak, K., \u0160ulc, M.: Text detection forgot about document OCR. In: CVWW (2023)"},{"key":"9_CR55","doi-asserted-by":"crossref","unstructured":"Palm, R.B., Laws, F., Winther, O.: Attend, copy, parse end-to-end information extraction from documents. In: ICDAR (2019)","DOI":"10.1109\/ICDAR.2019.00060"},{"key":"9_CR56","doi-asserted-by":"crossref","unstructured":"Palm, R.B., Winther, O., Laws, F.: CloudScan - a configuration-free invoice analysis system using recurrent neural networks. In: ICDAR (2017)","DOI":"10.1109\/ICDAR.2017.74"},{"key":"9_CR57","unstructured":"Pampari, A., Ermon, S.: Unsupervised calibration under covariate shift. arXiv (2020)"},{"key":"9_CR58","unstructured":"Park, S., et al.: Cord: a consolidated receipt dataset for post-OCR parsing. In: NeurIPS Workshops (2019)"},{"key":"9_CR59","doi-asserted-by":"crossref","unstructured":"Powalski, R., Borchmann, \u0141., Jurkiewicz, D., Dwojak, T., Pietruszka, M., Pa\u0142ka, G.: Going full-tilt boogie on document understanding with text-image-layout transformer. In: ICDAR (2021)","DOI":"10.1007\/978-3-030-86331-9_47"},{"key":"9_CR60","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. JMLR. 
21, 5485\u20135551 (2020)","journal-title":"JMLR."},{"key":"9_CR61","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NeurIPS (2015)"},{"key":"9_CR62","doi-asserted-by":"crossref","unstructured":"Riba, P., Dutta, A., Goldmann, L., Forn\u00e9s, A., Ramos, O., Llad\u00f3s, J.: Table detection in invoice documents by graph neural networks. In: ICDAR (2019)","DOI":"10.1109\/ICDAR.2019.00028"},{"key":"9_CR63","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. IJCV. 115, 211\u2013252 (2015)","journal-title":"IJCV."},{"key":"9_CR64","doi-asserted-by":"crossref","unstructured":"Schreiber, S., Agne, S., Wolf, I., Dengel, A., Ahmed, S.: DeepDeSRT: deep learning for detection and structure recognition of tables in document images. In: ICDAR (2017)","DOI":"10.1109\/ICDAR.2017.192"},{"key":"9_CR65","doi-asserted-by":"crossref","unstructured":"Schuster, D., et al.: Intellix-end-user trained information extraction for document archiving. In: ICDAR (2013)","DOI":"10.1109\/ICDAR.2013.28"},{"key":"9_CR66","doi-asserted-by":"crossref","unstructured":"Siegel, N., Lourie, N., Power, R., Ammar, W.: Extracting scientific figures with distantly supervised neural networks. In: Chen, J., Gon\u00e7alves, M.A., Allen, J.M., Fox, E.A., Kan, M., Petras, V. (eds.) Proceedings of the 18th ACM\/IEEE on Joint Conference on Digital Libraries, JCDL (2018)","DOI":"10.1145\/3197026.3197040"},{"key":"9_CR67","doi-asserted-by":"crossref","unstructured":"\u0160imsa, \u0160., \u0160ulc, M., Skalick\u1ef3, M., Patel, Y., Hamdi, A.: Docile 2023 teaser: document information localization and extraction. In: ECIR (2023)","DOI":"10.1007\/978-3-031-28241-6_69"},{"key":"9_CR68","doi-asserted-by":"crossref","unstructured":"\u0160ipka, T., \u0160ulc, M., Matas, J.: The hitchhiker\u2019s guide to prior-shift adaptation. In: WACV (2022)","DOI":"10.1109\/WACV51458.2022.00209"},{"key":"9_CR69","doi-asserted-by":"crossref","unstructured":"Skalick\u00fd, M., \u0160imsa, \u0160., U\u0159i\u010d\u00e1\u0159, M., \u0160ulc, M.: Business document information extraction: Towards practical benchmarks. In: CLEF (2022)","DOI":"10.1007\/978-3-031-13643-6_8"},{"key":"9_CR70","doi-asserted-by":"crossref","unstructured":"Smith, R.: An overview of the tesseract OCR engine. In: ICDAR (2007)","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"9_CR71","doi-asserted-by":"crossref","unstructured":"Smock, B., Pesala, R., Abraham, R.: PubTables-1M: towards comprehensive table extraction from unstructured documents. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00459"},{"key":"9_CR72","doi-asserted-by":"crossref","unstructured":"Stanis\u0142awek, T., et al.: Kleister: key information extraction datasets involving long documents with complex layouts. In: ICDAR (2021)","DOI":"10.1007\/978-3-030-86549-8_36"},{"key":"9_CR73","unstructured":"Stray, J., Svetlichnaya, S.: DeepForm: extract information from documents (2020). http:\/\/wandb.ai\/deepform\/political-ad-extraction, benchmark"},{"key":"9_CR74","unstructured":"Sun, H., Kuang, Z., Yue, X., Lin, C., Zhang, W.: Spatial dual-modality graph reasoning for key information extraction. 
arXiv (2021)"},{"key":"9_CR75","unstructured":"Sunder, V., Srinivasan, A., Vig, L., Shroff, G., Rahul, R.: One-shot information extraction from document images using neuro-deductive program synthesis. arXiv (2019)"},{"key":"9_CR76","doi-asserted-by":"crossref","unstructured":"Tanaka, R., Nishida, K., Yoshida, S.: VisualMRC: machine reading comprehension on document images. In: AAAI (2021)","DOI":"10.1609\/aaai.v35i15.17635"},{"key":"9_CR77","doi-asserted-by":"crossref","unstructured":"Tang, Z., et al.: Unifying vision, text, and layout for universal document processing. arXiv (2022)","DOI":"10.1109\/CVPR52729.2023.01845"},{"key":"9_CR78","doi-asserted-by":"crossref","unstructured":"Tensmeyer, C., Morariu, V.I., Price, B., Cohen, S., Martinez, T.: Deep splitting and merging for table structure decomposition. In: ICDAR (2019)","DOI":"10.1109\/ICDAR.2019.00027"},{"key":"9_CR79","doi-asserted-by":"crossref","unstructured":"Wang, J., et al.: Towards robust visual information extraction in real world: new dataset and novel solution. In: AAAI (2021)","DOI":"10.1609\/aaai.v35i4.16378"},{"key":"9_CR80","unstructured":"Web: Industry Documents Library. www.industrydocuments.ucsf.edu\/. Accessed 20 Oct 2022"},{"key":"9_CR81","unstructured":"Web: Industry Documents Library API. www.industrydocuments.ucsf.edu\/research-tools\/api\/. Accessed 20 Oct 2022"},{"key":"9_CR82","unstructured":"Web: Public Inspection Files. http:\/\/publicfiles.fcc.gov\/. Accessed 20 Oct 2022"},{"key":"9_CR83","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: Layoutlmv2: Multi-modal pre-training for visually-rich document understanding. In: ACL (2021)","DOI":"10.18653\/v1\/2021.acl-long.201"},{"key":"9_CR84","doi-asserted-by":"crossref","unstructured":"Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: LayoutLM: pre-training of text and layout for document image understanding. In: KDD (2020)","DOI":"10.1145\/3394486.3403172"},{"key":"9_CR85","unstructured":"Xu, Y., et al.: LayoutXLM: multimodal pre-training for multilingual visually-rich document understanding. arXiv (2021)"},{"key":"9_CR86","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Ma, J., Du, J., Wang, L., Zhang, J.: Multimodal pre-training based on graph attention network for document understanding. IEEE Trans. Multimed. (2022)","DOI":"10.1109\/TMM.2022.3214102"},{"key":"9_CR87","unstructured":"Zhao, X., Wu, Z., Wang, X.: CUTIE: learning to understand documents with convolutional universal text information extractor. arXiv (2019)"},{"key":"9_CR88","doi-asserted-by":"crossref","unstructured":"Zheng, X., Burdick, D., Popa, L., Zhong, X., Wang, N.X.R.: Global table extractor (GTE): a framework for joint table identification and cell structure recognition using visual context. In: WACV (2021)","DOI":"10.1109\/WACV48630.2021.00074"},{"key":"9_CR89","doi-asserted-by":"crossref","unstructured":"Zhong, X., Tang, J., Jimeno-Yepes, A.: PublayNet: largest dataset ever for document layout analysis. In: ICDAR (2019)","DOI":"10.1109\/ICDAR.2019.00166"},{"key":"9_CR90","doi-asserted-by":"crossref","unstructured":"Zhou, J., Yu, H., Xie, C., Cai, H., Jiang, L.: IRMP: from printed forms to relational data model. In: HPCC (2016)","DOI":"10.1109\/HPCC-SmartCity-DSS.2016.0199"},{"key":"9_CR91","doi-asserted-by":"crossref","unstructured":"Zhu, Y., et al.: Aligning books and movies: Towards story-like visual explanations by watching movies and reading books. 
In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.11"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2023"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-41679-8_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T09:43:26Z","timestamp":1729935806000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-41679-8_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031416781","9783031416798"],"references-count":91,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-41679-8_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"19 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"San Jos\u00e9, CA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"316","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"154","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review 
Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"49% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.89","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.50","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Number and type of other papers accepted : IJDAR track papers","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}
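
# The record above is a Crossref REST API work message ("source":"Crossref"). As a minimal
# sketch of how such a record can be fetched and read, the Python snippet below queries the
# public api.crossref.org/works/{DOI} endpoint for the DOI given in the record; the field
# names follow the payload shown above, and the `requests` library is assumed to be installed.
# This is an illustrative sketch, not part of the deposited metadata.

import requests

# DOI of the chapter described by the record above.
DOI = "10.1007/978-3-031-41679-8_9"

# The Crossref REST API returns the same envelope as above:
# {"status": "ok", "message-type": "work", ..., "message": {...}}
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]

# Scalar metadata fields.
print(work["title"][0])                    # chapter title
print(work["DOI"], work["type"])           # identifier and work type
print(work.get("is-referenced-by-count"))  # citation count at indexing time

# Authors are a list of {"given", "family", "ORCID", ...} objects.
for a in work.get("author", []):
    print(f'{a.get("given", "")} {a.get("family", "")}'.strip(), a.get("ORCID", ""))

# References carry an "unstructured" citation string, a "DOI", or both.
for ref in work.get("reference", [])[:5]:
    print(ref.get("DOI") or ref.get("unstructured"))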