{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,15]],"date-time":"2025-06-15T14:40:04Z","timestamp":1749998404139,"version":"3.41.0"},"publisher-location":"Cham","reference-count":54,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031705519"},{"type":"electronic","value":"9783031705526"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70552-6_5","type":"book-chapter","created":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T04:02:14Z","timestamp":1725940934000},"page":"77-96","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["UniVIE: A Unified Label Space Approach to\u00a0Visual Information Extraction from\u00a0Form-Like Documents"],"prefix":"10.1007","author":[{"given":"Kai","family":"Hu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiawei","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weihong","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhuoyao","family":"Zhong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiang","family":"Huo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,9,11]]},"reference":[{"key":"5_CR1","doi-asserted-by":"crossref","unstructured":"Aggarwal, M., Gupta, H., Sarkar, M., Krishnamurthy, B.: Form2Seq: a framework for higher-order form structure extraction. In: EMNLP, pp. 3830\u20133840 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.314"},{"key":"5_CR2","doi-asserted-by":"crossref","unstructured":"Aggarwal, M., Sarkar, M., Gupta, H., Krishnamurthy, B.: Multi-modal association based grouping for form structure extraction. In: WACV, pp. 2075\u20132084 (2020)","DOI":"10.1109\/WACV45572.2020.9093376"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Appalaraju, S., Jasani, B., Kota, B.U., Xie, Y., Manmatha, R.: Docformer: end-to-end transformer for document understanding. In: ICCV, pp. 993\u20131003 (2021)","DOI":"10.1109\/ICCV48922.2021.00103"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Carbonell, M., Riba, P., Villegas, M., Forn\u00e9s, A., Llad\u00f3s, J.: Named entity recognition and relation extraction with graph neural networks in semi structured documents. In: ICPR, pp. 9622\u20139627 (2021)","DOI":"10.1109\/ICPR48806.2021.9412669"},{"key":"5_CR5","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1007\/s10032-002-0084-6","volume":"6","author":"F Cesarini","year":"2003","unstructured":"Cesarini, F., Francesconi, E., Gori, M., Soda, G.: Analysis and understanding of multi-class invoices. Doc. Anal. Recogn. 6, 102\u2013114 (2003)","journal-title":"Doc. Anal. Recogn."},{"key":"5_CR6","first-page":"1396","volume":"14","author":"YJ Chu","year":"1965","unstructured":"Chu, Y.J.: On the shortest arborescence of a directed graph. Sci. Sinica 14, 1396\u20131400 (1965)","journal-title":"Sci. Sinica"},{"key":"5_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1007\/978-3-031-25069-9_19","volume-title":"Computer Vision - ECCV 2022 Workshops","author":"B Davis","year":"2022","unstructured":"Davis, B., Morse, B., Price, B., Tensmeyer, C., Wigington, C., Morariu, V.: End-to-end document recognition and understanding with dessurt. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) ECCV 2022. LNCS, vol. 13804, pp. 280\u2013296. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-25069-9_19"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Davis, B., Morse, B., Price, B., Tensmeyer, C., Wiginton, C.: Visual fudge: form understanding via dynamic graph editing. In: ICDAR, pp. 416\u2013431 (2021)","DOI":"10.1007\/978-3-030-86549-8_27"},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: CVPR, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"Dengel, A.R., Klein, B.: Smartfix: a requirements-driven system for document analysis and understanding. In: DAS, pp. 433\u2013444 (2002)","DOI":"10.1007\/3-540-45869-7_47"},{"key":"5_CR11","unstructured":"Denk, T.I., Reisswig, C.: BERTgrid: contextualized embedding for 2D document representation and understanding. arXiv preprint arXiv:1909.04948 (2019)"},{"key":"5_CR12","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL, pp. 4171\u20134186 (2019)"},{"key":"5_CR13","doi-asserted-by":"crossref","unstructured":"Dhouib, M., Bettaieb, G., Shabou, A.: Docparser: end-to-end OCR-free information extraction from visually rich documents. In: ICDAR, pp. 155\u2013172 (2023)","DOI":"10.1007\/978-3-031-41734-4_10"},{"key":"5_CR14","unstructured":"Dozat, T., Manning, C.D.: Deep biaffine attention for neural dependency parsing. In: ICLR (2017)"},{"issue":"4","key":"5_CR15","doi-asserted-by":"publisher","first-page":"233","DOI":"10.6028\/jres.071B.032","volume":"71","author":"J Edmonds","year":"1967","unstructured":"Edmonds, J., et al.: Optimum branchings. J. Res. Natl. Bureau Stand. B 71(4), 233\u2013240 (1967)","journal-title":"J. Res. Natl. Bureau Stand. B"},{"key":"5_CR16","doi-asserted-by":"crossref","unstructured":"Esser, D., Schuster, D., Muthmann, K., Berger, M., Schill, A.: Automatic indexing of scanned documents: a layout-based approach. In: DRR, pp. 118\u2013125 (2012)","DOI":"10.1117\/12.908542"},{"key":"5_CR17","unstructured":"Gao, M., Xue, L., Ramaiah, C., Xing, C., Xu, R., Xiong, C.: Docquerynet: value retrieval with arbitrary queries for form-like documents. In: COLING, pp. 2141\u20142146 (2022)"},{"key":"5_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1007\/978-3-031-25069-9_22","volume-title":"Computer Vision - ECCV 2022 Workshops","author":"A Gemelli","year":"2023","unstructured":"Gemelli, A., Biswas, S., Civitelli, E., Llad\u00f3s, J., Marinai, S.: Doc2graph: a task agnostic document understanding framework based on graph neural networks. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) ECCV 2022. LNCS, vol. 13804, pp. 329\u2013344. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-25069-9_22"},{"key":"5_CR19","unstructured":"Gupta, P., Sch\u00fctze, H., Andrassy, B.: Table filling multi-task recurrent neural network for joint entity and relation extraction. In: COLING, pp. 2537\u20132547 (2016)"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: WACV, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Hu, K., Wu, Z., Zhong, Z., Lin, W., Sun, L., Huo, Q.: A question-answering approach to key value pair extraction from form-like document images. In: AAAI, pp. 12899\u201312906 (2023)","DOI":"10.1609\/aaai.v37i11.26516"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Huang, Y., Lv, T., Cui, L., Lu, Y., Wei, F.: Layoutlmv3: pre-training for document AI with unified text and image masking. In: ACM MM, pp. 4083\u20134091 (2022)","DOI":"10.1145\/3503161.3548112"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Huang, Z., et al.: ICDAR2019 competition on scanned receipt OCR and information extraction. In: ICDAR, pp. 1516\u20131520 (2019)","DOI":"10.1109\/ICDAR.2019.00244"},{"key":"5_CR25","doi-asserted-by":"crossref","unstructured":"Hwang, W., Yim, J., Park, S., Yang, S., Seo, M.: Spatial dependency parsing for semi-structured document information extraction. In: ACL Findings, pp. 330\u2014343 (2021)","DOI":"10.18653\/v1\/2021.findings-acl.28"},{"key":"5_CR26","doi-asserted-by":"crossref","unstructured":"Jaume, G., Ekenel, H.K., Thiran, J.P.: FunSD: a dataset for form understanding in noisy scanned documents. In: ICDAR Workshops, pp.\u00a01\u20136 (2019)","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"5_CR27","doi-asserted-by":"crossref","unstructured":"Katti, A.R., et al.: Chargrid: towards understanding 2D documents. In: EMNLP, pp. 4459\u20134469 (2018)","DOI":"10.18653\/v1\/D18-1476"},{"key":"5_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1007\/978-3-031-19815-1_29","volume-title":"Computer Vision - ECCV 2022","author":"G Kim","year":"2022","unstructured":"Kim, G., et al.: OCR-free document understanding transformer. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13688, pp. 498\u2013517. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19815-1_29"},{"key":"5_CR29","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: ICLR (2015)"},{"key":"5_CR30","doi-asserted-by":"crossref","unstructured":"Lee, C.Y., et al.: Formnet: structural encoding beyond sequential modeling in form document information extraction. In: ACL, pp. 3735\u20133754 (2022)","DOI":"10.18653\/v1\/2022.acl-long.260"},{"key":"5_CR31","doi-asserted-by":"crossref","unstructured":"Lin, W., et al.: Vibertgrid: a jointly trained multi-modal 2d document representation for key information extraction from documents. In: ICDAR, pp. 548\u2013563 (2021)","DOI":"10.1007\/978-3-030-86549-8_35"},{"key":"5_CR32","doi-asserted-by":"crossref","unstructured":"Mathur, P., et\u00a0al.: LayerDoc: layer-wise extraction of spatial hierarchical structure in visually-rich documents. In: WACV, pp. 3610\u20133620 (2023)","DOI":"10.1109\/WACV56688.2023.00360"},{"key":"5_CR33","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10032-010-0137-1","volume":"14","author":"E Medvet","year":"2011","unstructured":"Medvet, E., Bartoli, A., Davanzo, G.: A probabilistic approach to printed document understanding. IJDAR 14, 335\u2013347 (2011)","journal-title":"IJDAR"},{"key":"5_CR34","unstructured":"Park, S., et al.: Cord: a consolidated receipt dataset for post-OCR parsing. In: NeurIPS Workshops (2019)"},{"key":"5_CR35","unstructured":"Qiao, B., Zou, Z., Huang, Y., Fang, K., Zhu, X., Chen, Y.: A joint model for entity and relation extraction based on BERT. In: Neural Computing and Applications pp. 1\u201311 (2022)"},{"key":"5_CR36","doi-asserted-by":"crossref","unstructured":"Rastogi, M., et al.: Information extraction from document images via FCA based template detection and knowledge graph rule induction. In: CVPR Workshops, pp. 558\u2013559 (2020)","DOI":"10.1109\/CVPRW50498.2020.00287"},{"key":"5_CR37","doi-asserted-by":"crossref","unstructured":"Rusinol, M., Benkhelfallah, T., Poulain\u00a0dAndecy, V.: Field extraction from administrative documents by incremental structural templates. In: ICDAR, pp. 1100\u20131104 (2013)","DOI":"10.1109\/ICDAR.2013.223"},{"key":"5_CR38","doi-asserted-by":"crossref","unstructured":"Schuster, D., et al.: Intellix \u2013 end-user trained information extraction for document archiving. In: ICDAR, pp. 101\u2013105 (2013)","DOI":"10.1109\/ICDAR.2013.28"},{"key":"5_CR39","doi-asserted-by":"crossref","unstructured":"Shi, D., Liu, S., Du, J., Zhu, H.: LayoutGCN: a lightweight architecture for visually rich document understanding. In: ICDAR, pp. 149\u2013165 (2023)","DOI":"10.1007\/978-3-031-41682-8_10"},{"key":"5_CR40","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Gupta, A., Girshick, R.: Training region-based object detectors with online hard example mining. In: CVPR, pp. 761\u2013769 (2016)","DOI":"10.1109\/CVPR.2016.89"},{"key":"5_CR41","doi-asserted-by":"crossref","unstructured":"\u0160imsa, \u0160., et al.: Docile benchmark for document information localization and extraction. In: ICDAR, pp. 147\u2013166 (2023)","DOI":"10.1007\/978-3-031-41679-8_9"},{"key":"5_CR42","doi-asserted-by":"crossref","unstructured":"Wang, J., Jin, L., Ding, K.: Lilt: a simple yet effective language-independent layout transformer for structured document understanding. In: ACL, pp. 7747\u20137757 (2022)","DOI":"10.18653\/v1\/2022.acl-long.534"},{"key":"5_CR43","doi-asserted-by":"crossref","unstructured":"Wang, J., Lu, W.: Two are better than one: joint entity and relation extraction with table-sequence encoders. In: EMNLP, pp. 1706\u20131721 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.133"},{"key":"5_CR44","doi-asserted-by":"crossref","unstructured":"Wang, Y., Sun, C., Wu, Y., Zhou, H., Li, L., Yan, J.: Unire: a unified label space for entity relation extraction. In: ACL, pp. 220\u2014231 (2021)","DOI":"10.18653\/v1\/2021.acl-long.19"},{"key":"5_CR45","doi-asserted-by":"crossref","unstructured":"Wang, Y., Yu, B., Zhang, Y., Liu, T., Zhu, H., Sun, L.: TPlinker: single-stage joint extraction of entities and relations through token pair linking. In: COLING, pp. 1572\u20141582 (2020)","DOI":"10.18653\/v1\/2020.coling-main.138"},{"issue":"4","key":"5_CR46","doi-asserted-by":"publisher","first-page":"432","DOI":"10.1109\/34.385976","volume":"17","author":"T Watanabe","year":"1995","unstructured":"Watanabe, T., Luo, Q., Sugie, N.: Layout recognition of multi-kinds of table-form documents. TPAMI 17(4), 432\u2013445 (1995)","journal-title":"TPAMI"},{"key":"5_CR47","doi-asserted-by":"crossref","unstructured":"Xu, Y., et\u00a0al.: Layoutlmv2: multi-modal pre-training for visually-rich document understanding. In: ACL, pp. 2579\u20142591 (2021)","DOI":"10.18653\/v1\/2021.acl-long.201"},{"key":"5_CR48","doi-asserted-by":"crossref","unstructured":"Xu, Y., Li, M., Cui, L., Huang, S., Wei, F., Zhou, M.: LayoutLM: pre-training of text and layout for document image understanding. In: KDD, pp. 1192\u20131200 (2020)","DOI":"10.1145\/3394486.3403172"},{"key":"5_CR49","doi-asserted-by":"crossref","unstructured":"Xu, Y., et al.: Xfund: a benchmark dataset for multilingual visually rich form understanding. In: ACL Findings, pp. 3214\u20133224 (2022)","DOI":"10.18653\/v1\/2022.findings-acl.253"},{"key":"5_CR50","doi-asserted-by":"crossref","unstructured":"Yan, H., Sun, Y., Li, X., Zhou, Y., Huang, X., Qiu, X.: UTC-IE: a unified token-pair classification architecture for information extraction. In: ACL, pp. 4096\u20134122 (2023)","DOI":"10.18653\/v1\/2023.acl-long.226"},{"key":"5_CR51","doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: Modeling entities as semantic points for visual information extraction in the wild. In: CVPR, pp. 15358\u201315367 (2023)","DOI":"10.1109\/CVPR52729.2023.01474"},{"key":"5_CR52","doi-asserted-by":"crossref","unstructured":"Zhang, P., et al.: Trie: end-to-end text reading and information extraction for document understanding. In: ACM MM, pp. 1413\u20131422 (2020)","DOI":"10.1145\/3394171.3413900"},{"key":"5_CR53","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Bo, Z., Wang, R., Cao, J., Li, C., Bao, Z.: Entity relation extraction as dependency parsing in visually rich documents. In: EMNLP, pp. 2759\u20132768 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.218"},{"key":"5_CR54","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1016\/j.neucom.2016.12.075","volume":"257","author":"S Zheng","year":"2017","unstructured":"Zheng, S., et al.: Joint entity and relation extraction based on a hybrid neural network. Neurocomputing 257, 59\u201366 (2017)","journal-title":"Neurocomputing"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition - ICDAR 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70552-6_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,15]],"date-time":"2025-06-15T14:07:42Z","timestamp":1749996462000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70552-6_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031705519","9783031705526"],"references-count":54,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70552-6_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"11 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Athens","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icdar2024.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}