{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T16:45:00Z","timestamp":1780418700988,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,7,18]],"date-time":"2023-07-18T00:00:00Z","timestamp":1689638400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,7,19]]},"DOI":"10.1145\/3539618.3591886","type":"proceedings-article","created":{"date-parts":[[2023,7,19]],"date-time":"2023-07-19T00:22:23Z","timestamp":1689726143000},"page":"2807-2816","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":13,"title":["Form-NLU: Dataset for the Form Natural Language Understanding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5065-6911","authenticated-orcid":false,"given":"Yihao","family":"Ding","sequence":"first","affiliation":[{"name":"The University of Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0590-7587","authenticated-orcid":false,"given":"Siqu","family":"Long","sequence":"additional","affiliation":[{"name":"The University of Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7978-656X","authenticated-orcid":false,"given":"Jiabin","family":"Huang","sequence":"additional","affiliation":[{"name":"The University of Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5547-5240","authenticated-orcid":false,"given":"Kaixuan","family":"Ren","sequence":"additional","affiliation":[{"name":"The University of Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9024-3144","authenticated-orcid":false,"given":"Xingxiang","family":"Luo","sequence":"additional","affiliation":[{"name":"The University of Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5272-3778","authenticated-orcid":false,"given":"Hyunsuk","family":"Chung","sequence":"additional","affiliation":[{"name":"FortifyEdge, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1948-6819","authenticated-orcid":false,"given":"Soyeon Caren","family":"Han","sequence":"additional","affiliation":[{"name":"The University of Western Australia, Perth, WA, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,7,18]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Proceedings of the 29th International Conference on Computational Linguistics. 3438--3453","author":"Cao Feiqi","year":"2022","unstructured":"Feiqi Cao, Soyeon Caren Han, Siqu Long, Changwei Xu, and Josiah Poon. 2022. Understanding Attention for Vision-and-Language Tasks. In Proceedings of the 29th International Conference on Computational Linguistics. 3438--3453."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.2725639"},{"key":"e_1_3_2_2_3_1","volume-title":"Complicated table structure recognition. arXiv preprint arXiv:1908.04729","author":"Chi Zewen","year":"2019","unstructured":"Zewen Chi, Heyan Huang, Heng-Da Xu, Houjin Yu, Wanxuan Yin, and Xian-Ling Mao. 2019. Complicated table structure recognition. arXiv preprint arXiv:1908.04729 (2019)."},{"key":"e_1_3_2_2_4_1","volume-title":"A coefficient of agreement for nominal scales. Educational and psychological measurement","author":"Cohen Jacob","year":"1960","unstructured":"Jacob Cohen. 1960. A coefficient of agreement for nominal scales. Educational and psychological measurement, Vol. 20, 1 (1960), 37--46."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02083"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00454"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01001"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548112"},{"key":"e_1_3_2_2_10_1","volume-title":"International Conference on Document Analysis Recognition.","author":"Huang Zheng","year":"2019","unstructured":"Zheng Huang, Kai Chen, Jianhua He, Xiang Bai, Dimosthenis Karatzas, Shijian Lu, and CV Jawahar. 2019. Icdar 2019 robust reading challenge on scanned receipts ocr and information extraction. In International Conference on Document Analysis Recognition."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDARW.2019.10029"},{"key":"e_1_3_2_2_12_1","volume-title":"International Conference on Machine Learning. PMLR, 5583--5594","author":"Kim Wonjae","year":"2021","unstructured":"Wonjae Kim, Bokyung Son, and Ildoo Kim. 2021. Vilt: Vision-and-language transformer without convolution or region supervision. In International Conference on Machine Learning. PMLR, 5583--5594."},{"key":"e_1_3_2_2_13_1","volume-title":"Visualbert: A simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557","author":"Li Liunian Harold","year":"2019","unstructured":"Liunian Harold Li, Mark Yatskar, Da Yin, Cho-Jui Hsieh, and Kai-Wei Chang. 2019. Visualbert: A simple and performant baseline for vision and language. arXiv preprint arXiv:1908.03557 (2019)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.82"},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the 29th International Conference on Computational Linguistics. 2906--2916","author":"Luo Siwen","year":"2022","unstructured":"Siwen Luo, Yihao Ding, Siqu Long, Josiah Poon, and Soyeon Caren Han. 2022. Doc-GCN: Heterogeneous Graph Convolutional Networks for Document Layout Analysis. In Proceedings of the 29th International Conference on Computational Linguistics. 2906--2916."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00225"},{"key":"e_1_3_2_2_17_1","volume-title":"Workshop on Document Intelligence at NeurIPS","author":"Park Seunghyun","year":"2019","unstructured":"Seunghyun Park, Seung Shin, Bado Lee, Junyeop Lee, Jaeheung Surh, Minjoon Seo, and Hwalsuk Lee. 2019. CORD: a consolidated receipt dataset for post-OCR parsing. In Workshop on Document Intelligence at NeurIPS 2019."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i5.16558"},{"key":"e_1_3_2_2_19_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems, Vol. 28 (2015)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86549-8_36"},{"key":"e_1_3_2_2_21_1","volume-title":"Lxmert: Learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490","author":"Tan Hao","year":"2019","unstructured":"Hao Tan and Mohit Bansal. 2019. Lxmert: Learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490 (2019)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i15.17635"},{"key":"e_1_3_2_2_23_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_24_1","volume-title":"Pointer networks. Advances in neural information processing systems","author":"Vinyals Oriol","year":"2015","unstructured":"Oriol Vinyals, Meire Fortunato, and Navdeep Jaitly. 2015. Pointer networks. Advances in neural information processing systems, Vol. 28 (2015)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i4.16378"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403172"},{"key":"e_1_3_2_2_27_1","volume-title":"Layoutxlm: Multimodal pre-training for multilingual visually-rich document understanding. arXiv preprint arXiv:2104.08836","author":"Xu Yiheng","year":"2021","unstructured":"Yiheng Xu, Tengchao Lv, Lei Cui, Guoxin Wang, Yijuan Lu, Dinei Florencio, Cha Zhang, and Furu Wei. 2021a. Layoutxlm: Multimodal pre-training for multilingual visually-rich document understanding. arXiv preprint arXiv:2104.08836 (2021)."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.253"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.201"},{"key":"e_1_3_2_2_30_1","volume-title":"Entity Relation Extraction as Dependency Parsing in Visually Rich Documents. arXiv preprint arXiv:2110.09915","author":"Zhang Yue","year":"2021","unstructured":"Yue Zhang, Bo Zhang, Rui Wang, Junjie Cao, Chen Li, and Zuyi Bao. 2021. Entity Relation Extraction as Dependency Parsing in Visually Rich Documents. arXiv preprint arXiv:2110.09915 (2021)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00074"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_34"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00166"}],"event":{"name":"SIGIR '23: The 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Taipei Taiwan","acronym":"SIGIR '23","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3591886","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539618.3591886","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:38:07Z","timestamp":1750178287000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3591886"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,18]]},"references-count":33,"alternative-id":["10.1145\/3539618.3591886","10.1145\/3539618"],"URL":"https:\/\/doi.org\/10.1145\/3539618.3591886","relation":{},"subject":[],"published":{"date-parts":[[2023,7,18]]},"assertion":[{"value":"2023-07-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}