{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:11:09Z","timestamp":1750219869304,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nd\/4.0\/"}],"funder":[{"name":"National Key Research and Development Program of China","award":["No.2021YFB3100600"],"award-info":[{"award-number":["No.2021YFB3100600"]}]},{"name":"Strategic Priority Research Program of Chinese Academy of Sciences","award":["No.XDC02040400"],"award-info":[{"award-number":["No.XDC02040400"]}]},{"name":"Youth Innovation Promotion Association of Chinese Academy of Sciences","award":["No.2021153"],"award-info":[{"award-number":["No.2021153"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583387","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:51Z","timestamp":1682551851000},"page":"1683-1692","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Learning Structural Co-occurrences for Structured Web Data Extraction in Low-Resource Settings"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5936-6678","authenticated-orcid":false,"given":"Zhenyu","family":"Zhang","sequence":"first","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, China and School of Cyber Security, University of Chinese Academy of Sciences, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6804-1859","authenticated-orcid":false,"given":"Bowen","family":"Yu","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, China and School of Cyber Security, University of Chinese Academy of Sciences, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0750-6923","authenticated-orcid":false,"given":"Tingwen","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, China and School of Cyber Security, University of Chinese Academy of Sciences, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3213-9077","authenticated-orcid":false,"given":"Tianyun","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, China and School of Cyber Security, University of Chinese Academy of Sciences, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6464-3203","authenticated-orcid":false,"given":"Yubin","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, China and School of Cyber Security, University of Chinese Academy of Sciences, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2529-7643","authenticated-orcid":false,"given":"Li","family":"Guo","sequence":"additional","affiliation":[{"name":"Institute of Information Engineering, Chinese Academy of Sciences, China and School of Cyber Security, University of Chinese Academy of Sciences, China"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106428"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3417996"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87479-9_31"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476293"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.14778\/1938545.1938547"},{"key":"e_1_3_2_1_6_1","unstructured":"Xiang Deng Prashant Shiralkar Colin Lockard Binxuan Huang and Huan Sun. 2022. DOM-LM: Learning Generalizable Representations for HTML Documents. arXiv preprint arXiv:2201.10608(2022)."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT). 4171\u20134186","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT). 4171\u20134186."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623677"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1864708.1864773"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2011.5767842"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2009916.2010020"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.201"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1228"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.747"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.258"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313720"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.192"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.423"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.420"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449943"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403153"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i03.5681"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1194"},{"key":"e_1_3_2_1_24_1","volume-title":"ERNIE-Layout: Layout Knowledge Enhanced Pre-training for Visually-rich Document Understanding. In Findings of the Association for Computational Linguistics: EMNLP 2022 (EMNLP: Findings). 3744\u20133756","author":"Peng Qiming","year":"2022","unstructured":"Qiming Peng, Yinxu Pan, Wenjin Wang, Bin Luo, Zhenyu Zhang, Zhengjie Huang, Teng Hu, Weichong Yin, Yongfeng Chen, Yin Zhang, Shikun Feng, Yu Sun, Hua Wu, and Haifeng Wang. 2022. ERNIE-Layout: Layout Knowledge Enhanced Pre-training for Visually-rich Document Understanding. In Findings of the Association for Computational Linguistics: EMNLP 2022 (EMNLP: Findings). 3744\u20133756."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.14778\/3447689.3447703"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512032"},{"volume-title":"Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021","author":"Wang Ruize","key":"e_1_3_2_1_28_1","unstructured":"Ruize Wang, Duyu Tang, Nan Duan, Zhongyu Wei, Xuan-Jing Huang, Jianshu Ji, Guihong Cao, Daxin Jiang, and Ming Zhou. 2021. K-Adapter: Infusing Knowledge into Pre-Trained Models with Adapters. In Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021 (ACL: Findings). 1405\u20131418."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380098"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.139"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390334.1390343"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the 8th International Conference on Learning Representations (ICLR). 1\u201313","author":"Xiong Wenhan","year":"2020","unstructured":"Wenhan Xiong, Jingfei Du, William\u00a0Yang Wang, and Veselin Stoyanov. 2020. Pretrained Encyclopedia: Weakly Supervised Knowledge-Pretrained Language Model. In Proceedings of the 8th International Conference on Learning Representations (ICLR). 1\u201313."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1417"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1224"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1139"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6509"},{"volume-title":"Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021","author":"Zhang Zhenyu","key":"e_1_3_2_1_38_1","unstructured":"Zhenyu Zhang, Bowen Yu, Xiaobo Shu, Xue Mengge, Tingwen Liu, and Li Guo. 2021. From What to Why: Improving Relation Extraction with Rationale Graph. In Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021 (ACL: Findings). 86\u201395."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1645962"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498424"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150457"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/1281192.1281288"}],"event":{"name":"WWW '23: The ACM Web Conference 2023","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Austin TX USA","acronym":"WWW '23"},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583387","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583387","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:51Z","timestamp":1750178871000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583387"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":42,"alternative-id":["10.1145\/3543507.3583387","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583387","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}