{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:18:45Z","timestamp":1750220325491,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,12,14]],"date-time":"2021-12-14T00:00:00Z","timestamp":1639440000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100004663","name":"Ministry of Science and Technology, Taiwan","doi-asserted-by":"publisher","award":["MOST-109-2221-E-008-060-MY3"],"award-info":[{"award-number":["MOST-109-2221-E-008-060-MY3"]}],"id":[{"id":"10.13039\/501100004663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,14]]},"DOI":"10.1145\/3486622.3493938","type":"proceedings-article","created":{"date-parts":[[2022,4,14]],"date-time":"2022-04-14T01:18:53Z","timestamp":1649899133000},"page":"326-334","source":"Crossref","is-referenced-by-count":1,"title":["Multi-Task Neural Sequence Labeling for Zero-Shot Cross-Language Boilerplate Removal"],"prefix":"10.1145","author":[{"given":"Yu-Hao","family":"Wu","sequence":"first","affiliation":[{"name":"National Central University, Taiwan"}]},{"given":"Chia-Hui","family":"Chang","sequence":"additional","affiliation":[{"name":"National Central University, Taiwan"}]}],"member":"320","published-online":{"date-parts":[[2022,4,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC\u201908)","author":"Baroni Marco","year":"2008","unstructured":"Marco Baroni , Francis Chantree , Adam Kilgarriff , and Serge Sharoff . 2008 . Cleaneval: a Competition for Cleaning Web Pages . In Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC\u201908) . European Language Resources Association (ELRA), Marrakech, Morocco, 6. http:\/\/www.lrec-conf.org\/proceedings\/lrec 2008\/pdf\/162_paper.pdf Marco Baroni, Francis Chantree, Adam Kilgarriff, and Serge Sharoff. 2008. Cleaneval: a Competition for Cleaning Web Pages. In Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC\u201908). European Language Resources Association (ELRA), Marrakech, Morocco, 6. http:\/\/www.lrec-conf.org\/proceedings\/lrec2008\/pdf\/162_paper.pdf"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415563"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.14778\/1453856.1453916"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1008992.1009070"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1109\/TKDE.2006.152","article-title":"A Survey of Web Information Extraction Systems","volume":"18","author":"Chang Chia-Hui","year":"2006","unstructured":"Chia-Hui Chang , Mohammed Kayed , Moheb\u00a0Ramzy Girgis , and Khaled\u00a0 F. Shaalan . 2006 . A Survey of Web Information Extraction Systems . IEEE Transactions on Knowledge & Data Engineering 18 , 10 (Oct 2006), 1411\u20131428. https:\/\/doi.org\/10.1109\/TKDE.2006.152 Chia-Hui Chang, Mohammed Kayed, Moheb\u00a0Ramzy Girgis, and Khaled\u00a0F. Shaalan. 2006. A Survey of Web Information Extraction Systems. IEEE Transactions on Knowledge & Data Engineering 18, 10 (Oct 2006), 1411\u20131428. https:\/\/doi.org\/10.1109\/TKDE.2006.152","journal-title":"IEEE Transactions on Knowledge & Data Engineering"},{"key":"e_1_3_2_1_6_1","first-page":"19","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2019 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding . In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies , Volume 1 (Long and Short Papers). Association for Computational Linguistics, Minneapolis, Minnesota, 4171\u20134186. https:\/\/doi.org\/10. 18653\/v1\/N 19 - 1423 Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). Association for Computational Linguistics, Minneapolis, Minnesota, 4171\u20134186. https:\/\/doi.org\/10.18653\/v1\/N19-1423"},{"key":"e_1_3_2_1_7_1","volume-title":"C (Nov.","author":"Ferrara Emilio","year":"2014","unstructured":"Emilio Ferrara , Pasquale De\u00a0Meo , Giacomo Fiumara , and Robert Baumgartner . 2014. Web Data Extraction, Applications and Techniques. Know.-Based Syst. 70 , C (Nov. 2014 ), 301\u2013323. https:\/\/doi.org\/10.1016\/j.knosys.2014.07.007 Emilio Ferrara, Pasquale De\u00a0Meo, Giacomo Fiumara, and Robert Baumgartner. 2014. Web Data Extraction, Applications and Techniques. Know.-Based Syst. 70, C (Nov. 2014), 301\u2013323. https:\/\/doi.org\/10.1016\/j.knosys.2014.07.007"},{"volume-title":"Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval(SIGIR15","author":"Foley J.","key":"e_1_3_2_1_8_1","unstructured":"J. Foley , M. Bendersky , and V. Josifovski . 2015. Learning to Extract Local Events from the Web . In Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval(SIGIR15 , 09-13). ACM, Santiago, Chile, 423\u2013432. https:\/\/doi.org\/10.1145\/2766462.2767739 J. Foley, M. Bendersky, and V. Josifovski. 2015. Learning to Extract Local Events from the Web. In Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval(SIGIR15, 09-13). ACM, Santiago, Chile, 423\u2013432. https:\/\/doi.org\/10.1145\/2766462.2767739"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157096.3157211"},{"key":"e_1_3_2_1_10_1","first-page":"1","article-title":"Domain-Adversarial Training of Neural Networks","volume":"17","author":"Ganin Yaroslav","year":"2016","unstructured":"Yaroslav Ganin , Evgeniya Ustinova , Hana Ajakan , Pascal Germain , Hugo Larochelle , Fran\u00e7ois Laviolette , Mario Marchand , and Victor Lempitsky . 2016 . Domain-Adversarial Training of Neural Networks . J. Mach. Learn. Res. 17 , 1 (Jan. 2016), 2096\u20132030. Yaroslav Ganin, Evgeniya Ustinova, Hana Ajakan, Pascal Germain, Hugo Larochelle, Fran\u00e7ois Laviolette, Mario Marchand, and Victor Lempitsky. 2016. Domain-Adversarial Training of Neural Networks. J. Mach. Learn. Res. 17, 1 (Jan. 2016), 2096\u20132030.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/775152.775182"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"volume-title":"Database and Expert Systems Applications, Hendrik Decker, Lenka Lhotsk\u00e1, Sebastian Link, Josef Basl, and A.\u00a0Min Tjoa (Eds.)","author":"Kim Myungwon","key":"e_1_3_2_1_13_1","unstructured":"Myungwon Kim , Youngjin Kim , Wonmoon Song , and Ara Khil . 2013. Main Content Extraction from Web Documents Using Text Block Context . In Database and Expert Systems Applications, Hendrik Decker, Lenka Lhotsk\u00e1, Sebastian Link, Josef Basl, and A.\u00a0Min Tjoa (Eds.) . Springer Berlin Heidelberg, Berlin , Heidelberg , 81\u201393. Myungwon Kim, Youngjin Kim, Wonmoon Song, and Ara Khil. 2013. Main Content Extraction from Web Documents Using Text Block Context. In Database and Expert Systems Applications, Hendrik Decker, Lenka Lhotsk\u00e1, Sebastian Link, Josef Basl, and A.\u00a0Min Tjoa (Eds.). Springer Berlin Heidelberg, Berlin, Heidelberg, 81\u201393."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1718487.1718542"},{"key":"e_1_3_2_1_15_1","volume-title":"Article arXiv:1904.13310 (April","author":"Labach Alex","year":"2019","unstructured":"Alex Labach , Hojjat Salehinejad , and Shahrokh Valaee . 2019. Survey of Dropout Methods for Deep Neural Networks. arXiv e-prints abs\/1904.13310 , Article arXiv:1904.13310 (April 2019 ), 13\u00a0pages. arxiv:1904.13310\u00a0[cs.NE] Alex Labach, Hojjat Salehinejad, and Shahrokh Valaee. 2019. Survey of Dropout Methods for Deep Neural Networks. arXiv e-prints abs\/1904.13310, Article arXiv:1904.13310 (April 2019), 13\u00a0pages. arxiv:1904.13310\u00a0[cs.NE]"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/565117.565137"},{"volume-title":"Proceedings of the Eighteenth International Conference on Machine Learning(ICML \u201901)","author":"Lafferty D.","key":"e_1_3_2_1_17_1","unstructured":"John\u00a0 D. Lafferty , Andrew McCallum , and Fernando C . \u00a0N. Pereira. 2001. Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data . In Proceedings of the Eighteenth International Conference on Machine Learning(ICML \u201901) . Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 282\u2013289. John\u00a0D. Lafferty, Andrew McCallum, and Fernando C.\u00a0N. Pereira. 2001. Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data. In Proceedings of the Eighteenth International Conference on Machine Learning(ICML \u201901). Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 282\u2013289."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366424.3383547"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.datak.2006.04.002"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-016-9359-2"},{"volume-title":"The 4th Web as Corpus Workshop (WAC4)-Can we beat Google","author":"Spousta Miroslav","key":"e_1_3_2_1_21_1","unstructured":"Miroslav Spousta , M. Marek , and Pavel Pecina . 2008. Victor : the Web-Page Cleaning Tool . In The 4th Web as Corpus Workshop (WAC4)-Can we beat Google . European Language Resources Association (ELRA) , Marrakech, Morocco , 12\u201317. Miroslav Spousta, M. Marek, and Pavel Pecina. 2008. Victor : the Web-Page Cleaning Tool. In The 4th Web as Corpus Workshop (WAC4)-Can we beat Google. European Language Resources Association (ELRA), Marrakech, Morocco, 12\u201317."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.3115\/1220355.1220497"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-76941-7_13"},{"volume-title":"Proceedings of the 28th ACM International Conference on Information and Knowledge Management","author":"Wang Qifan","key":"e_1_3_2_1_24_1","unstructured":"Qifan Wang , Bhargav Kanagal , Vijay Garg , and D. Sivakumar . 2019. Constructing a Comprehensive Events Database from the Web . In Proceedings of the 28th ACM International Conference on Information and Knowledge Management ( Beijing, China) (CIKM \u201919). Association for Computing Machinery, New York, NY, USA, 229\u2013238. https:\/\/doi.org\/10.1145\/3357384.3357986 Qifan Wang, Bhargav Kanagal, Vijay Garg, and D. Sivakumar. 2019. Constructing a Comprehensive Events Database from the Web. In Proceedings of the 28th ACM International Conference on Information and Knowledge Management (Beijing, China) (CIKM \u201919). Association for Computing Machinery, New York, NY, USA, 229\u2013238. https:\/\/doi.org\/10.1145\/3357384.3357986"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-93698-7_15"}],"event":{"name":"WI-IAT '21: IEEE\/WIC\/ACM International Conference on Web Intelligence","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence"],"location":"ESSENDON VIC Australia","acronym":"WI-IAT '21"},"container-title":["IEEE\/WIC\/ACM International Conference on Web Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3486622.3493938","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3486622.3493938","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:12:06Z","timestamp":1750191126000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3486622.3493938"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,14]]},"references-count":25,"alternative-id":["10.1145\/3486622.3493938","10.1145\/3486622"],"URL":"https:\/\/doi.org\/10.1145\/3486622.3493938","relation":{},"subject":[],"published":{"date-parts":[[2021,12,14]]}}}