{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T15:18:54Z","timestamp":1777130334722,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":17,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T00:00:00Z","timestamp":1587340800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,4,20]]},"DOI":"10.1145\/3366424.3383547","type":"proceedings-article","created":{"date-parts":[[2020,5,4]],"date-time":"2020-05-04T08:10:56Z","timestamp":1588579856000},"page":"226-229","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":19,"title":["Boilerplate Removal using a Neural Sequence Labeling Model"],"prefix":"10.1145","author":[{"given":"Jurek","family":"Leonhardt","sequence":"first","affiliation":[{"name":"L3S Research Center Hannover, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Avishek","family":"Anand","sequence":"additional","affiliation":[{"name":"L3S Research Center Hannover, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Megha","family":"Khosla","sequence":"additional","affiliation":[{"name":"L3S Research Center Hannover, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,4,20]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/511446.511522"},{"key":"e_1_3_2_1_2_1","unstructured":"Marco Baroni Francis Chantree Adam Kilgarriff and Serge Sharoff. 2008. Cleaneval: a Competition for Cleaning Web Pages.. In LREC.  Marco Baroni Francis Chantree Adam Kilgarriff and Serge Sharoff. 2008. Cleaneval: a Competition for Cleaning Web Pages.. In LREC."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1008992.1009070"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1242572.1242582"},{"key":"e_1_3_2_1_5_1","unstructured":"Mostafa Dehghani Arash Mehrjou Stephan Gouws Jaap Kamps and Bernhard Sch\u00f6lkopf. 2017. Fidelity-weighted learning. arXiv preprint arXiv:1711.02799(2017).  Mostafa Dehghani Arash Mehrjou Stephan Gouws Jaap Kamps and Bernhard Sch\u00f6lkopf. 2017. Fidelity-weighted learning. arXiv preprint arXiv:1711.02799(2017)."},{"key":"e_1_3_2_1_6_1","unstructured":"Aidan Finn Nicholas Kushmerick and Barry Smyth. 2001. Fact or fiction: Content classification for digital libraries. (2001).  Aidan Finn Nicholas Kushmerick and Barry Smyth. 2001. Fact or fiction: Content classification for digital libraries. (2001)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1316902.1316920"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/775152.775182"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1718487.1718542"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/775047.775134"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1526709.1526840"},{"key":"e_1_3_2_1_12_1","volume-title":"4th Web as Corpus Workshop (WAC4)-Can we beat Google. 12\u201317.","author":"Spousta Miroslav","unstructured":"Miroslav Spousta , Michal Marek , and Pavel Pecina . 2008. Victor: the web-page cleaning tool . In 4th Web as Corpus Workshop (WAC4)-Can we beat Google. 12\u201317. Miroslav Spousta, Michal Marek, and Pavel Pecina. 2008. Victor: the web-page cleaning tool. In 4th Web as Corpus Workshop (WAC4)-Can we beat Google. 12\u201317."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2009916.2009952"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-76941-7_13"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1557019.1557163"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2736277.2741659"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/956750.956785"}],"event":{"name":"WWW '20: The Web Conference 2020","location":"Taipei Taiwan","acronym":"WWW '20","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Companion Proceedings of the Web Conference 2020"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3366424.3383547","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3366424.3383547","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:33:06Z","timestamp":1750199586000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3366424.3383547"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4,20]]},"references-count":17,"alternative-id":["10.1145\/3366424.3383547","10.1145\/3366424"],"URL":"https:\/\/doi.org\/10.1145\/3366424.3383547","relation":{},"subject":[],"published":{"date-parts":[[2020,4,20]]},"assertion":[{"value":"2020-04-20","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}