{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T05:33:19Z","timestamp":1777095199416,"version":"3.51.4"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T00:00:00Z","timestamp":1769990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T00:00:00Z","timestamp":1769990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,2,2]]},"DOI":"10.1109\/icsc67292.2026.00042","type":"proceedings-article","created":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T19:44:40Z","timestamp":1777059880000},"page":"253-258","source":"Crossref","is-referenced-by-count":0,"title":["Unsupervised Boilerplate Removal Using N-gram Sequence Analysis"],"prefix":"10.1109","author":[{"given":"Kishore","family":"Vanapalli","sequence":"first","affiliation":[{"name":"Carleton University Ottawa,School of Information Technology,ON,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shahzad","family":"Khan","sequence":"additional","affiliation":[{"name":"Gnowit Inc. Ottawa,ON,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"M. Omair","family":"Shafiq","sequence":"additional","affiliation":[{"name":"Carleton University Ottawa,School of Information Technology,ON,Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Extraction of Relevant Images for Boilerplate Removal in Web Browsers","author":"Bose","year":"2020","journal-title":"arXiv preprint arXiv:2001.04338"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/1568296.1568314"},{"key":"ref3","article-title":"Database System Concepts","author":"Silberschatz","year":"2010"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1718487.1718542"},{"key":"ref5","volume-title":"Removing Boilerplate and Duplicate Content from Web Corpora","author":"Pomik\u00e1lek","year":"2011"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3486622.3493938"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-76941-7_13"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9534308"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3726302.3730234"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324923000049"},{"key":"ref11","article-title":"The role of wire services in local newspapers","author":"Mitchell","year":"2015","journal-title":"Pew Research Center."},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-demo.15"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3366424.3383547"},{"key":"ref14","article-title":"Newswire: A Large-Scale Structured Database of a Century of Historical News","author":"Emily","year":"2024","journal-title":"arXiv:2406.09490."},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1111\/ajps.12175"},{"key":"ref16","article-title":"Web scraping statistics and trends you need to know in 2026","volume-title":"Scrapingdog Industry Report.","author":"Khatter","year":"2025"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/331499.331504"},{"key":"ref18","first-page":"281","article-title":"Some Methods for Classification and Analysis of Multivariate Observations","volume-title":"Proc. 5th Berkeley Symp. Mathematical Statistics and Probability","volume":"1","author":"MacQueen"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-017-1404-4"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/SEQUEN.1997.666900"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.9735\/2229-3981"},{"key":"ref22","article-title":"vAnalyst: AIpowered media monitoring and analysis","year":"2025"},{"key":"ref23","article-title":"Beautiful Soup Documentation","author":"Richardson","year":"2007"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1080\/07036337.2024.2354490"}],"event":{"name":"2026 International Conference on Semantic Computing (ICSC)","location":"Laguna Hills, CA, USA","start":{"date-parts":[[2026,2,2]]},"end":{"date-parts":[[2026,2,4]]}},"container-title":["2026 International Conference on Semantic Computing (ICSC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11484077\/11486285\/11486472.pdf?arnumber=11486472","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T04:33:18Z","timestamp":1777091598000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11486472\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,2]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/icsc67292.2026.00042","relation":{},"subject":[],"published":{"date-parts":[[2026,2,2]]}}}