{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:22:41Z","timestamp":1750306961079,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","license":[{"start":{"date-parts":[[2013,6,12]],"date-time":"2013-06-12T00:00:00Z","timestamp":1370995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"BUT FIT","award":["FIT-S-11-2"],"award-info":[{"award-number":["FIT-S-11-2"]}]},{"name":"MSM","award":["21630528"],"award-info":[{"award-number":["21630528"]}]},{"name":"IT4Innovations Centre of Excellence","award":["CZ.1.05\/1.1.00\/02.0070"],"award-info":[{"award-number":["CZ.1.05\/1.1.00\/02.0070"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2013,6,12]]},"DOI":"10.1145\/2479787.2479792","type":"proceedings-article","created":{"date-parts":[[2013,6,11]],"date-time":"2013-06-11T16:03:50Z","timestamp":1370966630000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Cluster-based page segmentation-a fast and precise method for web page pre-processing"],"prefix":"10.1145","author":[{"given":"Jan","family":"Zeleny","sequence":"first","affiliation":[{"name":"Brno University of Technology, Brno, Czech Republic"}]},{"given":"Radek","family":"Burget","sequence":"additional","affiliation":[{"name":"Brno University of Technology, Brno, Czech Republic"}]}],"member":"320","published-online":{"date-parts":[[2013,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/1304596.1304845"},{"key":"e_1_3_2_1_2_1","volume-title":"November","author":"Cai D.","year":"2003","unstructured":"D. Cai , S. Yu , J. rong Wen, and W. ying Ma. VIPS: a vision-based page segmentation algorithm. Microsoft technical report MSR-TR-2003-79 , November 2003 . D. Cai, S. Yu, J. rong Wen, and W. ying Ma. VIPS: a vision-based page segmentation algorithm. Microsoft technical report MSR-TR-2003-79, November 2003."},{"key":"e_1_3_2_1_3_1","first-page":"18","volume-title":"Proceedings of the 4th Web as a Corpus Workshop, 6th Language Resources and Evaluation Conference., LREC 2008","author":"Fragkou P. G., P.","year":"2008","unstructured":"P. G., P. Fragkou , A. Theodorakos , V. Karkaletsis , and C. D. Spyropoulos . Segmenting HTML pages using visual and semantic information . In Proceedings of the 4th Web as a Corpus Workshop, 6th Language Resources and Evaluation Conference., LREC 2008 , pages 18 -- 25 , June 2008 . P. G., P. Fragkou, A. Theodorakos, V. Karkaletsis, and C. D. Spyropoulos. Segmenting HTML pages using visual and semantic information. In Proceedings of the 4th Web as a Corpus Workshop, 6th Language Resources and Evaluation Conference., LREC 2008, pages 18--25, June 2008."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1062745.1062763"},{"key":"e_1_3_2_1_5_1","first-page":"66","volume-title":"Proceedings of the IASTED International Conference on Internet and Multimedia Systems and Applications, EuroIMSA '08","author":"Gottron T.","year":"2008","unstructured":"T. Gottron . Bridging the gap: from multi document template detection to single document content extraction . In Proceedings of the IASTED International Conference on Internet and Multimedia Systems and Applications, EuroIMSA '08 , pages 66 -- 71 , Anaheim, CA, USA , 2008 . ACTA Press. T. Gottron. Bridging the gap: from multi document template detection to single document content extraction. In Proceedings of the IASTED International Conference on Internet and Multimedia Systems and Applications, EuroIMSA '08, pages 66--71, Anaheim, CA, USA, 2008. ACTA Press."},{"key":"e_1_3_2_1_6_1","unstructured":"W. H. W. Group. XHTML 1.0 the extensible hypertext markup language. W3C Recommendation August 2002.  W. H. W. Group. XHTML 1.0 the extensible hypertext markup language. W3C Recommendation August 2002."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.datak.2009.10.002"},{"key":"e_1_3_2_1_8_1","volume-title":"April","author":"Hors A. L.","year":"2004","unstructured":"A. L. Hors , P. L. Hegaret , L. Wood , G. Nicol , J. Robie , M. Champion , and S. Byrne . Document object model (DOM) level 3 document object model core. W3C Recommendation , April 2004 . A. L. Hors, P. L. Hegaret, L. Wood, G. Nicol, J. Robie, M. Champion, and S. Byrne. Document object model (DOM) level 3 document object model core. W3C Recommendation, April 2004."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1646204"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/956750.956826"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.109"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/988672.988740"},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the 4th Web as Corpus Workshop, LREC","author":"Spousta M.","year":"2008","unstructured":"M. Spousta , M. Marek , and P. Pecina . Victor: the Web-Page Cleaning Tool . In Proceedings of the 4th Web as Corpus Workshop, LREC , 2008 . M. Spousta, M. Marek, and P. Pecina. Victor: the Web-Page Cleaning Tool. In Proceedings of the 4th Web as Corpus Workshop, LREC, 2008."},{"key":"e_1_3_2_1_14_1","volume-title":"January","author":"Stenback J.","year":"2003","unstructured":"J. Stenback , P. L. Hegaret , and A. L. Hors . Document object model (DOM) level 2 document object model html. W3C Recommendation , January 2003 . J. Stenback, P. L. Hegaret, and A. L. Hors. Document object model (DOM) level 2 document object model html. W3C Recommendation, January 2003."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPIRE.2001.989761"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11280-009-0059-3"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1183614.1183654"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/956750.956785"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/775152.775155"}],"event":{"name":"WIMS '13: 3rd International Conference on Web Intelligence, Mining and Semantics","sponsor":["UAM Autonomous University of Madrid"],"location":"Madrid Spain","acronym":"WIMS '13"},"container-title":["Proceedings of the 3rd International Conference on Web Intelligence, Mining and Semantics"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2479787.2479792","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2479787.2479792","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T08:39:13Z","timestamp":1750235953000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2479787.2479792"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,6,12]]},"references-count":19,"alternative-id":["10.1145\/2479787.2479792","10.1145\/2479787"],"URL":"https:\/\/doi.org\/10.1145\/2479787.2479792","relation":{},"subject":[],"published":{"date-parts":[[2013,6,12]]},"assertion":[{"value":"2013-06-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}