{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:43:34Z","timestamp":1740123814589,"version":"3.37.3"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2018,9,12]],"date-time":"2018-09-12T00:00:00Z","timestamp":1536710400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["World Wide Web"],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1007\/s11280-018-0634-6","type":"journal-article","created":{"date-parts":[[2018,9,12]],"date-time":"2018-09-12T07:21:02Z","timestamp":1536736862000},"page":"1999-2015","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Large-scale holistic approach to Web block classification: assembling the jigsaws of a Web page puzzle"],"prefix":"10.1007","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9066-7743","authenticated-orcid":false,"given":"Andrey","family":"Kravchenko","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,9,12]]},"reference":[{"key":"634_CR1","unstructured":"Abiteboul, S., Hull, R., Vianu, V.: Foundations of Databases. Addison-Wesley Longman Publishing Co. Inc. (1995)"},{"key":"634_CR2","doi-asserted-by":"crossref","unstructured":"Ashok, V., Puzis, Y., Borodin, Y., Ramakrishnan, I.V.: Web screen reading automation assistance using semantic abstraction IUI\u201917 (2017)","DOI":"10.1145\/3025171.3025229"},{"key":"634_CR3","doi-asserted-by":"crossref","unstructured":"Burget, R., Rudolfova, I.: Web page element classification based on visual features. In: 2009 First Asia conference on intelligent information and database systems (2009)","DOI":"10.1109\/ACIIDS.2009.71"},{"key":"634_CR4","doi-asserted-by":"crossref","unstructured":"Cai, D., He, X., Wen, J., Ma, W.: Block-level link analysis. SIGIR\u201904 (2004)","DOI":"10.1145\/1008992.1009068"},{"key":"634_CR5","doi-asserted-by":"crossref","unstructured":"Cao, Y., Niu, Z., Dai, L., Zhao, Y.: Extraction of informative blocks from Web pages ALPIT\u201908 (2008)","DOI":"10.1109\/ALPIT.2008.106"},{"key":"634_CR6","unstructured":"Chen, J., Zhou, B., Shi, J., Zhang, H., Fengwu, Q.: Function-Based Object Model Towards Website Adaptation. WWW\u201910 (2010)"},{"key":"634_CR7","doi-asserted-by":"crossref","unstructured":"Cook, S.A.: The complexity of theorem-proving procedures STOC\u201971 (1971)","DOI":"10.1145\/800157.805047"},{"key":"634_CR8","doi-asserted-by":"crossref","unstructured":"de Moor, O., Gottlob, G., Furche, T., Sellers, A. (eds.): Datalog Reloaded, Revised Selected Papers. LNCS (2011)","DOI":"10.1007\/978-3-642-24206-9"},{"key":"634_CR9","unstructured":"Fayzrakhmanov, R.R.: Web Accessibility for the blind through visual representation analysis. PhD Thesis (2013)"},{"key":"634_CR10","doi-asserted-by":"crossref","unstructured":"Ferrara, E., De Meo, P., Fiumara, G., Baumgartner, R.: Web data extraction, applications, and techniques: a survey. Knowl.-Based Syst., vol. 70 (2014)","DOI":"10.1016\/j.knosys.2014.07.007"},{"key":"634_CR11","doi-asserted-by":"crossref","unstructured":"Furche, T., Gottlob, G., Grasso, G., Gunes, O., Guo, X., Kravchenko, A., Orsi, G., Schallhart, C., Sellers, A.J., Wang, C.: DIADEM: Domain-centric, intelligent, automated data extraction methodology. WWW\u201912 (2012)","DOI":"10.1145\/2187980.2188025"},{"key":"634_CR12","doi-asserted-by":"crossref","unstructured":"Furche, T., Grasso, G., Kravchenko, A., Schallhart, C.: Turn the page automated traversal of paginated websites. ICWE\u201912 (2012)","DOI":"10.1007\/978-3-642-31753-8_27"},{"issue":"4","key":"634_CR13","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1007\/s10032-010-0136-2","volume":"14","author":"A Goel","year":"2011","unstructured":"Goel, A., Michelson, M., Knoblock, C.A.: Harvesting maps on the Web. Int. J. Doc. Anal. Recognit. 14(4), 349\u2013372 (2011)","journal-title":"Int. J. Doc. Anal. Recognit."},{"key":"634_CR14","doi-asserted-by":"crossref","unstructured":"Gupta, S., Kaiser, G., Neistadt, D., Grimm, P.: DOM-based content extraction of html documents. WWW 2003 (2003)","DOI":"10.1145\/775152.775182"},{"key":"634_CR15","doi-asserted-by":"crossref","unstructured":"Kang, J., Choi, J.: Block classification of a Web page by using a combination of multiple classifiers. In: Fourth International Conference on Networked Computing and Advanced Information Management (2008)","DOI":"10.1109\/NCM.2008.170"},{"issue":"11","key":"634_CR16","first-page":"1893","volume":"14","author":"J Kang","year":"2008","unstructured":"Kang, J., Choi, J.: Recognising informative Web page blocks using visual segmentation for efficient information extraction. J. Univ. Comput. Sci. 14(11), 1893\u20131910 (2008)","journal-title":"J. Univ. Comput. Sci."},{"key":"634_CR17","doi-asserted-by":"crossref","unstructured":"Karp, R.M.: Reducibility among combinatorial problems. Complexity of Computer Computations (1972)","DOI":"10.1007\/978-1-4684-2001-2_9"},{"key":"#cr-split#-634_CR18.1","doi-asserted-by":"crossref","unstructured":"Keller, M., Hartenstein, H.: GRABEX: A graph-based method for Web site block classification and its application on mining breadcrumb trails. In: 2013 IEEE\/WIC\/ACM International Conferences on Web Intelligence","DOI":"10.1109\/WI-IAT.2013.42"},{"key":"#cr-split#-634_CR18.2","unstructured":"(WI) and Intelligent Agent Technology (IAT) (2013)"},{"key":"634_CR19","doi-asserted-by":"crossref","unstructured":"Kordomatis, I., Herzog, C., Fayzrakhmanov, R.R., Kr\u00fcpl-Sypien, B., Holzinger, W., Baumgartner, R.: Web Object Identification for Web Automation and Meta-search. WIMS\u201913 (2013)","DOI":"10.1145\/2479787.2479798"},{"key":"634_CR20","unstructured":"Kravchenko, A.: BERyL: A system for Web block classification. Transactions on Computational Science (2018)"},{"key":"634_CR21","doi-asserted-by":"crossref","unstructured":"Kr\u00fcpl-Sypien, B., Fayzrakhmanov, R.R., Holzinger, W., Panzenb\u00f6ck, M., Baumgartner, R.: A versatile model for Web page representation, information extraction and content re-packaging. DocEng\u201911 (2011)","DOI":"10.1145\/2034691.2034721"},{"key":"634_CR22","doi-asserted-by":"crossref","unstructured":"Lee, C.H., Kan, M., Lai, S.: Stylistic and Lexical Co-Training for Web Block Classification. WIDM\u201904 (2004)","DOI":"10.1145\/1031453.1031478"},{"issue":"1","key":"634_CR23","first-page":"271","volume":"6","author":"C Li","year":"2010","unstructured":"Li, C., Dong, J., Chen, J.: Extraction of informative blocks from Web pages based on VIPS. J. Comput. Inf. Syst. 6(1), 271\u2013277 (2010)","journal-title":"J. Comput. Inf. Syst."},{"issue":"3","key":"634_CR24","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1109\/TKDE.2009.109","volume":"22","author":"W Liu","year":"2010","unstructured":"Liu, W., Meng, X.: VIDE: A Vision-Based approach for deep Web data extraction. IEEE Trans. Knowl. Data Eng. 22(3), 447\u2013460 (2010)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"634_CR25","doi-asserted-by":"crossref","unstructured":"Luo, P., Lin, F., Xiong, Y., Zhao, Y., Shi, Z.: Towards combining Web classification and Web information extraction: a case study. KDD\u201909 (2009)","DOI":"10.1145\/1557019.1557152"},{"key":"634_CR26","doi-asserted-by":"crossref","unstructured":"Maekawa, T., Hara, T., Nishio, S.: Image classification for mobile Web browsing. WWW\u201906 (2006)","DOI":"10.1145\/1135777.1135789"},{"issue":"3","key":"634_CR27","first-page":"235","volume":"16","author":"J Ross Quinlan","year":"1993","unstructured":"Ross Quinlan, J.: C4.5: Programs for machine learning. Mach. Learn. 16(3), 235\u2013240 (1993)","journal-title":"Mach. Learn."},{"key":"634_CR28","doi-asserted-by":"crossref","unstructured":"Vadrevu, S., Velipasaoglu, E.: Identifying primary content from Web page and its application to Web search ranking. WWW\u201911 (2011)","DOI":"10.1145\/1963192.1963261"},{"key":"634_CR29","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, C., Wang, C., Pei, J., Bu, J., Guan, Z., Zhang, W.V.: Can we learn a Template-Independent wrapper for news article extraction from a single training site? KDD\u201909 (2009)","DOI":"10.1145\/1557019.1557163"},{"key":"634_CR30","doi-asserted-by":"crossref","unstructured":"Wu, C., Zeng, G., Xu, G.: A Web page segmentation algorithm for extracting product information. In: Proceedings of the 2006 IEEE International Conference on Information Acquisition (2006)","DOI":"10.1109\/ICIA.2006.305954"},{"key":"634_CR31","doi-asserted-by":"crossref","unstructured":"Xiang, P., Yang, X., Shi, Y.: Web page segmentation based on gestalt theory 2007. In: IEEE International Conference on Multimedia and Expo (2007)","DOI":"10.1109\/ICME.2007.4285135"},{"key":"634_CR32","doi-asserted-by":"crossref","unstructured":"Zheng, S., Song, R., Wen, J., Giles, C.L.: Efficient record-level wrapper induction. CIKM\u201909 (2009)","DOI":"10.1145\/1645953.1645962"}],"container-title":["World Wide Web"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-018-0634-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11280-018-0634-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11280-018-0634-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,9]],"date-time":"2020-11-09T15:05:53Z","timestamp":1604934353000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11280-018-0634-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9,12]]},"references-count":33,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2019,9]]}},"alternative-id":["634"],"URL":"https:\/\/doi.org\/10.1007\/s11280-018-0634-6","relation":{},"ISSN":["1386-145X","1573-1413"],"issn-type":[{"type":"print","value":"1386-145X"},{"type":"electronic","value":"1573-1413"}],"subject":[],"published":{"date-parts":[[2018,9,12]]},"assertion":[{"value":"11 April 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 June 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 September 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}