{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T03:06:23Z","timestamp":1725505583678},"publisher-location":"Berlin, Heidelberg","reference-count":18,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540786450"},{"type":"electronic","value":"9783540786467"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-78646-7_7","type":"book-chapter","created":{"date-parts":[[2008,3,26]],"date-time":"2008-03-26T08:45:33Z","timestamp":1206521133000},"page":"40-51","source":"Crossref","is-referenced-by-count":7,"title":["Clustering Template Based Web Documents"],"prefix":"10.1007","author":[{"given":"Thomas","family":"Gottron","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"7_CR1","doi-asserted-by":"publisher","first-page":"580","DOI":"10.1145\/511446.511522","volume-title":"WWW 2002: Proceedings of the 11th International Conference on World Wide Web","author":"Z. Bar-Yossef","year":"2002","unstructured":"Bar-Yossef, Z., Rajagopalan, S.: Template detection via data mining and its applications. In: WWW 2002: Proceedings of the 11th International Conference on World Wide Web, pp. 580\u2013591. ACM Press, New York (2002)"},{"key":"7_CR2","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1145\/956863.956907","volume-title":"CIKM 2003: Proceedings of the twelfth International Conference on Information and Knowledge Management","author":"G. Yang","year":"2003","unstructured":"Yang, G., Ramakrishnan, I.V., Kifer, M.: On the complexity of schema inference from web pages in the presence of nullable data attributes. In: CIKM 2003: Proceedings of the twelfth International Conference on Information and Knowledge Management, pp. 224\u2013231. ACM Press, New York (2003)"},{"key":"7_CR3","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1145\/775047.775134","volume-title":"KDD 2002: Proceedings of the eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":"S.H. Lin","year":"2002","unstructured":"Lin, S.H., Ho, J.M.: Discovering informative content blocks from web documents. In: KDD 2002: Proceedings of the eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 588\u2013593. ACM Press, New York (2002)"},{"key":"7_CR4","doi-asserted-by":"publisher","first-page":"1722","DOI":"10.1145\/1066677.1067065","volume-title":"SAC 2005: Proceedings of the 2005 ACM Symposium on Applied Computing","author":"S. Debnath","year":"2005","unstructured":"Debnath, S., Mitra, P., Giles, C.L.: Automatic extraction of informative blocks from webpages. In: SAC 2005, pp. 1722\u20131726. ACM Press, New York (2005)"},{"key":"7_CR5","doi-asserted-by":"publisher","first-page":"296","DOI":"10.1145\/956750.956785","volume-title":"KDD 2003: Proceedings of the ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":"L. Yi","year":"2003","unstructured":"Yi, L., Liu, B., Li, X.: Eliminating noisy information in web pages for data mining. In: KDD 2003: Proceedings of the ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 296\u2013305. ACM Press, New York (2003)"},{"key":"7_CR6","doi-asserted-by":"publisher","first-page":"502","DOI":"10.1145\/988672.988740","volume-title":"WWW 2004: Proceedings of the 13th International Conference on World Wide Web","author":"D.C. Reis","year":"2004","unstructured":"Reis, D.C., Golgher, P.B., Silva, A.S., Laender, A.F.: Automatic web news extraction using tree edit distance. In: WWW 2004: Proceedings of the 13th International Conference on World Wide Web, pp. 502\u2013511. ACM Press, New York (2004), doi:10.1145\/988672.988740"},{"key":"7_CR7","doi-asserted-by":"publisher","first-page":"830","DOI":"10.1145\/1062745.1062763","volume-title":"WWW 2005: Special Interest Tracks and Posters of the 14th International Conference on World Wide Web","author":"D. Gibson","year":"2005","unstructured":"Gibson, D., Punera, K., Tomkins, A.: The volume and evolution of web page templates. In: WWW 2005: Special Interest Tracks and Posters of the 14th International Conference on World Wide Web, pp. 830\u2013839. ACM Press, New York (2005)"},{"key":"7_CR8","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1145\/1242572.1242582","volume-title":"WWW 2007: Proceedings of the 16th International Conference on World Wide Web","author":"D. Chakrabarti","year":"2007","unstructured":"Chakrabarti, D., Kumar, R., Punera, K.: Page-level template detection via isotonic smoothing. In: WWW 2007: Proceedings of the 16th International Conference on World Wide Web, pp. 61\u201370. ACM Press, New York (2007)"},{"key":"7_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"513","DOI":"10.1007\/BFb0053296","volume-title":"Evolutionary Programming VII","author":"I.F. Cruz","year":"1998","unstructured":"Cruz, I.F., Borisov, S., Marks, M.A., Webbs, T.R.: Measuring structural similarity among web documents: preliminary results. In: Porto, V.W., Waagen, D. (eds.) EP 1998. LNCS, vol.\u00a01447, pp. 513\u2013524. Springer, Heidelberg (1998)"},{"key":"7_CR10","unstructured":"Buttler, D.: A short survey of document structure similarity algorithms. In: IC 2004: Proceedings of the International Conference on Internet Computing, pp. 3\u20139. CSREA Press (2004)"},{"issue":"8-13","key":"7_CR11","first-page":"1157","volume":"29","author":"A.Z. Broder","year":"1997","unstructured":"Broder, A.Z., Glassman, S.C., Manasse, M.S., Zweig, G.: Syntactic clustering of the web. Computer Networks\u00a029(8-13), 1157\u20131166 (1997)","journal-title":"Computer Networks"},{"key":"7_CR12","doi-asserted-by":"publisher","first-page":"577","DOI":"10.1145\/956750.956822","volume-title":"KDD 2003: Proceedings of the ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","author":"S. Joshi","year":"2003","unstructured":"Joshi, S., Agrawal, N., Krishnapuram, R., Negi, S.: A bag of paths model for measuring structural similarity in web documents. In: KDD 2003: Proceedings of the ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 577\u2013582. ACM Press, New York (2003)"},{"key":"7_CR13","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1145\/1166160.1166183","volume-title":"DocEng 2006: Proceedings of the 2006 ACM Symposium on Document Engineering","author":"T. Lindholm","year":"2006","unstructured":"Lindholm, T., Kangasharju, J., Tarkoma, S.: Fast and simple XML tree differencing by sequence alignment. In: DocEng 2006: Proceedings of the 2006 ACM Symposium on Document Engineering, pp. 75\u201384. ACM Press, New York (2006)"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Shi, L., Niu, C., Zhou, M., Gao, J.: A DOM tree alignment model for mining parallel data from the web. In: ACL 2006: Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the ACL, Morristown, NJ, USA, Association for Computational Linguistics, pp. 489\u2013496 (2006)","DOI":"10.3115\/1220175.1220237"},{"key":"7_CR15","volume-title":"Web Data Mining \u2013 Exploring Hyperlinks, Contents, and Usage Data","author":"B. Liu","year":"2007","unstructured":"Liu, B.: Web Data Mining \u2013 Exploring Hyperlinks, Contents, and Usage Data. Springer, Heidelberg (2007)"},{"issue":"2","key":"7_CR16","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/BF02289694","volume":"29","author":"J.B. Kruskal","year":"1964","unstructured":"Kruskal, J.B.: Nonmetric multidimensional scaling: A numerical method. Psychometrika\u00a029(2), 115\u2013129 (1964)","journal-title":"Psychometrika"},{"issue":"336","key":"7_CR17","doi-asserted-by":"publisher","first-page":"846","DOI":"10.2307\/2284239","volume":"66","author":"W.M. Rand","year":"1971","unstructured":"Rand, W.M.: Objective criteria for the evaluation of clustering methods. Journal of the American Statistical Association\u00a066(336), 846\u2013850 (1971)","journal-title":"Journal of the American Statistical Association"},{"key":"7_CR18","unstructured":"Strehl, A., Ghosh, J., Mooney, R.: Impact of similarity measures on web-page clustering. In: AAAI 2000: Proceedings of the 17th National Conference on Artificial Intelligence: Workshop of Artificial Intelligence for Web Search, AAAI, pp. 58\u201364 (2000)"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-78646-7_7.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,27]],"date-time":"2021-04-27T07:17:21Z","timestamp":1619507841000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-78646-7_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540786450","9783540786467"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-78646-7_7","relation":{},"subject":[]}}