{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T00:48:04Z","timestamp":1743122884049,"version":"3.40.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319604374"},{"type":"electronic","value":"9783319604381"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-60438-1_44","type":"book-chapter","created":{"date-parts":[[2017,6,13]],"date-time":"2017-06-13T00:13:43Z","timestamp":1497312823000},"page":"446-456","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Exploiting Web Sites Structural and Content Features for Web Pages Clustering"],"prefix":"10.1007","author":[{"given":"Pasqua Fabiana","family":"Lanotte","sequence":"first","affiliation":[]},{"given":"Fabio","family":"Fumarola","sequence":"additional","affiliation":[]},{"given":"Donato","family":"Malerba","sequence":"additional","affiliation":[]},{"given":"Michelangelo","family":"Ceci","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,6,14]]},"reference":[{"key":"44_CR1","doi-asserted-by":"crossref","unstructured":"Angelova, R., Siersdorfer, S.: A neighborhood-based approach for clustering of linked document collections. In: Proceedings of CIKM 2006, pp. 778\u2013779. ACM, New York (2006)","DOI":"10.1145\/1183614.1183726"},{"key":"44_CR2","doi-asserted-by":"crossref","unstructured":"Bohunsky, P., Gatterbauer, W.: Visual structure-based web page clustering and retrieval. In: Proceedings of the 19th International Conference on World Wide Web, WWW 2010, pp. 1067\u20131068. ACM, New York (2010)","DOI":"10.1145\/1772690.1772807"},{"key":"44_CR3","unstructured":"Buttler, D.: A short survey of document structure similarity algorithms. In: Proceedings of the International Conference on Internet Computing, IC 2004, Las Vegas, Nevada, USA, 21\u201324 June 2004, vol. 1, pp. 3\u20139 (2004)"},{"key":"44_CR4","doi-asserted-by":"crossref","unstructured":"Calado, P., Cristo, M., Moura, E., Ziviani, N., Ribeiro-Neto, B., Gon\u00e7alves, M.A.: Combining link-based and content-based methods for web document classification. In: Proceedings of the Twelfth International Conference on Information and Knowledge Management, CIKM 2003, pp. 394\u2013401. ACM, New York (2003)","DOI":"10.1145\/956863.956938"},{"key":"44_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1007\/978-3-642-37456-2_14","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"RJGB Campello","year":"2013","unstructured":"Campello, R.J.G.B., Moulavi, D., Sander, J.: Density-based clustering based on hierarchical density estimates. In: Pei, J., Tseng, V.S., Cao, L., Motoda, H., Xu, G. (eds.) PAKDD 2013. LNCS, vol. 7819, pp. 160\u2013172. Springer, Heidelberg (2013). doi:10.1007\/978-3-642-37456-2_14"},{"issue":"1","key":"44_CR6","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1016\/j.datak.2008.06.006","volume":"67","author":"MH Chehreghani","year":"2008","unstructured":"Chehreghani, M.H., Abolhassani, H., Chehreghani, M.H.: Improving density-based methods for hierarchical clustering of web pages. Data Knowl. Eng. 67(1), 30\u201350 (2008)","journal-title":"Data Knowl. Eng."},{"issue":"3","key":"44_CR7","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/j.datak.2004.11.004","volume":"54","author":"V Crescenzi","year":"2005","unstructured":"Crescenzi, V., Merialdo, P., Missier, P.: Clustering web pages based on their structure. Data Knowl. Eng. 54(3), 279\u2013299 (2005)","journal-title":"Data Knowl. Eng."},{"key":"44_CR8","unstructured":"Fathi, M., Adly, N., Nagi, M.: Web documents classification using text, anchor, title and metadata information. In: Proceedings of the International Conference on Computer Science, Software Engineering, Information Technology, e-Business and Applications, pp. 1\u20138 (2004)"},{"key":"44_CR9","first-page":"168","volume-title":"Selected Papers of J.R. Firth 1952-59","author":"J Firth","year":"1968","unstructured":"Firth, J.: A synopsis of linguistic theory 1930-55. In: Palmer, F.R. (ed.) Selected Papers of J.R. Firth 1952-59, pp. 168\u2013205. Longmans, London (1968)"},{"key":"44_CR10","doi-asserted-by":"crossref","unstructured":"Fumarola, F., Weninger, T., Barber, R., Malerba, D., Han, J.: Hylien: a hybrid approach to general list extraction on the web. In: Proceedings of the 20th International Conference on World Wide Web, WWW 2011, Hyderabad, India, 28 March - 1 April 2011 (Companion Volume), pp. 35\u201336 (2011)","DOI":"10.1145\/1963192.1963211"},{"key":"44_CR11","doi-asserted-by":"crossref","unstructured":"Gornerup, O., Gillblad, D., Vasiloudis, T.: Knowing an object by the company it keeps: a domain-agnostic scheme for similarity discovery. In: Proceedings of the 2015 IEEE International Conference on Data Mining (ICDM), ICDM 2015, pp. 121\u2013130. IEEE Computer Society, Washington, DC (2015)","DOI":"10.1109\/ICDM.2015.85"},{"issue":"2","key":"44_CR12","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1137\/090771806","volume":"53","author":"N Halko","year":"2011","unstructured":"Halko, N., Martinsson, P.G., Tropp, J.A.: Finding structure with randomness: probabilistic algorithms for constructing approximate matrix decompositions. SIAM Rev. 53(2), 217\u2013288 (2011)","journal-title":"SIAM Rev."},{"key":"44_CR13","doi-asserted-by":"crossref","unstructured":"Haveliwala, T.H., Gionis, A., Klein, D., Indyk, P.: Evaluating strategies for similarity search on the web. In: Proceedings of WWW 2002, pp. 432\u2013442. ACM, New York (2002)","DOI":"10.1145\/511499.511502"},{"issue":"1","key":"44_CR14","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/BF01908075","volume":"2","author":"L Hubert","year":"1985","unstructured":"Hubert, L., Arabie, P.: Comparing partitions. J. Classif. 2(1), 193\u2013218 (1985)","journal-title":"J. Classif."},{"key":"44_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1007\/978-3-319-08326-1_37","volume-title":"Foundations of Intelligent Systems","author":"PF Lanotte","year":"2014","unstructured":"Lanotte, P.F., Fumarola, F., Ceci, M., Scarpino, A., Torelli, M.D., Malerba, D.: Automatic extraction of logical web lists. In: Andreasen, T., Christiansen, H., Cubero, J.-C., Ra\u015b, Z.W. (eds.) ISMIS 2014. LNCS, vol. 8502, pp. 365\u2013374. Springer, Cham (2014). doi:10.1007\/978-3-319-08326-1_37"},{"key":"44_CR16","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1007\/978-3-642-13672-6_22","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"CX Lin","year":"2010","unstructured":"Lin, C.X., Yu, Y., Han, J., Liu, B.: Hierarchical web-page clustering via in-page and cross-page link structures. In: Zaki, M.J., Yu, J.X., Ravindran, B., Pudi, V. (eds.) PAKDD 2010. LNCS (LNAI), vol. 6119, pp. 222\u2013229. Springer, Heidelberg (2010). doi:10.1007\/978-3-642-13672-6_22"},{"key":"44_CR17","first-page":"3111","volume":"26","author":"T Mikolov","year":"2013","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. Adv. Neural Inf. Process. Syst. 26, 3111\u20133119 (2013)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"44_CR18","doi-asserted-by":"crossref","unstructured":"Perozzi, B., Al-Rfou, R., Skiena, S.: Deepwalk: online learning of social representations. In: ACM SIGKDD 2014, KDD 2014, pp. 701\u2013710. ACM, New York (2014)","DOI":"10.1145\/2623330.2623732"},{"key":"44_CR19","doi-asserted-by":"crossref","unstructured":"Qi, X., Davison, B.D.: Knowing a web page by the company it keeps. In: Proceedings of the 15th ACM International Conference on Information and Knowledge Management, CIKM 2006, pp. 228\u2013237. ACM, New York (2006)","DOI":"10.1145\/1183614.1183650"},{"key":"44_CR20","unstructured":"Rosenberg, A., Hirschberg, J.: V-measure: a conditional entropy-based external cluster evaluation measure. In: EMNLP-CoNLL 2007, pp. 410\u2013420 (2007)"},{"issue":"1","key":"44_CR21","first-page":"33","volume":"20","author":"M Sahlgren","year":"2008","unstructured":"Sahlgren, M.: The distributional hypothesis. Ital. J. Linguist. 20(1), 33\u201354 (2008)","journal-title":"Ital. J. Linguist."},{"key":"44_CR22","doi-asserted-by":"crossref","unstructured":"Tang, J., Qu, M., Wang, M., Zhang, M., Yan, J., Mei, Q.: Line: large-scale information network embedding. In: Proceedings of the 24th International Conference on World Wide Web, WWW 2015, New York, NY, USA, pp. 1067\u20131077 (2015)","DOI":"10.1145\/2736277.2741093"},{"issue":"1","key":"44_CR23","first-page":"141","volume":"37","author":"PD Turney","year":"2010","unstructured":"Turney, P.D., Pantel, P.: From frequency to meaning: vector space models of semantics. J. Artif. Int. Res. 37(1), 141\u2013188 (2010)","journal-title":"J. Artif. Int. Res."},{"key":"44_CR24","doi-asserted-by":"crossref","unstructured":"Weninger, T., Johnston, T.J., Han, J.: The parallel path framework for entity discovery on the web. ACM Trans. Web 7(3), 16:1\u201316:29 (2013)","DOI":"10.1145\/2516633.2516638"}],"container-title":["Lecture Notes in Computer Science","Foundations of Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-60438-1_44","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T08:42:34Z","timestamp":1710232954000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-60438-1_44"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319604374","9783319604381"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-60438-1_44","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"14 June 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISMIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Methodologies for Intelligent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Warsaw","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 June 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 June 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ismis2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ismis2017.ii.pw.edu.pl\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}