{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T20:01:32Z","timestamp":1759694492240},"publisher-location":"Berlin, Heidelberg","reference-count":29,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642312731"},{"type":"electronic","value":"9783642312748"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-31274-8_4","type":"book-chapter","created":{"date-parts":[[2012,6,20]],"date-time":"2012-06-20T01:04:04Z","timestamp":1340154244000},"page":"43-57","source":"Crossref","is-referenced-by-count":8,"title":["Analysis and Detection of Web Spam by Means of Web Content"],"prefix":"10.1007","author":[{"given":"V\u00edctor M.","family":"Prieto","sequence":"first","affiliation":[]},{"given":"Manuel","family":"\u00c1lvarez","sequence":"additional","affiliation":[]},{"given":"Rafael","family":"L\u00f3pez-Garc\u00eda","sequence":"additional","affiliation":[]},{"given":"Fidel","family":"Cacheda","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Amitay, E., Carmel, D., Darlow, A., Lempel, R., Soffer, A.: The connectivity sonar: detecting site functionality by structural patterns. In: Proceedings of the Fourteenth ACM Conference on Hypertext and Hypermedia, pp. 38\u201347. ACM Press (2003)","DOI":"10.1145\/900058.900060"},{"key":"4_CR2","unstructured":"Benczur, A.A., Csalogany, K., Sarlos, T., Uher, M., Uher, M.: Spamrank - fully automatic link spam detection. In: Proceedings of the First International Workshop on Adversarial Information Retrieval on the Web (AIRWeb) (2005)"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Breiman, L., Breiman, L.: Bagging predictors. In: Machine Learning, pp. 123\u2013140 (1996)","DOI":"10.1007\/BF00058655"},{"issue":"5","key":"4_CR4","first-page":"377","volume":"4","author":"C. Castillo","year":"2010","unstructured":"Castillo, C., Davison, B.D.: Adversarial Web Search\u00a04(5), 377\u2013486 (2010)","journal-title":"Adversarial Web Search"},{"key":"4_CR5","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1145\/1244408.1244423","volume-title":"Proceedings of the 3rd International Workshop on Adversarial Information Retrieval on the Web, AIRWeb 2007","author":"K. Chellapilla","year":"2007","unstructured":"Chellapilla, K., Maykov, A.: A taxonomy of javascript redirection spam. In: Proceedings of the 3rd International Workshop on Adversarial Information Retrieval on the Web, AIRWeb 2007, pp. 81\u201388. ACM, New York (2007)"},{"key":"4_CR6","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1145\/1772690.1772720","volume-title":"Proceedings of the 19th International Conference on World Wide Web, WWW 2010","author":"M. Cova","year":"2010","unstructured":"Cova, M., Kruegel, C., Vigna, G.: Detection and analysis of drive-by-download attacks and malicious javascript code. In: Proceedings of the 19th International Conference on World Wide Web, WWW 2010, pp. 281\u2013290. ACM, New York (2010)"},{"key":"4_CR7","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J. Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: Mapreduce: simplified data processing on large clusters. Commun. ACM\u00a051, 107\u2013113 (2008)","journal-title":"Commun. ACM"},{"key":"4_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1017074.1017077","volume-title":"Proceedings of the 7th International Workshop on the Web and Databases: colocated with ACM SIGMOD\/PODS, WebDB 2004","author":"D. Fetterly","year":"2004","unstructured":"Fetterly, D., Manasse, M., Najork, M.: Spam, damn spam, and statistics: using statistical analysis to locate spam web pages. In: Proceedings of the 7th International Workshop on the Web and Databases: colocated with ACM SIGMOD\/PODS, WebDB 2004, pp. 1\u20136. ACM, New York (2004)"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Fetterly, D., Manasse, M., Najork, M.: Detecting phrase-level duplication on the world wide web. In: Proceedings of the 28th Annual International ACM SIGIR Conference on Research & Development in Information Retrieval, pp. 170\u2013177. ACM Press (2005)","DOI":"10.1145\/1076034.1076066"},{"key":"4_CR10","unstructured":"Gonzalez\u00a0Jesus, B.W., Cristina, A.: Implementacion y evaluacion de un detector masivo de web spam (2009)"},{"key":"4_CR11","unstructured":"Gyongyi, Z., Garcia-Molina, H.: Web spam taxonomy. Technical Report 2004-25, Stanford InfoLab (March 2004)"},{"key":"4_CR12","unstructured":"Gy\u00f6ngyi, Z., Garcia-Molina, H.: Link spam alliances. In: Proceedings of the 31st International Conference on Very Large Data Bases, VLDB 2005, pp. 517\u2013528. VLDB Endowment (2005)"},{"key":"4_CR13","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1145\/1656274.1656278","volume":"11","author":"M. Hall","year":"2009","unstructured":"Hall, M., Frank, E., Holmes, G., Pfahringer, B., Reutemann, P., Witten, I.H.: The weka data mining software: an update. SIGKDD Explor. Newsl.\u00a011, 10\u201318 (2009)","journal-title":"SIGKDD Explor. Newsl."},{"key":"4_CR14","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/792550.792553","volume":"36","author":"M.R. Henzinger","year":"2002","unstructured":"Henzinger, M.R., Motwani, R., Silverstein, C.: Challenges in web search engines. SIGIR Forum\u00a036, 11\u201322 (2002)","journal-title":"SIGIR Forum"},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Hidalgo, J.M.G.: Evaluating cost-sensitive unsolicited bulk email categorization (2002)","DOI":"10.1145\/508909.508911"},{"key":"4_CR16","unstructured":"Jansen, B.J., Spink, A.: An analysis of web documents retrieved and viewed (2003)"},{"key":"4_CR17","unstructured":"Kohavi, R.: A study of cross-validation and bootstrap for accuracy estimation and model selection, pp. 1137\u20131143. Morgan Kaufmann (1995)"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Ntoulas, A., Manasse, M.: Detecting spam web pages through content analysis. In: Proceedings of the World Wide Web Conference, pp. 83\u201392. ACM Press (2006)","DOI":"10.1145\/1135777.1135794"},{"key":"4_CR19","unstructured":"Quinlan, J.R.: Bagging, boosting, and c4.5. In: Proceedings of the Thirteenth National Conference on Artificial Intelligence, pp. 725\u2013730. AAAI Press (1996)"},{"key":"4_CR20","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1613\/jair.279","volume":"4","author":"J.R. Quinlan","year":"1996","unstructured":"Quinlan, J.R.: Improved use of continuous attributes in c4.5. Journal of Artificial Intelligence Research\u00a04, 77\u201390 (1996)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"4_CR21","volume-title":"C4.5: programs for machine learning","author":"J.R. Quinlan","year":"1993","unstructured":"Quinlan, J.R.: C4.5: programs for machine learning. Morgan Kaufmann Publishers Inc., San Francisco (1993)"},{"key":"4_CR22","unstructured":"Sahami, M., Dumais, S., Heckerman, D., Horvitz, E.: A bayesian approach to filtering junk e-mail (1998)"},{"key":"4_CR23","unstructured":"Webb, S.: Introducing the webb spam corpus: Using email spam to identify web spam automatically. In: Proceedings of the 3rd Conference on Email and AntiSpam (CEAS) (2006) (Mountain View)"},{"key":"4_CR24","unstructured":"Webb, S.: Webb Spam Corpus (2011), \n                    \n                      http:\/\/www.cc.gatech.edu\/projects\/doi\/WebbSpamCorpus.html"},{"key":"4_CR25","unstructured":"Wu, B., Davison, B.D.: Cloaking and redirection: A preliminary study (2005)"},{"key":"4_CR26","doi-asserted-by":"publisher","first-page":"820","DOI":"10.1145\/1062745.1062762","volume-title":"Special Interest Tracks and Posters of the 14th International Conference on World Wide Web, WWW 2005","author":"B. Wu","year":"2005","unstructured":"Wu, B., Davison, B.D.: Identifying link farm spam pages. In: Special Interest Tracks and Posters of the 14th International Conference on World Wide Web, WWW 2005, pp. 820\u2013829. ACM, New York (2005)"},{"key":"4_CR27","unstructured":"Yahoo!: Web spam challenge (2011), \n                    \n                      http:\/\/webspam.lip6.fr\/wiki\/pmwiki.php"},{"key":"4_CR28","unstructured":"Yahoo!: Web Spam Detection - Resources for Research on Web Spam (2011), \n                    \n                      http:\/\/barcelona.research.yahoo.net\/webspam\/"},{"key":"4_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1007\/978-3-540-30216-2_8","volume-title":"Algorithms and Models for the Web-Graph","author":"H. Zhang","year":"2004","unstructured":"Zhang, H., Goel, A., Govindan, R., Mason, K., Van Roy, B.: Making Eigenvector-Based Reputation Systems Robust to Collusion. In: Leonardi, S. (ed.) WAW 2004. LNCS, vol.\u00a03243, pp. 92\u2013104. Springer, Heidelberg (2004)"}],"container-title":["Lecture Notes in Computer Science","Multidisciplinary Information Retrieval"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-31274-8_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,3]],"date-time":"2019-05-03T06:59:48Z","timestamp":1556866788000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-31274-8_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642312731","9783642312748"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-31274-8_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}