{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T12:01:10Z","timestamp":1780574470195,"version":"3.54.1"},"publisher-location":"Berlin, Heidelberg","reference-count":26,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642173158","type":"print"},{"value":"9783642173165","type":"electronic"}],"license":[{"start":{"date-parts":[[2010,1,1]],"date-time":"2010-01-01T00:00:00Z","timestamp":1262304000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-17316-5_16","type":"book-chapter","created":{"date-parts":[[2010,11,12]],"date-time":"2010-11-12T00:19:48Z","timestamp":1289521188000},"page":"169-180","source":"Crossref","is-referenced-by-count":3,"title":["Fixing the Threshold for Effective Detection of Near Duplicate Web Documents in Web Crawling"],"prefix":"10.1007","author":[{"given":"V. A.","family":"Narayana","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"P.","family":"Premchand","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"A.","family":"Govardhan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Narayana, V.A., Premchand, P., Govardhan, A.: A Novel and Efficient Approach For Near Duplicate Page Detection in Web crawling. In: IEEE International Advance Computing Conference, Patiala, pp. 1492\u20131496 (2009)","DOI":"10.1109\/IADCC.2009.4809238"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Pant, G., Srinivasan, P., Menczer, F.: Crawling the Web. In: Web Dynamics: Adapting to Change in Content, Size, Topology and Use. Springer, Heidelberg (2004)","DOI":"10.1007\/978-3-662-10874-1_7"},{"key":"16_CR3","unstructured":"Balamurugan, S., Rajkumar, N.: Design and Implementation of a New Model Web Crawler with Enhanced Reliability. Proceedings of World Academy of Science, Engineering and Technology\u00a032 (2008) ISSN 2070-3740"},{"key":"16_CR4","first-page":"241","volume-title":"Proc. 24th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"F. Menczer","year":"2001","unstructured":"Menczer, F., Pant, G., Srinivasan, P., Ruiz, M.E.: Evaluating topic-driven web crawlers. In: Proc. 24th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 241\u2013249. ACM, New Orleans (2001)"},{"key":"16_CR5","doi-asserted-by":"crossref","first-page":"679","DOI":"10.1145\/775152.775247","volume-title":"International Conference on World Wide Web","author":"A.Z. Broder","year":"2003","unstructured":"Broder, A.Z., Najork, M., Wiener, J.L.: Efficient URL caching for World Wide Web crawling. In: International Conference on World Wide Web, pp. 679\u2013689. ACM, Budapest (2003)"},{"key":"16_CR6","volume-title":"Mining the Web: Discovering Knowledge from Hypertext Data","author":"S. Chakrabarti","year":"2002","unstructured":"Chakrabarti, S.: Mining the Web: Discovering Knowledge from Hypertext Data. Morgan Kaufmann, San Francisco (2002)"},{"issue":"1-7","key":"16_CR7","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1016\/S0169-7552(98)00108-1","volume":"30","author":"J. Cho","year":"1998","unstructured":"Cho, J., Garcia-Molina, H., Page, L.: Efficient crawling through URL ordering. Computer Networks and ISDN Systems\u00a030(1-7), 161\u2013172 (1998)","journal-title":"Computer Networks and ISDN Systems"},{"key":"16_CR8","first-page":"380","volume-title":"Proc. 34th Annual Symposium on Theory of Computing (STOC 2002)","author":"M. Charikar","year":"2002","unstructured":"Charikar, M.: Similarity estimation techniques from rounding algorithms. In: Proc. 34th Annual Symposium on Theory of Computing (STOC 2002), pp. 380\u2013388. ACM, Montreal (2002)"},{"issue":"2","key":"16_CR9","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1145\/335191.335429","volume":"29","author":"J. Cho","year":"2000","unstructured":"Cho, J., Shivakumar, N., Garcia-Molina, H.: Finding replicated web collections. ACM SIGMOD Record\u00a029(2), 355\u2013366 (2000)","journal-title":"ACM SIGMOD Record"},{"key":"16_CR10","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1145\/956863.956946","volume-title":"CIKM","author":"J.G. Conrad","year":"2003","unstructured":"Conrad, J.G., Guo, X.S., Schriber, C.P.: Online duplicate document detection: signature reliability in a dynamic retrieval environment. In: CIKM, pp. 443\u2013452. ACM, New Orleans (2003)"},{"key":"16_CR11","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1145\/1060745.1060805","volume-title":"Proceedings of the 14th International Conference on World Wide Web","author":"S. Pandey","year":"2005","unstructured":"Pandey, S., Olston, C.: User-centric Web crawling. In: Proceedings of the 14th International Conference on World Wide Web, pp. 401\u2013411. ACM, Chiba (2005)"},{"key":"16_CR12","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1145\/1367497.1367516","volume-title":"Proceeding of the 17th International Conference on World Wide Web","author":"C. Xiao","year":"2008","unstructured":"Xiao, C., Wang, W., Lin, X.M., Xu Yu, J.: Efficient Similarity Joins for Near Duplicate Detection. In: Proceeding of the 17th International Conference on World Wide Web, pp. 131\u2013140. ACM, Beijing (2008)"},{"key":"16_CR13","doi-asserted-by":"crossref","first-page":"284","DOI":"10.1145\/1148170.1148222","volume-title":"Proceedings of the 29th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval","author":"M. Henzinger","year":"2006","unstructured":"Henzinger, M.: Finding near-duplicate web pages: a large-scale evaluation of algorithms. In: Proceedings of the 29th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 284\u2013291. ACM, Seattle (2006)"},{"issue":"1","key":"16_CR14","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1145\/1067268.1067287","volume":"39","author":"C. Castillo","year":"2005","unstructured":"Castillo, C.: Effective web crawling. ACM SIGIR Forum\u00a039(1), 55\u201356 (2005)","journal-title":"ACM SIGIR Forum"},{"key":"16_CR15","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1145\/1242572.1242592","volume-title":"Proceedings of the 16th International Conference on World Wide Web","author":"G.S. Manku","year":"2007","unstructured":"Manku, G.S., Jain, A., Sarma, A.D.: Detecting near-duplicates for web crawling. In: Proceedings of the 16th International Conference on World Wide Web, pp. 141\u2013150. ACM, Banff (2007)"},{"key":"16_CR16","first-page":"721","volume-title":"VLDB","author":"D. Gibson","year":"2005","unstructured":"Gibson, D., Kumar, R., Tomkins, A.: Discovering large dense subgraphs in massive graphs. In: VLDB, pp. 721\u2013732. ACM, Trondheim (2005)"},{"key":"16_CR17","doi-asserted-by":"crossref","first-page":"678","DOI":"10.1145\/1081870.1081956","volume-title":"KDD","author":"E. Spertus","year":"2005","unstructured":"Spertus, E., Sahami, M., Buyukkokten, O.: Evaluating similarity measures: a large-scale study in the orkut social network. In: KDD, pp. 678\u2013684. ACM, Chicago (2005)"},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Singh, A., Srivatsa, M., Liu, L., Miller, T.: Apoidea: A Decentralized Peer-to-Peer Architecture for Crawling the World Wide Web. In: Proceedings of the SIGIR 2003 Workshop on Distributed Information Retrieval. LNCS, pp. 126\u2013130. ACM, Toronto (2003)","DOI":"10.1007\/978-3-540-24610-7_10"},{"key":"16_CR19","first-page":"22","volume":"11","author":"J.B. Lovins","year":"1968","unstructured":"Lovins, J.B.: Development of a stemming algorithm. Mechanical Translation and Computational Linguistics\u00a011, 22\u201331 (1968)","journal-title":"Mechanical Translation and Computational Linguistics"},{"key":"16_CR20","series-title":"Lecture Notes in Computer Science","first-page":"161","volume-title":"Advances in Cross-Language Information Retrieval","author":"M. Bacchin","year":"2003","unstructured":"Bacchin, M., Ferro, N., Melucci, M.: Experiments to evaluate a statistical stemming algorithm. In: Peters, C., Braschler, M., Gonzalo, J. (eds.) CLEF 2002. LNCS, vol.\u00a02785, pp. 161\u2013168. Springer, Heidelberg (2003)"},{"issue":"2","key":"16_CR21","doi-asserted-by":"publisher","first-page":"398","DOI":"10.1145\/568271.223855","volume":"24","author":"S. Brin","year":"1995","unstructured":"Brin, S., Davis, J., Garcia-Molina, H.: Copy detection mechanisms for digital documents. ACM SIGMOD Record\u00a024(2), 398\u2013409 (1995)","journal-title":"ACM SIGMOD Record"},{"key":"16_CR22","first-page":"391","volume-title":"Proceedings of WWW6 1997","author":"A.Z. Broder","year":"1997","unstructured":"Broder, A.Z., Glassman, S.C., Manasse, M.S., Zweig, G.: Syntactic clustering of the web. In: Proceedings of WWW6 1997, pp. 391\u2013404. Elsevier Science, Santa Clara (1997)"},{"key":"16_CR23","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1145\/956863.956946","volume-title":"Proceedings of the Twelfth International Conference on Information and Knowledge Management","author":"J. Conrad","year":"2003","unstructured":"Conrad, J., Schriber, C.P.: Online duplicate document detection: signature reliability in a dynamic retrieval environment. In: Proceedings of the Twelfth International Conference on Information and Knowledge Management, pp. 443\u2013452. ACM, New Orleans (2003)"},{"key":"16_CR24","first-page":"517","volume-title":"Proceedings of the Fourteenth International Conference on Information and Knowledge Management, CIKM 2005","author":"D. Metzler","year":"2005","unstructured":"Metzler, D., Bernstein, Y., Bruce Croft, W.: Similarity Measures for Tracking Information Flow. In: Proceedings of the Fourteenth International Conference on Information and Knowledge Management, CIKM 2005, pp. 517\u2013524. ACM, Bremen (2005)"},{"key":"16_CR25","first-page":"78","volume-title":"Proceedings of the 2005 National conference on Digital Government Research","author":"H. Yang","year":"2005","unstructured":"Yang, H., Callan, J.: Near-duplicate detection for eRulemaking. In: Proceedings of the 2005 National conference on Digital Government Research, pp. 78\u201386. Digital Government Society of North America, Atlanta (2005)"},{"key":"16_CR26","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1145\/1146598.1146663","volume-title":"Proceedings of the 2006 International Conference on Digital Government Research","author":"H. Yang","year":"2006","unstructured":"Yang, H., Callan, J., Shulman, S.: Next steps in near-duplicate detection for eRulemaking. In: Proceedings of the 2006 International Conference on Digital Government Research, pp. 239\u2013248. ACM, San Diego (2006)"}],"container-title":["Lecture Notes in Computer Science","Advanced Data Mining and Applications"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-17316-5_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T05:38:32Z","timestamp":1685857112000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-17316-5_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642173158","9783642173165"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-17316-5_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010]]}}}