{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T03:45:43Z","timestamp":1775187943794,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":14,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2006,8,6]]},"DOI":"10.1145\/1148170.1148222","type":"proceedings-article","created":{"date-parts":[[2006,10,18]],"date-time":"2006-10-18T22:04:00Z","timestamp":1161209040000},"page":"284-291","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":270,"title":["Finding near-duplicate web pages"],"prefix":"10.1145","author":[{"given":"Monika","family":"Henzinger","sequence":"first","affiliation":[{"name":"Google Inc. &amp; Ecole F\u00e9d\u00e9rale de Lausanne (EPFL)"}]}],"member":"320","published-online":{"date-parts":[[2006,8,6]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/223784.223855"},{"key":"e_1_3_2_1_2_1","volume-title":"Renato Capocelli","author":"Broder A.","year":"1993","unstructured":"A. Broder . Some applications of Rabin's fingerprinting method . In Renato Capocelli , Alfredo De Santis, and Ugo Vaccaro, editors, Sequences II : Methods in Communications, Security, and Computer Science , 1993 :143--152.]] A. Broder. Some applications of Rabin's fingerprinting method. In Renato Capocelli, Alfredo De Santis, and Ugo Vaccaro, editors, Sequences II: Methods in Communications, Security, and Computer Science, 1993:143--152.]]"},{"key":"e_1_3_2_1_3_1","volume-title":"Syntactic Clustering of the Web. In 6th International World Wide Web Conference (Apr.","author":"Broder A.","year":"1997","unstructured":"A. Broder , S. Glassman , M. Manasse , and G. Zweig . Syntactic Clustering of the Web. In 6th International World Wide Web Conference (Apr. 1997 ), 393--404.]] A. Broder, S. Glassman, M. Manasse, and G. Zweig. Syntactic Clustering of the Web. In 6th International World Wide Web Conference (Apr. 1997), 393--404.]]"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/509907.509965"},{"key":"e_1_3_2_1_5_1","unstructured":"M. S. Charikar. Private communication.]]  M. S. Charikar. Private communication.]]"},{"key":"e_1_3_2_1_6_1","volume-title":"MapReduce: Simplified Data Processing on Large Clusters. In 6th Symposium on Operating System Design and Implementation (Dec.","author":"Dean J.","year":"2004","unstructured":"J. Dean and S. Ghemawat . MapReduce: Simplified Data Processing on Large Clusters. In 6th Symposium on Operating System Design and Implementation (Dec. 2004 ), 137--150.]] J. Dean and S. Ghemawat. MapReduce: Simplified Data Processing on Large Clusters. In 6th Symposium on Operating System Design and Implementation (Dec. 2004), 137--150.]]"},{"key":"e_1_3_2_1_7_1","volume-title":"On the Evolution of Clusters of Near-Duplicate Web Pages. In 1st Latin American Web Congress (Nov.","author":"Fetterly D.","year":"2003","unstructured":"D. Fetterly , M. Manasse , and M. Najork . On the Evolution of Clusters of Near-Duplicate Web Pages. In 1st Latin American Web Congress (Nov. 2003 ), 37--45.]] D. Fetterly, M. Manasse, and M. Najork. On the Evolution of Clusters of Near-Duplicate Web Pages. In 1st Latin American Web Congress (Nov. 2003), 37--45.]]"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1076034.1076066"},{"key":"e_1_3_2_1_9_1","volume-title":"Proc. of the 2nd USENIX Workshop on Electronic Commerce (Nov","author":"Heintze N.","year":"1996","unstructured":"N. Heintze . Scalable Document Fingerprinting . In Proc. of the 2nd USENIX Workshop on Electronic Commerce (Nov 1996 ).]] N. Heintze. Scalable Document Fingerprinting. In Proc. of the 2nd USENIX Workshop on Electronic Commerce (Nov 1996).]]"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1002\/asi.10170"},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. of the USENIX","author":"Manber U.","year":"1994","unstructured":"U. Manber . Finding similar files in a large file system . In Proc. of the USENIX Winter 1994 Technical Conference (Jan. 1994).]] U. Manber. Finding similar files in a large file system. In Proc. of the USENIX Winter 1994 Technical Conference (Jan. 1994).]]"},{"key":"e_1_3_2_1_13_1","volume-title":"Proc. International Conference on Theory and Practice of Digital Libraries (June","author":"Shivakumar N.","year":"1995","unstructured":"N. Shivakumar and H. Garcia-Molina . SCAM: a copy detection mechanism for digital documents . In Proc. International Conference on Theory and Practice of Digital Libraries (June 1995 ).]] N. Shivakumar and H. Garcia-Molina. SCAM: a copy detection mechanism for digital documents. In Proc. International Conference on Theory and Practice of Digital Libraries (June 1995).]]"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/226931.226961"},{"key":"e_1_3_2_1_15_1","volume-title":"Proc. Workshop on Web Databases (March","author":"Shivakumar N.","year":"1998","unstructured":"N. Shivakumar and H. Garcia-Molina . Finding near-replicas of documents on the web . In Proc. Workshop on Web Databases (March 1998 ), 204--212.]] N. Shivakumar and H. Garcia-Molina. Finding near-replicas of documents on the web. In Proc. Workshop on Web Databases (March 1998), 204--212.]]"}],"event":{"name":"SIGIR06: The 29th Annual International SIGIR Conference","location":"Seattle Washington USA","acronym":"SIGIR06","sponsor":["ACM Association for Computing Machinery","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1148170.1148222","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,10]],"date-time":"2023-01-10T11:11:46Z","timestamp":1673349106000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1148170.1148222"}},"subtitle":["a large-scale evaluation of algorithms"],"short-title":[],"issued":{"date-parts":[[2006,8,6]]},"references-count":14,"alternative-id":["10.1145\/1148170.1148222","10.1145\/1148170"],"URL":"https:\/\/doi.org\/10.1145\/1148170.1148222","relation":{},"subject":[],"published":{"date-parts":[[2006,8,6]]},"assertion":[{"value":"2006-08-06","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}