{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T04:16:58Z","timestamp":1742530618994,"version":"3.40.1"},"publisher-location":"Berlin, Heidelberg","reference-count":14,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642283192"},{"type":"electronic","value":"9783642283208"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-28320-8_20","type":"book-chapter","created":{"date-parts":[[2012,2,17]],"date-time":"2012-02-17T13:04:49Z","timestamp":1329483889000},"page":"234-242","source":"Crossref","is-referenced-by-count":0,"title":["A Fusion of Algorithms in Near Duplicate Document Detection"],"prefix":"10.1007","author":[{"given":"Jun","family":"Fan","sequence":"first","affiliation":[]},{"given":"Tiejun","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Brin, S., Davis, J., Garcia-Molina, H.: Copy Detection Mechanisms for Digital Documents. In: Proceedings of the ACM SIGMOD Annual Conference (1995)","DOI":"10.1145\/223784.223855"},{"key":"20_CR2","unstructured":"Shivakumar, N., Garcia-Molina, H.: SCAM: A copy detection mechanism for digital documents. In: Proceedings of the 2nd International Conference in Theory and Practice of Digital Libraries, DL 1995 (1995)"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Shivakumar, N., Garcia-Molina, H.: Building a scalable and accurate copy detection mechanism. In: Proceedings of the 1st ACM Conference on Digital Libraries, DL 1996 (1996)","DOI":"10.1145\/226931.226961"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Chowdhury, A., Frieder, O., Grossman, D., Mccabe, M.C.: Collection statistics for fast duplicate document detection. ACM Transactions on Information Systems\u00a020(2) (2002)","DOI":"10.1145\/506309.506311"},{"key":"20_CR5","doi-asserted-by":"crossref","unstructured":"Ko\u0142cz, A., Chowdhury, A., Alspector, J.: Improved Robustness of Signature-Based Near-Replica Detection via Lexicon Randomization. In: Proceedings of the tenth ACM SIGKDD, Seattle, WA, USA (2004)","DOI":"10.1145\/1014052.1014127"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Conrad, J.G., Guo, X.S., Schriber, C.P.: Online Duplicate Document Detection: Signature Reliability in a Dynamic Retrieval Environment. In: Proceedings of the Twelfth International Conference on Information and Knowledge Management (2003)","DOI":"10.1145\/956863.956946"},{"key":"20_CR7","unstructured":"Broder, A.Z., Glassman, S.C., Manasse, M.S.: Syntactic clustering of the Web. In: Proceedings of the 6th International Web Conference (1997)"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Broder, A.Z., Charikar, M., Frieze, A., Mitzenmacher, M.: Min-Wise Independent Permutations. Journal of Computer and System Sciences, 630\u2013659 (2000)","DOI":"10.1006\/jcss.1999.1690"},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Fetterly, D., Manasse, M., Najork, M.: On the evolution of clusters of near-duplicate web pages. In: Proceedings of First Latin American Web Congress, pp. 37\u201345 (2003)","DOI":"10.1109\/LAWEB.2003.1250280"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Fetterly, D., Manasse, M., Najork, M.: Detecting Phrase-level Duplication on the World Wide Web. In: The 28th ACM SIGIR, pp. 170\u2013177 (2005)","DOI":"10.1145\/1076034.1076066"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Charikar, M.S.: Similarity estimation techniques from rounding algorithms. In: Proceedings of 34th Annual Symposium on Theory of Computing (2002)","DOI":"10.1145\/509907.509965"},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Henzinger, M.: Finding near-duplicate web pages: a large-scale evaluation of algorithms. In: Proceedings of the 29th ACM SIGIR, pp. 284\u2013291 (2006)","DOI":"10.1145\/1148170.1148222"},{"key":"20_CR13","doi-asserted-by":"crossref","unstructured":"Manku, G.S., Jain, A., Sarma, A.D.: Detecting near-duplicates for web crawling. In: Proceedings of the 16th International Conference on World Wide Web, pp. 141\u2013150 (2007)","DOI":"10.1145\/1242572.1242592"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Theobald, M., Siddharth, J., Paepcke, A.: SpotSigs: robust and efficient near duplicate detection in large web collections. In: Proceedings of ACM SIGIR (2008)","DOI":"10.1145\/1390334.1390431"}],"container-title":["Lecture Notes in Computer Science","New Frontiers in Applied Data Mining"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-28320-8_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T01:50:02Z","timestamp":1742521802000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-28320-8_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642283192","9783642283208"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-28320-8_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}