{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T03:24:41Z","timestamp":1725506681462},"publisher-location":"Berlin, Heidelberg","reference-count":23,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540782391"},{"type":"electronic","value":"9783540782469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-78246-9_71","type":"book-chapter","created":{"date-parts":[[2008,4,12]],"date-time":"2008-04-12T11:51:49Z","timestamp":1208001109000},"page":"601-609","source":"Crossref","is-referenced-by-count":6,"title":["New Issues in Near-duplicate Detection"],"prefix":"10.1007","author":[{"given":"Martin","family":"Potthast","sequence":"first","affiliation":[]},{"given":"Benno","family":"Stein","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"71_CR1_71","doi-asserted-by":"crossref","unstructured":"BERNSTEIN, Y. and ZOBEL, J. (2004): A scalable system for identifying co-derivative documents, Proc. of SPIRE \u201904.","DOI":"10.1007\/978-3-540-30213-1_6"},{"key":"71_CR2_71","doi-asserted-by":"crossref","unstructured":"BRIN, S., DAVIS, J. and GARCIA-MOLINA, H. (1995): Copy detection mechanisms for digital documents, Proc. of SIGMOD \u201995.","DOI":"10.1145\/223784.223855"},{"key":"71_CR3_71","doi-asserted-by":"crossref","unstructured":"BRODER, A. (2000): Identifying and filtering near-duplicate documents, Proc. of COM \u201900.","DOI":"10.1007\/3-540-45123-4_1"},{"key":"71_CR4_71","doi-asserted-by":"crossref","unstructured":"BRODER, A., EIRON, N., FONTOURA, M., HERSCOVICI, M., LEMPEL, R., MCPHERSON, J., QI, R. and SHEKITA, E. (2006): Indexing Shared Content in Information Retrieval Systems, Proc. of EDBT \u201906.","DOI":"10.1007\/11687238_21"},{"key":"71_CR5_71","doi-asserted-by":"crossref","unstructured":"CHARIKAR, M. (2002): Similarity Estimation Techniques from Rounding Algorithms, Proc. of STOC \u201902.","DOI":"10.1145\/509907.509965"},{"key":"71_CR6_71","doi-asserted-by":"crossref","unstructured":"CHOWDHURY, A., FRIEDER, O., GROSSMAN, D. and MCCABE, M. (2002): Collection statistics for fast duplicate document detection, ACM Trans. Inf. Syst.,20.","DOI":"10.1145\/506309.506311"},{"key":"71_CR7_71","doi-asserted-by":"crossref","unstructured":"CONRAD, J., GUO, X. and SCHRIBER, C. (2003): Online duplicate document detection: signature reliability in a dynamic retrieval environment, Proc. of CIKM \u201903.","DOI":"10.1145\/956863.956946"},{"key":"71_CR8_71","doi-asserted-by":"crossref","unstructured":"CONRAD, J. and SCHRIBER, C. (2004): Constructing a text corpus for inexact duplicate detection, Proc. of SIGIR \u201904.","DOI":"10.1145\/1008992.1009131"},{"key":"71_CR9_71","doi-asserted-by":"crossref","unstructured":"DATAR, M., IMMORLICA, N., INDYK, P. and MIRROKNI, V. (2004): Locality-Sensitive Hashing Scheme Based on p-Stable Distributions, Proc. of SCG \u201904.","DOI":"10.1145\/997817.997857"},{"key":"71_CR10_71","doi-asserted-by":"crossref","unstructured":"FETTERLY, D., MANASSE, M. and NAJORK, M. (2003): On the Evolution of Clusters of Near-Duplicate Web Pages, Proc. of LA-WEB \u201903.","DOI":"10.1109\/LAWEB.2003.1250280"},{"key":"71_CR11_71","doi-asserted-by":"crossref","unstructured":"FORMAN, G., ESHGHI, K. and CHIOCCHETTI, S. (2005): Finding similar files in large document repositories, Proc. of KDD \u201905.","DOI":"10.1145\/1081870.1081916"},{"key":"71_CR12_71","unstructured":"HEINTZE, N. (1996): Scalable document fingerprinting, Proc. of USENIX-EC \u201996."},{"key":"71_CR13_71","doi-asserted-by":"crossref","unstructured":"HENZINGER, M. (2006): Finding Near-Duplicate Web Pages: a Large-Scale Evaluation of Algorithms, Proc. of SIGIR \u201906.","DOI":"10.1145\/1148170.1148222"},{"key":"71_CR14_71","unstructured":"HOAD, T. and ZOBEL, J. (2003): Methods for Identifying Versioned and Plagiarised Documents, Jour. of ASIST, 54."},{"key":"71_CR15_71","unstructured":"INDYK, P. and MOTWANI, R. (1998): Approximate Nearest Neighbor\u2014Towards Removing the Curse of Dimensionality, Proc. of STOC \u201998."},{"key":"71_CR16_71","unstructured":"KO\u0110CZ, A., CHOWDHURY, A. and ALSPECTOR, J. (2004): Improved robustness of signature-based near-replica detection via lexicon randomization, Proc. of KDD \u201904."},{"key":"71_CR17_71","unstructured":"MANBER, U. (1994): Finding similar files in a large file system, Proc. of USENIX-TC \u201994"},{"key":"71_CR18_71","doi-asserted-by":"crossref","unstructured":"SCHLEIMER, S., WILKERSON, D. and AIKEN, A. (2003): Winnowing: local algorithms for document fingerprinting, Proc. of SIGMOD \u201903.","DOI":"10.1145\/872757.872770"},{"key":"71_CR19_71","unstructured":"STEIN, B. (2005): Fuzzy-Fingerprints for Text-based Information Retrieval, Proc. of I-KNOW \u201905."},{"key":"71_CR20_71","doi-asserted-by":"crossref","unstructured":"STEIN, B. (2007): Principles of Hash-based Text Retrieval, Proc. of SIGIR \u201907.","DOI":"10.1145\/1277741.1277832"},{"key":"71_CR21_71","unstructured":"WEBER, R., SCHEK, H. and BLOTT, S. (1998): A Quantitative Analysis and Performance Study for Similarity-Search Methods in High-Dimensional Spaces, Proc. of VLDB \u201998."},{"key":"71_CR22_71","doi-asserted-by":"crossref","unstructured":"YE, S., WEN, J. and MA, W. (2006): A Systematic Study of Parameter Correlations in Large Scale Duplicate Document Detection, Proc. of PAKDD \u201906.","DOI":"10.1007\/11731139_33"},{"key":"71_CR23_71","unstructured":"ZOBEL, J. and BERNSTEIN, Y. (2006): The case of the duplicate documents: Measurement, search, and science, Proc. of APWeb \u201906."}],"container-title":["Studies in Classification, Data Analysis, and Knowledge Organization","Data Analysis, Machine Learning and Applications"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-78246-9_71.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,27]],"date-time":"2021-04-27T07:02:19Z","timestamp":1619506939000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-78246-9_71"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540782391","9783540782469"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-78246-9_71","relation":{},"ISSN":["1431-8814"],"issn-type":[{"type":"print","value":"1431-8814"}],"subject":[],"published":{"date-parts":[[2008]]}}}