{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T23:41:26Z","timestamp":1725579686912},"publisher-location":"Berlin, Heidelberg","reference-count":20,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642199301"},{"type":"electronic","value":"9783642199318"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-19931-8_15","type":"book-chapter","created":{"date-parts":[[2011,3,25]],"date-time":"2011-03-25T18:18:22Z","timestamp":1301077102000},"page":"117-125","source":"Crossref","is-referenced-by-count":2,"title":["Web Document Duplicate Detection Using Fuzzy Hashing"],"prefix":"10.1007","author":[{"given":"Carlos G.","family":"Figuerola","sequence":"first","affiliation":[]},{"given":"Raquel G\u00f3mez","family":"D\u00edaz","sequence":"additional","affiliation":[]},{"given":"Jos\u00e9 L.","family":"Alonso Berrocal","sequence":"additional","affiliation":[]},{"given":"Angel F.","family":"Zazo Rodr\u00edguez","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"#cr-split#-15_CR1.1","unstructured":"Bar-Ilan, J.: Expectations versus reality - search engine features needed for web research at mid 2005. Cybermetrics 9"},{"key":"#cr-split#-15_CR1.2","unstructured":"(1) (2005), http:\/\/www.cindoc.csic.es\/cybermetrics\/articles\/v9i1p2.html"},{"issue":"11-16","key":"15_CR2","doi-asserted-by":"publisher","first-page":"1579","DOI":"10.1016\/S1389-1286(99)00021-3","volume":"31","author":"K. Bharat","year":"1999","unstructured":"Bharat, K., Broder, A.: Mirror, mirror on the web: A study of host pairs with replicated content. Computer Networks\u00a031(11-16), 1579\u20131590 (1999), http:\/\/citeseerx.ist.psu.edu\/viewdoc\/download?doi=10.1.1.90.1488&rep=rep1&type=pdf","journal-title":"Computer Networks"},{"key":"15_CR3","unstructured":"Chowdhury, A.: Duplicate data detection (2004), retrieved from http:\/\/ir.iit.edu\/~abdur\/Research\/Duplicate.html , http:\/\/gogamza.mireene.co.kr\/wp-content\/uploads\/1\/XbsrPeUgh6.pdf"},{"issue":"2","key":"15_CR4","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1145\/506309.506311","volume":"20","author":"A. Chowdhury","year":"2002","unstructured":"Chowdhury, A., Frieder, O., Grossman, D., McCabe, M.: Collection statistics for fast duplicate document detection. ACM Transactions on Information Systems (TOIS)\u00a020(2), 171\u2013191 (2002), http:\/\/citeseerx.ist.psu.edu\/viewdoc\/download?doi=10.1.1.5.3673&rep=rep1&type=pdf","journal-title":"ACM Transactions on Information Systems (TOIS)"},{"issue":"3","key":"15_CR5","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1145\/363958.363994","volume":"7","author":"F. Damerau","year":"1964","unstructured":"Damerau, F.: A technique for computer detection and correction of spelling errors. Communications of the ACM\u00a07(3), 171\u2013176 (1964), http:\/\/www.cis.uni-muenchen.de\/~heller\/SuchMasch\/apcadg\/liter.atur\/data\/damerau_distance.pdf","journal-title":"Communications of the ACM"},{"key":"15_CR6","unstructured":"Figuerola, C.G., Alonso\u00a0Berrocal, J.L., Zazo\u00a0Rodr\u00edguez, \u00c1.F., Rodr\u00edguez V\u00e1zquez\u00a0de Aldana, E.: Dise\u00f1o de spiders. Tech. Rep. DPTOIA-IT-2006-002 (2006)"},{"key":"15_CR7","doi-asserted-by":"crossref","first-page":"53","DOI":"10.54886\/scire.v16i2.4016","volume":"16","author":"C.G. Figuerola","year":"2010","unstructured":"Figuerola, C.G., G\u00f3mez D\u00edaz, R., Alonso Berrocal, J.L., Zazo Rodr\u00edguez, A.F.: Proyecto 7: un motor de recuperaci\u00f3n web colaborativo. Scire. Representaci\u00f3n y Organizaci\u00f3n del Conocimiento\u00a016, 53\u201360 (2010)","journal-title":"Scire. Representaci\u00f3n y Organizaci\u00f3n del Conocimiento"},{"issue":"2","key":"15_CR8","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1002\/j.1538-7305.1950.tb00463.x","volume":"29","author":"R. Hamming","year":"1950","unstructured":"Hamming, R.: Error detecting and error correcting codes. Bell System Technical Journal\u00a029(2), 147\u2013160 (1950), http:\/\/www.lee.eng.uerj.br\/~gil\/redesII\/hamming.pdf","journal-title":"Bell System Technical Journal"},{"key":"15_CR9","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1016\/j.diin.2006.06.015","volume":"3","author":"J. Kornblum","year":"2006","unstructured":"Kornblum, J.: Identifying almost identical files using context triggered piecewise hashing. Digital Investigation\u00a03, 91\u201397 (2006), https:\/\/www.dfrws.org\/2006\/proceedings\/12-Kornblum.pdf","journal-title":"Digital Investigation"},{"key":"15_CR10","unstructured":"Kornblum, J.: Beyond fuzzy hash. In: US Digital Forensic and Incident Response Summit 2010 (2010), http:\/\/computer-forensics.sans.org\/community\/summits\/2010\/files\/19-beyond-fuzzy-hashing-kornblum.pdf"},{"key":"15_CR11","unstructured":"Kornblum, J.: Fuzzy hashing and sseep (2010), http:\/\/ssdeep.sourceforge.net\/"},{"issue":"8","key":"15_CR12","first-page":"707","volume":"10","author":"V. Levenshtein","year":"1966","unstructured":"Levenshtein, V.: Binary codes capable of correcting deletions, insertions, and reversals. Soviet Physics Doklady\u00a010(8), 707\u2013710 (1966)","journal-title":"Soviet Physics Doklady"},{"key":"15_CR13","unstructured":"Milenko, D.: ssdeep 2.5. python wrapper for ssdeep library (2010), http:\/\/pypi.python.org\/pypi\/ssdeep"},{"issue":"1","key":"15_CR14","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1145\/375360.375365","volume":"33","author":"G. Navarro","year":"2001","unstructured":"Navarro, G.: A guided tour to approximate string matching. ACM computing surveys (CSUR)\u00a033(1), 31\u201388 (2001), http:\/\/citeseerx.ist.psu.edu\/viewdoc\/download?doi=10.1.1.96.7225&rep=rep1&type=pdf","journal-title":"ACM computing surveys (CSUR)"},{"key":"15_CR15","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1145\/634067.634256","volume-title":"CHI 2001 Extended Abstracts on Human Factors in Computing Systems","author":"R. Soukoreff","year":"2001","unstructured":"Soukoreff, R., MacKenzie, I.: Measuring errors in text entry tasks: an application of the levenshtein string distance statistic. In: CHI 2001 Extended Abstracts on Human Factors in Computing Systems, pp. 319\u2013320. ACM, New York (2001), http:\/\/citeseerx.ist.psu.edu\/viewdoc\/download?doi=10.1.1.22.757&rep=rep1&type=pdf"},{"key":"15_CR16","volume-title":"Introduction to data mining","author":"P. Tan","year":"2006","unstructured":"Tan, P., Steinbach, M., Kumar, V., et al.: Introduction to data mining. Pearson Addison Wesley, Boston (2006), http:\/\/www.pphust.cn\/uploadfiles\/200912\/20091204204805761.pdf"},{"key":"15_CR17","unstructured":"Tridgell, A.: Spamsum overview and code (2002), http:\/\/samba.org\/ftp\/unpacked\/junkcode\/spamsum"},{"key":"15_CR18","unstructured":"Tridgell, A., Mackerras, P.: The rsync algorithm (2004), http:\/\/dspace-prod1.anu.edu.au\/bitstream\/1885\/40765\/2\/TR-CS-96-05.pdf"},{"key":"15_CR19","doi-asserted-by":"publisher","first-page":"693","DOI":"10.1109\/GRC.2005.1547380","volume-title":"2005 IEEE International Conference on Granular Computing","author":"R. Yerra","year":"2005","unstructured":"Yerra, R., Ng, Y.: Detecting similar html documents using a fuzzy set information retrieval approach. In: 2005 IEEE International Conference on Granular Computing, vol.\u00a02, pp. 693\u2013699. IEEE, Los Alamitos (2005), http:\/\/faculty.cs.byu.edu\/~dennis\/papers\/ieee-grc.ps"}],"container-title":["Advances in Intelligent and Soft Computing","Trends in Practical Applications of Agents and Multiagent Systems"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-19931-8_15.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,5]],"date-time":"2023-06-05T22:30:17Z","timestamp":1686004217000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-19931-8_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642199301","9783642199318"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-19931-8_15","relation":{},"ISSN":["1867-5662","1867-5670"],"issn-type":[{"type":"print","value":"1867-5662"},{"type":"electronic","value":"1867-5670"}],"subject":[],"published":{"date-parts":[[2011]]}}}