{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T15:43:33Z","timestamp":1725551013661},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642122743"},{"type":"electronic","value":"9783642122750"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-12275-0_44","type":"book-chapter","created":{"date-parts":[[2010,3,31]],"date-time":"2010-03-31T20:04:10Z","timestamp":1270065850000},"page":"508-519","source":"Crossref","is-referenced-by-count":3,"title":["Temporal Shingling for Version Identification in Web Archives"],"prefix":"10.1007","author":[{"given":"Ralf","family":"Schenkel","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"44_CR1","doi-asserted-by":"crossref","unstructured":"Anand, A., et al.: EverLast: a distributed architecture for preserving the web. In: JCDL, pp. 331\u2013340 (2009)","DOI":"10.1145\/1555400.1555455"},{"key":"44_CR2","doi-asserted-by":"crossref","unstructured":"Brin, S., Davis, J., Garcia-Molina, H.: Copy detection mechanisms for digital documents. In: SIGMOD Conference, pp. 398\u2013409 (1995)","DOI":"10.1145\/223784.223855"},{"key":"44_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/3-540-45123-4_1","volume-title":"Combinatorial Pattern Matching","author":"A.Z. Broder","year":"2000","unstructured":"Broder, A.Z.: Identifying and filtering near-duplicate documents. In: Giancarlo, R., Sankoff, D. (eds.) CPM 2000. LNCS, vol.\u00a01848, pp. 1\u201310. Springer, Heidelberg (2000)"},{"issue":"8-13","key":"44_CR4","first-page":"1157","volume":"29","author":"A.Z. Broder","year":"1997","unstructured":"Broder, A.Z., Glassman, S.C., Manasse, M.S., Zweig, G.: Syntactic clustering of the web. Computer Networks\u00a029(8-13), 1157\u20131166 (1997)","journal-title":"Computer Networks"},{"key":"44_CR5","doi-asserted-by":"crossref","unstructured":"Charikar, M.: Similarity estimation techniques from rounding algorithms. In: STOC, pp. 380\u2013388 (2002)","DOI":"10.1145\/509907.509965"},{"issue":"4","key":"44_CR6","doi-asserted-by":"publisher","first-page":"390","DOI":"10.1145\/958942.958945","volume":"28","author":"J. Cho","year":"2003","unstructured":"Cho, J., Garcia-Molina, H.: Effective page refresh policies for web crawlers. ACM Trans. Database Syst.\u00a028(4), 390\u2013426 (2003)","journal-title":"ACM Trans. Database Syst."},{"issue":"3","key":"44_CR7","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1145\/857166.857170","volume":"3","author":"J. Cho","year":"2003","unstructured":"Cho, J., Garcia-Molina, H.: Estimating frequency of change. ACM Trans. Internet Techn.\u00a03(3), 256\u2013290 (2003)","journal-title":"ACM Trans. Internet Techn."},{"issue":"2","key":"44_CR8","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1145\/506309.506311","volume":"20","author":"A. Chowdhury","year":"2002","unstructured":"Chowdhury, A., et al.: Collection statistics for fast duplicate document detection. ACM Trans. Inf. Syst.\u00a020(2), 171\u2013191 (2002)","journal-title":"ACM Trans. Inf. Syst."},{"key":"44_CR9","doi-asserted-by":"crossref","unstructured":"Conrad, J.G., et al.: Online duplicate document detection: signature reliability in a dynamic retrieval environment. In: CIKM, pp. 443\u2013452 (2003)","DOI":"10.1145\/956863.956946"},{"key":"44_CR10","doi-asserted-by":"crossref","unstructured":"Henzinger, M.R.: Finding near-duplicate web pages: a large-scale evaluation of algorithms. In: SIGIR, pp. 284\u2013291 (2006)","DOI":"10.1145\/1148170.1148222"},{"issue":"3","key":"44_CR11","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1002\/asi.10170","volume":"54","author":"T.C. Hoad","year":"2003","unstructured":"Hoad, T.C., Zobel, J.: Methods for identifying versioned and plagiarized documents. JASIST\u00a054(3), 203\u2013215 (2003)","journal-title":"JASIST"},{"key":"44_CR12","doi-asserted-by":"crossref","unstructured":"Kolcz, A., Chowdhury, A., Alspector, J.: Improved robustness of signature-based near-replica detection via lexicon randomization. In: KDD, pp. 605\u2013610 (2004)","DOI":"10.1145\/1014052.1014127"},{"key":"44_CR13","unstructured":"Manber, U.: Finding similar files in a large file system. In: USENIX Winter, pp. 1\u201310 (1994)"},{"key":"44_CR14","doi-asserted-by":"crossref","unstructured":"Manku, G.S., Jain, A., Sarma, A.D.: Detecting near-duplicates for web crawling. In: WWW, pp. 141\u2013150 (2007)","DOI":"10.1145\/1242572.1242592"},{"key":"44_CR15","doi-asserted-by":"crossref","unstructured":"Olston, C., Pandey, S.: Recrawl scheduling based on information longevity. In: WWW, pp. 437\u2013446 (2008)","DOI":"10.1145\/1367497.1367557"},{"key":"44_CR16","doi-asserted-by":"crossref","unstructured":"Theobald, M., Siddharth, J., Paepcke, A.: SpotSigs: robust and efficient near duplicate detection in large web collections. In: SIGIR, pp. 563\u2013570 (2008)","DOI":"10.1145\/1390334.1390431"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-12275-0_44","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,27]],"date-time":"2019-05-27T21:47:25Z","timestamp":1558993645000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-12275-0_44"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642122743","9783642122750"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-12275-0_44","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2010]]}}}