{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T13:49:33Z","timestamp":1725544173966},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540332060"},{"type":"electronic","value":"9783540332077"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2006]]},"DOI":"10.1007\/11731139_33","type":"book-chapter","created":{"date-parts":[[2006,3,9]],"date-time":"2006-03-09T04:03:44Z","timestamp":1141877024000},"page":"275-284","source":"Crossref","is-referenced-by-count":2,"title":["A Systematic Study of Parameter Correlations in Large Scale Duplicate Document Detection"],"prefix":"10.1007","author":[{"given":"Shaozhi","family":"Ye","sequence":"first","affiliation":[]},{"given":"Ji-Rong","family":"Wen","sequence":"additional","affiliation":[]},{"given":"Wei-Ying","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"33_CR1","doi-asserted-by":"crossref","unstructured":"Broder, A.Z., Glassman, S.C., Manasse, M.S., Zweig, G.: Syntactic clustering of the Web. In: Proceedings of the 6th International World Wide Web Conference (WWW) (1997)","DOI":"10.1016\/S0169-7552(97)00031-7"},{"key":"33_CR2","doi-asserted-by":"crossref","unstructured":"Bharat, K., Broder, A.Z.: Mirror, mirror on the Web: A study of host pairs with replicated content. In: Proceedings of the 8th International World Wide Web Conference (WWW), pp. 501\u2013512 (1999)","DOI":"10.1016\/S1389-1286(99)00021-3"},{"issue":"12","key":"33_CR3","doi-asserted-by":"publisher","first-page":"1114","DOI":"10.1002\/1097-4571(2000)9999:9999<::AID-ASI1025>3.0.CO;2-0","volume":"51","author":"K. Bharat","year":"2000","unstructured":"Bharat, K., Broder, A.Z., Dean, J., Henzinger, M.R.: A comparison of techniques to find mirrored hosts on the WWW. Journal of the American Society for Information Science (JASIS)\u00a051(12), 1114\u20131122 (2000)","journal-title":"Journal of the American Society for Information Science (JASIS)"},{"key":"33_CR4","doi-asserted-by":"crossref","unstructured":"Fetterly, D., Manasse, M., Najork, M., Wiener, J.: A large-scale study of the evolution of web pages. In: Proceedings of the 12th International World Wide Web Conference (WWW), pp. 669\u2013678 (2003)","DOI":"10.1145\/775152.775246"},{"key":"33_CR5","doi-asserted-by":"crossref","unstructured":"Fetterly, D., Manasse, M., Najork, M.: On the evolution of clusters of near-duplicate web pages. In: Proceedings of the 1st Latin American Web Congress (LA-Web), pp. 37\u201345 (2003)","DOI":"10.1109\/LAWEB.2003.1250280"},{"key":"33_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1007\/978-3-540-24655-8_6","volume-title":"Advanced Web Technologies and Applications","author":"S. Ye","year":"2004","unstructured":"Ye, S., Song, R., Wen, J.R., Ma, W.Y.: A query-dependent duplicate detection approach for large scale search engines. In: Yu, J.X., Lin, X., Lu, H., Zhang, Y. (eds.) APWeb 2004. LNCS, vol.\u00a03007, pp. 48\u201358. Springer, Heidelberg (2004)"},{"issue":"2","key":"33_CR7","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1145\/792550.792554","volume":"36","author":"I. Soboroff","year":"2002","unstructured":"Soboroff, I.: Do TREC Web collections look like the Web? SIGIR Forum\u00a036(2), 23\u201331 (2002)","journal-title":"SIGIR Forum"},{"key":"33_CR8","doi-asserted-by":"crossref","unstructured":"Brin, S., Davis, J., Garcia-Molina, H.: Copy detection mechanisms for digital documents. In: Proceedings of the 1995 ACM International Conference on Management of Data (SIGMOD), pp. 398\u2013409 (1995)","DOI":"10.1145\/223784.223855"},{"key":"33_CR9","unstructured":"Heintze, N.: Scalable document fingerprinting. In: Proceedings of the 2nd USENIX Electronic Commerce Workshop, pp. 191\u2013200 (1996)"},{"key":"33_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1007\/10704656_13","volume-title":"The World Wide Web and Databases","author":"N. Shivakumar","year":"1999","unstructured":"Shivakumar, N., Garcia-Molina, H.: Finding near-replicas of documents and servers on the Web. In: Atzeni, P., Mendelzon, A.O., Mecca, G. (eds.) WebDB 1998. LNCS, vol.\u00a01590, pp. 204\u2013212. Springer, Heidelberg (1999)"},{"key":"33_CR11","doi-asserted-by":"crossref","unstructured":"Cho, J., Shivakumar, N., Garcia-Molina, H.: Finding replicated Web collections. In: Proceedings of the 2000 ACM International Conference on Management of Data (SIGMOD), pp. 355\u2013366 (2000)","DOI":"10.1145\/342009.335429"},{"issue":"2","key":"33_CR12","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1145\/506309.506311","volume":"20","author":"A. Chowdhury","year":"2002","unstructured":"Chowdhury, A., Frieder, O., Grossman, D., McCabe, M.C.: Collection statistics for fast duplicate document detection. ACM Trans. Inf. Syst.\u00a020(2), 171\u2013191 (2002)","journal-title":"ACM Trans. Inf. Syst."},{"key":"33_CR13","doi-asserted-by":"crossref","unstructured":"Cooper, J.W., Coden, A., Brown, E.W.: Detecting similar documents using salient terms. In: Proceedings of the 11th ACM International Conference on Information and Knowledge Management (CIKM), pp. 245\u2013251 (2002)","DOI":"10.1145\/584792.584835"},{"key":"33_CR14","doi-asserted-by":"crossref","unstructured":"Conrad, J.G., Guo, X.S., Schriber, C.P.: Online duplicate document detection: signature reliability in a dynamic retrieval environment. In: Proceedings of the 12th International Conference on Information and knowledge management (CIKM), pp. 443\u2013452 (2003)","DOI":"10.1145\/956863.956946"},{"key":"33_CR15","unstructured":"Rabin, M.: Fingerprinting by random polynomials. Technical report tr-15-81, Center for Research in Computing Technology, Harvard University (1981)"},{"key":"33_CR16","first-page":"31","volume-title":"An Introduction to Probability Theory and Its Applications","author":"W. Feller","year":"1968","unstructured":"Feller, W.: An Introduction to Probability Theory and Its Applications, 3rd edn., vol.\u00a01, pp. 31\u201332. Wiley, Chichester (1968)","edition":"3"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11731139_33.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,27]],"date-time":"2021-04-27T06:51:42Z","timestamp":1619506302000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11731139_33"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006]]},"ISBN":["9783540332060","9783540332077"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/11731139_33","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2006]]}}}