{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T13:49:11Z","timestamp":1725544151236},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540332060"},{"type":"electronic","value":"9783540332077"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2006]]},"DOI":"10.1007\/11731139_42","type":"book-chapter","created":{"date-parts":[[2006,3,9]],"date-time":"2006-03-09T04:03:44Z","timestamp":1141877024000},"page":"361-370","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing Duplicate Collection Detection Through Replica Boundary Discovery"],"prefix":"10.1007","author":[{"given":"Zhigang","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Weijia","family":"Jia","sequence":"additional","affiliation":[]},{"given":"Xiaoming","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"42_CR1","first-page":"21","volume-title":"Proceedings of Compression and Complexity of Sequences 1997","author":"A.Z. Broder","year":"1997","unstructured":"Broder, A.Z.: On the resemblance and containment of documents. In: Proceedings of Compression and Complexity of Sequences 1997, pp. 21\u201329. IEEE Computer Society, Los Alamitos (1997)"},{"key":"42_CR2","doi-asserted-by":"crossref","unstructured":"Broder, A.Z.: Identifying and Filtering Near-Duplicate Documents. In: 11th Annual Symposium on Combinatorial Pattern Matching, June 2000, pp. 1\u201310 (2000)","DOI":"10.1007\/3-540-45123-4_1"},{"key":"42_CR3","doi-asserted-by":"crossref","unstructured":"Broder, Z., Glassman, S.C., Manasse, M.S., Eig, G.: Syntactic clustering of the Web. In: Proceedings of the sixth International World Wide Web Conference, pp. 391\u2013404 (1997)","DOI":"10.1016\/S0169-7552(97)00031-7"},{"key":"42_CR4","doi-asserted-by":"crossref","unstructured":"Cho, J., Shivakumar, N., Garcia-Molina, H.: Finding Replicated Web Collections. In: SIGMOD Conference 2000, pp. 355\u2013366 (2000)","DOI":"10.1145\/342009.335429"},{"key":"42_CR5","unstructured":"Heintze, N.: Scalable Document Fingerprinting. In: Proceedings of the Second USENIX Workshop on Electronic Commerce, pp. 191\u2013200 (1996)"},{"key":"42_CR6","doi-asserted-by":"crossref","unstructured":"Kotcz, A., Chowdhury, A., Alspector, J.: Improved robustness of signature-based near-replica detection via lexicon randomization. In: Proceedings of the 2004 ACM SIGKDD Conference, pp. 605\u2013610 (2004)","DOI":"10.1145\/1014052.1014127"},{"key":"42_CR7","doi-asserted-by":"crossref","unstructured":"Bharat, K., Broder, A.Z.: Mirror, Mirror, on the Web: A study of host pairs with replicated content. In: Proceedings of 8th International Conference on World Wide Web (WWW 1999) (May 1999)","DOI":"10.1016\/S1389-1286(99)00021-3"},{"issue":"3","key":"42_CR8","first-page":"175","volume":"2","author":"Z. Zhang","year":"2004","unstructured":"Zhang, Z., Chen, J., Li, X.: A Preprocessing Framework and Approach for Web Applications. Journal of Web Engineering\u00a02(3), 175\u2013191 (2004)","journal-title":"Journal of Web Engineering"},{"issue":"2","key":"42_CR9","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1145\/506309.506311","volume":"20","author":"A. Chowdhury","year":"2002","unstructured":"Chowdhury, A., Frieder, O., Grossman, D.A., McCabe, M.C.: Collection statistics for fast duplicated document detection. ACM Transactions on Information Systems\u00a020(2), 171\u2013191 (2002)","journal-title":"ACM Transactions on Information Systems"},{"key":"42_CR10","doi-asserted-by":"crossref","unstructured":"Brin, S., Davis, J., Garcia-Molina, H.: Copy detection mechanisms for digital documents. In: Proceedings of the ACM SIGMOD Annual Conference, San Francisco, CA (May 1995)","DOI":"10.1145\/223784.223855"},{"key":"42_CR11","unstructured":"Shivakumar, N., Garcia-Molina, H.: SCAM: A Copy Detection Mechanism for Digital Documents. In: Proceedings of the 2nd International Conference on Theory and Practice of Digital Libraries (1995)"},{"key":"42_CR12","doi-asserted-by":"crossref","unstructured":"Shivakumar, N., Garcia-Molina, H.: Building a Scalable and Accurate Copy Detection Mechanism. In: Proceedings of the 3nd International Conference on Theory and Practice of Digital Libraries (1996)","DOI":"10.1145\/226931.226961"},{"key":"42_CR13","doi-asserted-by":"crossref","unstructured":"Xi, W., Fox, E.A., Tan, R.P., Shu, J.: Machine Learning Approach for Homepage Finding Task. In: Proceedings of the 9th International Symposium on String Processing and Information Retrieval, Lisbon, Portugal, September 11-15, pp. 145\u2013159 (2002)","DOI":"10.1007\/3-540-45735-6_14"},{"key":"42_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1007\/10704656_13","volume-title":"The World Wide Web and Databases","author":"N. Shivakumar","year":"1998","unstructured":"Shivakumar, N., Garcia-Molina, H.: Finding near-replicas of documents on the Web. In: Atzeni, P., Mendelzon, A.O., Mecca, G. (eds.) WebDB 1998. LNCS, vol.\u00a01590, pp. 204\u2013212. Springer, Heidelberg (1998)"},{"key":"42_CR15","unstructured":"Henzinger, M., Motwani, R., Silverstein.: Challenges in Web Search Engines. In: Proceedings of the 18th International Joint Conference on Artificial Intelligene (2003)"},{"key":"42_CR16","doi-asserted-by":"crossref","unstructured":"Bharat, K., Broder, A., Dean, J., Henzinger, M.R.: A Comparison of Techniques to find mirrored hosts on the WWW. Journal of the American Society for Information Science\u00a051(12), 1114\u20131122","DOI":"10.1002\/1097-4571(2000)9999:9999<::AID-ASI1025>3.0.CO;2-0"},{"key":"42_CR17","unstructured":"Tianwang Web search engine, http:\/\/e.pku.edu.cn"}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11731139_42.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,27]],"date-time":"2021-04-27T06:51:46Z","timestamp":1619506306000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11731139_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006]]},"ISBN":["9783540332060","9783540332077"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/11731139_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2006]]}}}