{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T19:32:39Z","timestamp":1725564759137},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540213710"},{"type":"electronic","value":"9783540246558"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2004]]},"DOI":"10.1007\/978-3-540-24655-8_6","type":"book-chapter","created":{"date-parts":[[2010,9,8]],"date-time":"2010-09-08T20:25:37Z","timestamp":1283977537000},"page":"48-58","source":"Crossref","is-referenced-by-count":2,"title":["A Query-Dependent Duplicate Detection Approach for Large Scale Search Engines"],"prefix":"10.1007","author":[{"given":"Shaozhi","family":"Ye","sequence":"first","affiliation":[]},{"given":"Ruihua","family":"Song","sequence":"additional","affiliation":[]},{"given":"Ji-Rong","family":"Wen","sequence":"additional","affiliation":[]},{"given":"Wei-Ying","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Brin, S., Davis, J., Garcia-Molina, H.: Copy Detection Mechanisms for Digital Documents. In: Proceeding of the Special Interest Group on Management of Data (SIGMOD 1995), pp. 298\u2013409 (1995)","DOI":"10.1145\/223784.223855"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Denning, P.J.: Plagiarism in the Web. Communications of the ACM\u00a038 ( December 1995)","DOI":"10.1145\/219663.219675"},{"key":"6_CR3","unstructured":"Heintze, N.: Scalable Document Fingerprinting. In: Proceedings of the Second USENIX Electronic Commerce Worksop, pp.191-200 (November 1996)"},{"key":"6_CR4","unstructured":"Broder, A.Z., Glassman, S.C., Manasse, M.S.: Syntactic Clastering of the Web. In: Proceedings of the Sixth International World Wide Web Conference(WWW6) (1997)"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Shivakumar, N., Garica-Molina, H.: Finding Near-Replicas of Documents on the Web. In: International Workshop on the Web and Databases (WebDB 1998) (1998)","DOI":"10.1007\/10704656_13"},{"key":"6_CR6","unstructured":"Silverstein, C., Henzinger, M., Marais, H., Moricz, M.: Analysis of a Very Large AltaVista Query Log. Technical Report 1998-014, Digital System Research Center (October 1998)"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Lopresti, D.P.: Models and Algorithms for Duplicate Document Detection. In: Proceedings of the 5th International Conference on Document Analysis and Recognition (September 1999)","DOI":"10.1109\/ICDAR.1999.791783"},{"key":"6_CR8","unstructured":"Bharat, K., Broder, A.: Mirror on the Web: A Study of HostPairs with Replicated Content. In: Proceedings of 8th International World Wide Web Conference (WWW8 1999), pp.501\u2013512 (1999)"},{"key":"6_CR9","unstructured":"Turner, M., Katsnelson, Y., Smith, J.: Large-Scale Duplicate Document Detection in Operation. In: Proceedings of the 2001 Symposium on Document Image Understanding Technology (2001)"},{"issue":"2","key":"6_CR10","doi-asserted-by":"crossref","first-page":"226","DOI":"10.1002\/1097-4571(2000)9999:9999<::AID-ASI1591>3.0.CO;2-R","volume":"53","author":"A. Spink","year":"2001","unstructured":"Spink, A., Wolfram, D., Jansen, B., Saracevic, T.: Searching The Web: The Public and Their Queries. Journal of the American Society for Information Science\u00a053(2), 226\u2013234 (2001)","journal-title":"Journal of the American Society for Information Science"},{"issue":"2","key":"6_CR11","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1145\/506309.506311","volume":"20","author":"A. Chowdhury","year":"2002","unstructured":"Chowdhury, A., Frieder, O., Grossman, D., McCabe, M.C.: Collection Statistics for Fast Duplicate Document Detection. ACM Transactions on Information Systems\u00a020(2), 171\u2013191 (2002)","journal-title":"ACM Transactions on Information Systems"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Cooper, J.W., Coden, A.R., Brown, E.W.: Detecting Similar Documents using Salient Terms. In: the 11th International Conference on Information and Knowledge Management, CIKM 2002 (November 2002)","DOI":"10.1145\/584792.584835"},{"key":"6_CR13","unstructured":"Xie, Y., O\u2019Hallaron, D.: Locality in Search Engine Queries and its Implications for Caching. In: Proceedings of IEEE Infocom (June 2002)"},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Bar-Yossef, Z., Rajagopalan, S.: Temlate Detection via Data Mining and its Applications. In: Proceedings of the 11th International World Wide Web Conference, WWW 2002 (2002)","DOI":"10.1145\/511446.511522"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Yu, S., Cai, D., Wen, J.-R., Ma, W.-Y.: Improving Pseudo-Relevance Feedback in Web Infromation Retrieval Using Web Page Segmentation. In: Proceedings of the 12th International World Wide Web Conference, WWW 2003, May 2003, pp.11\u201318 (2003)","DOI":"10.1145\/775152.775155"},{"key":"6_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"406","DOI":"10.1007\/3-540-36901-5_42","volume-title":"Web Technologies and Applications","author":"D. Cai","year":"2003","unstructured":"Cai, D., Yu, S., Wen, J.-R., Ma, W.-Y.: Extracting Content Structure for Web Pages Based on Visual Representation. In: Zhou, X., Zhang, Y., Orlowska, M.E. (eds.) APWeb 2003. LNCS, vol.\u00a02642, pp. 406\u2013417. Springer, Heidelberg (2003)"}],"container-title":["Lecture Notes in Computer Science","Advanced Web Technologies and Applications"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-24655-8_6.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,19]],"date-time":"2020-11-19T04:50:43Z","timestamp":1605761443000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-24655-8_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2004]]},"ISBN":["9783540213710","9783540246558"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-24655-8_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2004]]}}}