{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T01:33:50Z","timestamp":1743039230514,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":12,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811076046"},{"type":"electronic","value":"9789811076053"}],"license":[{"start":{"date-parts":[[2017,12,20]],"date-time":"2017-12-20T00:00:00Z","timestamp":1513728000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-981-10-7605-3_82","type":"book-chapter","created":{"date-parts":[[2017,12,19]],"date-time":"2017-12-19T06:22:45Z","timestamp":1513664565000},"page":"499-504","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Hadoop Based Parallel Deduplication Method for Web Documents"],"prefix":"10.1007","author":[{"given":"Junjie","family":"Song","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuhui","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,12,20]]},"reference":[{"key":"82_CR1","doi-asserted-by":"crossref","unstructured":"Lopresti, D.P.: Models and algorithms for duplicate document detection. In: Proceedings of the Fifth International Conference on Document Analysis and Recognition, ICDAR 1999, pp. 297\u2013300. IEEE (1999)","DOI":"10.1109\/ICDAR.1999.791783"},{"key":"82_CR2","unstructured":"Jianyong, W., Zhengmao, X., Ming, L., et al.: Research and evaluation of near-replicas of Web pages detection algorithms. Chin. J. Electron. (2000)"},{"key":"82_CR3","unstructured":"Liu, S., Zhang, Y., Xia, Y., et al.: Duplicate web page elimination based on HTML and extraction of long sentence. Microcomput. Appl. (2009)"},{"key":"82_CR4","unstructured":"Salton, G., McGill, M.J.: Introduction to modern information retrieval (1986)"},{"issue":"11","key":"82_CR5","doi-asserted-by":"publisher","first-page":"1022","DOI":"10.1145\/182.358466","volume":"26","author":"G Salton","year":"1983","unstructured":"Salton, G., Fox, E.A., Wu, H.: Extended Boolean information retrieval. Commun. ACM 26(11), 1022\u20131036 (1983)","journal-title":"Commun. ACM"},{"issue":"5","key":"82_CR6","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1016\/0306-4573(88)90021-0","volume":"24","author":"G Salton","year":"1988","unstructured":"Salton, G., Buckley, C.: Term-weighting approaches in automatic text retrieval. Inf. Process. Manage. 24(5), 513\u2013523 (1988)","journal-title":"Inf. Process. Manage."},{"key":"82_CR7","unstructured":"Wan, J., Yu, W., Xu, X.: Design and implement of distributed document clustering based on MapReduce. In: Proceedings of the Second Symposium International Computer Science and Computational Technology (ISCSCT), Huangshan, PR China, pp. 278\u2013280 (2009)"},{"key":"82_CR8","unstructured":"Mihalcea, R., Tarau, P.: TextRank: bringing order into texts. Association for Computational Linguistics (2004)"},{"key":"82_CR9","unstructured":"Page, L., Brin, S., Motwani, R., et al.: The PageRank citation ranking: bringing order to the web. Stanford InfoLab (1999)"},{"issue":"3","key":"82_CR10","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1145\/3828.3835","volume":"32","author":"DD Sleator","year":"1985","unstructured":"Sleator, D.D., Tarjan, R.E.: Self-adjusting binary search trees. J. ACM (JACM) 32(3), 652\u2013686 (1985)","journal-title":"J. ACM (JACM)"},{"issue":"1","key":"82_CR11","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: MapReduce: simplified data processing on large clusters. Commun. ACM 51(1), 107\u2013113 (2008)","journal-title":"Commun. ACM"},{"issue":"8\u201313","key":"82_CR12","doi-asserted-by":"publisher","first-page":"1157","DOI":"10.1016\/S0169-7552(97)00031-7","volume":"29","author":"AZ Broder","year":"1997","unstructured":"Broder, A.Z., Glassman, S.C., Manasse, M.S., et al.: Syntactic clustering of the web. Comput. Netw. ISDN Syst. 29(8\u201313), 1157\u20131166 (1997)","journal-title":"Comput. Netw. ISDN Syst."}],"container-title":["Lecture Notes in Electrical Engineering","Advances in Computer Science and Ubiquitous Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-10-7605-3_82","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2018,8,4]],"date-time":"2018-08-04T12:40:56Z","timestamp":1533386456000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-10-7605-3_82"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12,20]]},"ISBN":["9789811076046","9789811076053"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/978-981-10-7605-3_82","relation":{},"ISSN":["1876-1100","1876-1119"],"issn-type":[{"type":"print","value":"1876-1100"},{"type":"electronic","value":"1876-1119"}],"subject":[],"published":{"date-parts":[[2017,12,20]]}}}