{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,3,30]],"date-time":"2022-03-30T17:40:03Z","timestamp":1648662003953},"reference-count":27,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2014,5,5]],"date-time":"2014-05-05T00:00:00Z","timestamp":1399248000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2014,8]]},"DOI":"10.1007\/s11227-014-1197-7","type":"journal-article","created":{"date-parts":[[2014,5,4]],"date-time":"2014-05-04T02:55:24Z","timestamp":1399172124000},"page":"930-954","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Strategic and suave processing for performing similarity joins using MapReduce"],"prefix":"10.1007","volume":"69","author":[{"given":"Mahalakshmi","family":"Lakshminarayanan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"William F.","family":"Acosta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"suffix":"II","given":"Robert C.","family":"Green","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vijay","family":"Devabhaktuni","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,5,5]]},"reference":[{"key":"1197_CR1","unstructured":"Arasu A, Ganti V, Kaushik R (2006) Efficient exact set-similarity joins. In: Proceedings of the 32nd international conference on Very large data bases, VLDB Endowment, pp 918\u2013929"},{"key":"1197_CR2","doi-asserted-by":"crossref","unstructured":"Baraglia R, De Francisci Morales G, Lucchese C (2010) Document similarity self-join with mapreduce. In: 2010 IEEE 10th International Conference on Data Mining (ICDM), IEEE, pp 731\u2013736","DOI":"10.1109\/ICDM.2010.70"},{"key":"1197_CR3","doi-asserted-by":"crossref","unstructured":"Bayardo RJ, Ma Y, Srikant R (2007) Scaling up all pairs similarity search. In: Proceedings of the 16th international conference on World Wide Web. ACM, New York, pp 131\u2013140","DOI":"10.1145\/1242572.1242591"},{"issue":"8","key":"1197_CR4","doi-asserted-by":"publisher","first-page":"1157","DOI":"10.1016\/S0169-7552(97)00031-7","volume":"29","author":"AZ Broder","year":"1997","unstructured":"Broder AZ, Glassman SC, Manasse MS, Zweig G (1997) Syntactic clustering of the web. Comput Netw ISDN Syst 29(8):1157\u20131166","journal-title":"Comput Netw ISDN Syst"},{"key":"1197_CR5","doi-asserted-by":"crossref","unstructured":"Chaudhuri S, Ganti V, Kaushik R (2006) A primitive operator for similarity joins in data cleaning. In: Proceedings of the 22nd international conference on data engineering, 2006. ICDE\u201906. IEEE, New York, pp 5\u20135","DOI":"10.1109\/ICDE.2006.9"},{"issue":"1","key":"1197_CR6","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean J, Ghemawat S (2008) Mapreduce: simplified data processing on large clusters. Commun ACM 51(1):107\u2013113","journal-title":"Commun ACM"},{"key":"1197_CR7","doi-asserted-by":"crossref","unstructured":"Elsayed T, Lin J, Oard DW (2008) Pairwise document similarity in large collections with mapreduce. In: Proceedings of the 46th annual meeting of the association for computational linguistics on human language technologies: short papers. association for, computational linguistics, pp 265\u2013268","DOI":"10.3115\/1557690.1557767"},{"issue":"4","key":"1197_CR8","first-page":"228","volume":"2","author":"D Fetterly","year":"2003","unstructured":"Fetterly D, Manasse M, Najork M (2003) On the evolution of clusters of near-duplicate web pages. J Web Eng 2(4):228\u2013246","journal-title":"J Web Eng"},{"key":"1197_CR9","doi-asserted-by":"crossref","unstructured":"Hadjieleftheriou M, Chandel A, Koudas N, Srivastava D (2008) Fast indexes and algorithms for set similarity selection queries. In: IEEE 24th International Conference on Data Engineering, 2008. ICDE 2008. IEEE, New York pp 267\u2013276","DOI":"10.1109\/ICDE.2008.4497435"},{"key":"1197_CR10","doi-asserted-by":"crossref","unstructured":"Hadjieleftheriou M, Koudas N, Srivastava D (2009) Incremental maintenance of length normalized indexes for approximate string matching. In: Proceedings of the 2009 ACM SIGMOD international conference on management of data. ACM, New York, pp 429\u2013440","DOI":"10.1145\/1559845.1559891"},{"key":"1197_CR11","doi-asserted-by":"crossref","unstructured":"Henzinger M (2006) Finding near-duplicate web pages: a large-scale evaluation of algorithms. In: Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval. ACM, New York, pp 284\u2013291","DOI":"10.1145\/1148170.1148222"},{"issue":"3","key":"1197_CR12","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1002\/asi.10170","volume":"54","author":"TC Hoad","year":"2003","unstructured":"Hoad TC, Zobel J (2003) Methods for identifying versioned and plagiarized documents. J Am Soc Inf Sci Technol 54(3):203\u2013215","journal-title":"J Am Soc Inf Sci Technol"},{"key":"1197_CR13","doi-asserted-by":"crossref","unstructured":"Indyk P, Motwani R (1998) Approximate nearest neighbors: towards removing the curse of dimensionality. In: Proceedings of the thirtieth annual ACM symposium on theory of computing. ACM, New York, pp 604\u2013613","DOI":"10.1145\/276698.276876"},{"issue":"8","key":"1197_CR14","doi-asserted-by":"publisher","first-page":"704","DOI":"10.14778\/2212351.2212353","volume":"5","author":"A Metwally","year":"2012","unstructured":"Metwally A, Faloutsos C (2012) V-smart-join: a scalable mapreduce framework for all-pair similarity joins of multisets and vectors. Proc VLDB Endow 5(8):704\u2013715","journal-title":"Proc VLDB Endow"},{"key":"1197_CR15","doi-asserted-by":"crossref","unstructured":"Metwally A, Agrawal D, El Abbadi A (2007) Detectives: detecting coalition hit inflation attacks in advertising networks streams. In: Proceedings of the 16th international conference on World Wide Web. ACM, New York, pp 241\u2013250","DOI":"10.1145\/1242572.1242606"},{"key":"1197_CR16","unstructured":"Ricardo BY et al (1999) Modern information retrieval. Pearson Education India, Delhi"},{"key":"1197_CR17","doi-asserted-by":"crossref","unstructured":"Sarawagi S, Bhamidipaty A (2002) Interactive deduplication using active learning. In: Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, New York, pp 269\u2013278","DOI":"10.1145\/775047.775087"},{"key":"1197_CR18","doi-asserted-by":"crossref","unstructured":"Sarawagi S, Kirpal A (2004) Efficient set joins on similarity predicates. In: Proceedings of the 2004 ACM SIGMOD international conference on management of data. ACM, New York, pp 743\u2013754","DOI":"10.1145\/1007568.1007652"},{"issue":"3","key":"1197_CR19","first-page":"73","volume":"37","author":"D Singh","year":"2007","unstructured":"Singh D, Ibrahim A, Yohanna T, Singh J (2007) An overview of the applications of multisets. Novi Sad J Math 37(3):73\u201392","journal-title":"Novi Sad J Math"},{"key":"1197_CR20","doi-asserted-by":"crossref","unstructured":"Spertus E, Sahami M, Buyukkokten O (2005) Evaluating similarity measures: a large-scale study in the orkut social network. In: Proceedings of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining. ACM, New York, pp 678\u2013684","DOI":"10.1145\/1081870.1081956"},{"key":"1197_CR21","unstructured":"The Apache Software Foundation (2014) Hadoop. URL: \n                    http:\/\/hadoop.apache.org"},{"key":"1197_CR22","doi-asserted-by":"crossref","unstructured":"Vernica R, Adviser-Carey MJ (2011) Efficient processing of set-similarity joins on large clusters. California State University at Long Beach","DOI":"10.1145\/1807167.1807222"},{"key":"1197_CR23","doi-asserted-by":"crossref","unstructured":"Vernica R, Carey MJ, Li C (2010) Efficient parallel set-similarity joins using mapreduce. In: Proceedings of the 2010 ACM SIGMOD international conference on management of data. ACM, New York, pp 495\u2013506","DOI":"10.1145\/1807167.1807222"},{"key":"1197_CR24","unstructured":"White T (2009) Hadoop: the definitive guide: the definitive guide. O\u2019Reilly Media"},{"key":"1197_CR25","unstructured":"Winkler WE (1999) The state of record linkage and current research problems. In: Statistical Research Division, US Census Bureau, Citeseer"},{"key":"1197_CR26","doi-asserted-by":"crossref","unstructured":"Xiao C, Wang W, Lin X, Shang H (2009) Top-k set similarity joins. In: IEEE 25th international conference on data engineering, 2009. ICDE\u201909. IEEE, New York, pp 916\u2013927","DOI":"10.1109\/ICDE.2009.111"},{"issue":"3","key":"1197_CR27","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1145\/2000824.2000825","volume":"36","author":"C Xiao","year":"2011","unstructured":"Xiao C, Wang W, Lin X, Yu JX, Wang G (2011) Efficient similarity joins for near-duplicate detection. ACM Trans Database Syst (TODS) 36(3):15","journal-title":"ACM Trans Database Syst (TODS)"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1197-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-014-1197-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1197-7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1197-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,2,17]],"date-time":"2020-02-17T21:33:17Z","timestamp":1581975197000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-014-1197-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,5,5]]},"references-count":27,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2014,8]]}},"alternative-id":["1197"],"URL":"https:\/\/doi.org\/10.1007\/s11227-014-1197-7","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,5,5]]},"assertion":[{"value":"5 May 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}