{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T08:00:23Z","timestamp":1725868823264},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319480565"},{"type":"electronic","value":"9783319480572"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-48057-2_4","type":"book-chapter","created":{"date-parts":[[2016,10,22]],"date-time":"2016-10-22T04:37:01Z","timestamp":1477111021000},"page":"53-71","source":"Crossref","is-referenced-by-count":0,"title":["TLCSim: A Large-Scale Two-Level Clustering Similarity Search with MapReduce"],"prefix":"10.1007","author":[{"given":"Trong Nhan","family":"Phan","sequence":"first","affiliation":[]},{"given":"Markus","family":"J\u00e4ger","sequence":"additional","affiliation":[]},{"given":"Stefan","family":"Nadschl\u00e4ger","sequence":"additional","affiliation":[]},{"given":"Pablo","family":"G\u00f3mez-P\u00e9rez","sequence":"additional","affiliation":[]},{"given":"Christian","family":"Huber","sequence":"additional","affiliation":[]},{"given":"Josef","family":"K\u00fcng","sequence":"additional","affiliation":[]},{"given":"Cong An","family":"Nguyen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,10,23]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Alabduljalil, M.A., Tang, X., Yang, T.: Optimizing parallel algorithms for all pairs similarity search. In: Proceedings of the 6th ACM WSDM, pp. 203\u2013212. ACM, New York (2013)","DOI":"10.1145\/2433396.2433422"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Baraglia, R., De Francisci, M., Lucchese, C.: Document similarity self-join with MapReduce. In: Proceedings of the 2010 IEEE ICDM, pp. 731\u2013736. IEEE Computer Society, Washington (2010)","DOI":"10.1109\/ICDM.2010.70"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Bayardo, R.J., Ma, Y., Srikant, R.: Scaling up all pairs similarity search. In: Proceedings of the 16th WWW, pp. 131\u2013140. ACM, New York (2007)","DOI":"10.1145\/1242572.1242591"},{"key":"4_CR4","volume-title":"Introduction to Algorithms","author":"TH Cormen","year":"2001","unstructured":"Cormen, T.H., Stein, C., Rivest, R.L., Leiserson, C.E.: Introduction to Algorithms. McGraw-Hill Higher Education, New York (2001)"},{"issue":"1","key":"4_CR5","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: MapReduce: simplified data processing on large clusters. Commun. ACM 51(1), 107\u2013113 (2008)","journal-title":"Commun. ACM"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Deng, D., Li, G., Hao, S., Wang, J., Feng, J.: MassJoin: a MapReduce-based method for scalable string similarity joins. In: 30th IEEE ICDE, pp. 340\u2013351 (2014)","DOI":"10.1109\/ICDE.2014.6816663"},{"issue":"1","key":"4_CR7","first-page":"15","volume":"36","author":"J Dittrich","year":"2013","unstructured":"Dittrich, J., Richter, S., Schuh, S., Quian-Ruiz, J.-A.: Efficient or Hadoop: why not both? IEEE Data Eng. Bull. 36(1), 15\u201323 (2013)","journal-title":"IEEE Data Eng. Bull."},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Drew, J., Hahsler, M.: Strand: fast sequence comparison using MapReduce and locality sensitive hashing. In: Proceedings of the 5th ACM BCB, pp. 506\u2013513. ACM, New York (2014)","DOI":"10.1145\/2649387.2649436"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Elsayed, T., Lin, J., Oard, D.W.: Pairwise document similarity in large collections with MapReduce. In: Proceedings of the 46th ACL-HLT: Short Papers, pp. 265\u2013268. Association for Computational Linguistics, Stroudsburg (2008)","DOI":"10.3115\/1557690.1557767"},{"key":"4_CR10","volume-title":"Fundamentals of Discrete Math for Computer Science: A Problem-Solving Primer","author":"T Jenkyns","year":"2012","unstructured":"Jenkyns, T., Stephenson, B.: Fundamentals of Discrete Math for Computer Science: A Problem-Solving Primer. Springer, London (2012)"},{"key":"4_CR11","series-title":"Sorting and Searching","volume-title":"The Art of Computer Programming","author":"DE Knuth","year":"1998","unstructured":"Knuth, D.E.: The Art of Computer Programming. Sorting and Searching, vol. 3, 2nd edn. Addison Wesley Longman Publishing Co. Inc., Boston (1998)","edition":"2"},{"key":"4_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"412","DOI":"10.1007\/978-3-642-20291-9_46","volume-title":"Web Technologies and Applications","author":"R Li","year":"2011","unstructured":"Li, R., Ju, L., Peng, Z., Yu, Z., Wang, C.: Batch text similarity search with MapReduce. In: Du, X., Fan, W., Wang, J., Peng, Z., Sharaf, M.A. (eds.) APWeb 2011. LNCS, vol. 6612, pp. 412\u2013423. Springer, Heidelberg (2011). doi: 10.1007\/978-3-642-20291-9_46"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Lin, J.: Brute force and indexed approaches to pairwise document similarity comparisons with MapReduce. In: Proceedings of the 32nd ACM SIGIR, pp. 155\u2013162. ACM, New York (2009)","DOI":"10.1145\/1571941.1571970"},{"issue":"8","key":"4_CR14","doi-asserted-by":"crossref","first-page":"704","DOI":"10.14778\/2212351.2212353","volume":"5","author":"A Metwally","year":"2012","unstructured":"Metwally, A., Faloutsos, C.: V-SMART-Join: a scalable MapReduce framework for all-pair similarity joins of multisets and vectors. Proc. VLDB Endowment 5(8), 704\u2013715 (2012)","journal-title":"Proc. VLDB Endowment"},{"key":"4_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1007\/978-3-319-10067-8_5","volume-title":"Data Management in Cloud, Grid and P2P Systems","author":"TN Phan","year":"2014","unstructured":"Phan, T.N., K\u00fcng, J., Dang, T.K.: An elastic approximate similarity search in very large datasets with MapReduce. In: Hameurlain, A., Dang, T.K., Morvan, F. (eds.) Globe 2014. LNCS, vol. 8648, pp. 49\u201360. Springer, Heidelberg (2014)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Phan, T.N., J\u00e4ger, M., Nadschl\u00e4ger, S., K\u00fcng, J., Dang, T.K.: An efficient document indexing-based similarity search in large datasets. In: Proceedings of the 2nd FDSE, pp. 16\u201331 (2015)","DOI":"10.1007\/978-3-319-26135-5_2"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Phan, T.N., K\u00fcng, J., Dang, T.K.: eHSim: an efficient hybrid similarity search with MapReduce. In: Proceedings of the 30th IEEE AINA, pp. 422\u2013429. IEEE Computer Society (2016)","DOI":"10.1109\/AINA.2016.37"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Rajaraman, A., Ullman, J.D.: Chapter 3: finding similar items. In: Mining of Massive Datasets, pp. 71\u2013127. Cambridge University Press (2011)","DOI":"10.1017\/CBO9781139058452"},{"issue":"10","key":"4_CR19","first-page":"2217","volume":"25","author":"C Rong","year":"2013","unstructured":"Rong, C., Lu, W., Wang, X., Du, X., Chen, Y., Tung, A.K.H.: Efficient and scalable processing of string similarity join. IEEE TKDE 25(10), 2217\u20132230 (2013)","journal-title":"IEEE TKDE"},{"issue":"5","key":"4_CR20","doi-asserted-by":"crossref","first-page":"430","DOI":"10.14778\/2140436.2140440","volume":"5","author":"V Satuluri","year":"2012","unstructured":"Satuluri, V., Parthasarathy, S.: Bayesian locality sensitive hashing for fast similarity search. Proc. VLDB Endowment 5(5), 430\u2013441 (2012)","journal-title":"Proc. VLDB Endowment"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Theobald, M., Siddharth, J., Paepcke, A.: SpotSigs: robust and efficient near duplicate detection in large web collections. In: Proceedings of the 31st ACM SIGIR, pp. 563\u2013570. ACM, New York (2008)","DOI":"10.1145\/1390334.1390431"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Vernica, R., Carey, M.J., Li, C.: Efficient parallel set-similarity joins using MapReduce. In: Proceedings of the 2010 ACM SIGMOD, pp. 495\u2013506. ACM, New York (2010)","DOI":"10.1145\/1807167.1807222"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Wang, J., Li, G., Deng, D., Zhang, Y., Feng, J.: Two birds with one stone: an efficient hierarchical framework for top-k and threshold-based string similarity search. In: Gehrke, J., et al. (ed.) 31st IEEE ICDE, pp. 519\u2013530 (2015)","DOI":"10.1109\/ICDE.2015.7113311"},{"issue":"3","key":"4_CR24","doi-asserted-by":"crossref","first-page":"15:1","DOI":"10.1145\/2000824.2000825","volume":"36","author":"C Xiao","year":"2011","unstructured":"Xiao, C., Wang, W., Lin, X., Yu, J.X., Wang, G.: Efficient similarity joins for near-duplicate detection. ACM TODS 36(3), 15:1\u201315:41 (2011)","journal-title":"ACM TODS"},{"issue":"1","key":"4_CR25","first-page":"1605","volume":"14","author":"RB Zadeh","year":"2013","unstructured":"Zadeh, R.B., Goel, A.: Dimension independent similarity computation. J. Mach. Learn. Res. 14(1), 1605\u20131626 (2013)","journal-title":"J. Mach. Learn. Res."},{"key":"4_CR26","volume-title":"Similarity Search: The Metric Space Approach","author":"P Zezula","year":"2010","unstructured":"Zezula, P., Amato, G., Dohnal, V., Batko, M.: Similarity Search: The Metric Space Approach. Springer, New York (2010)"},{"key":"4_CR27","unstructured":"Zhang, D., Yang, G., Hu, Y., Jin, Z., Cai, D., He, X.: A unified approximate nearest neighbor search scheme by combining data structure and hashing. In: Proceedings of the 23rd IJCAI, pp. 681\u2013687. AAAI Press (2013)"}],"container-title":["Lecture Notes in Computer Science","Future Data and Security Engineering"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-48057-2_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,11]],"date-time":"2022-07-11T00:10:52Z","timestamp":1657498252000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-48057-2_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319480565","9783319480572"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-48057-2_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]}}}