{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,5]],"date-time":"2025-06-05T04:14:39Z","timestamp":1749096879231,"version":"3.41.0"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2016,8,9]],"date-time":"2016-08-09T00:00:00Z","timestamp":1470700800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2017,10]]},"DOI":"10.1007\/s11704-016-5346-4","type":"journal-article","created":{"date-parts":[[2016,8,9]],"date-time":"2016-08-09T11:01:02Z","timestamp":1470740462000},"page":"895-911","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["MapReduce-based entity matching with multiple blocking functions"],"prefix":"10.1007","volume":"11","author":[{"given":"Cheqing","family":"Jin","sequence":"first","affiliation":[]},{"given":"Jie","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Huiping","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,8,9]]},"reference":[{"issue":"1","key":"5346_CR1","doi-asserted-by":"crossref","first-page":"255","DOI":"10.1007\/s00778-008-0098-x","volume":"18","author":"O Benjelloun","year":"2009","unstructured":"Benjelloun O, Garcia-Molina H, Menestrina D, Su Q, Whang S E, Widom J. Swoosh: a generic approach to entity resolution. The VLDB Journal\u2014The International Journal on Very Large Data Bases, 2009, 18(1): 255\u2013276","journal-title":"The VLDB Journal\u2014The International Journal on Very Large Data Bases"},{"key":"5346_CR2","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1145\/956750.956759","volume-title":"Proceedings of the 9th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining.","author":"M Bilenko","year":"2003","unstructured":"Bilenko M, Mooney R J. Adadptive duplicate detection using learnable string similarity measures. In: Proceedings of the 9th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2003, 39\u201348"},{"issue":"1\u20132","key":"5346_CR3","doi-asserted-by":"crossref","first-page":"417","DOI":"10.14778\/1920841.1920897","volume":"3","author":"S T Guo","year":"2010","unstructured":"Guo S T, Dong X L, Srivastava D, Zajac R. Record linkage with uniqueness constraints and erroneous values. Proceedings of the VLDB Endowment, 2010, 3(1\u20132): 417\u2013428","journal-title":"Proceedings of the VLDB Endowment"},{"issue":"11","key":"5346_CR4","doi-asserted-by":"crossref","first-page":"956","DOI":"10.14778\/3402707.3402733","volume":"4","author":"P Li","year":"2011","unstructured":"Li P, Dong X L, Maurino A, Srivastava D. Linkingtemporal records. Proceedings of the VLDB Endowment, 2011, 4(11): 956\u2013967","journal-title":"Proceedings of the VLDB Endowment"},{"issue":"4","key":"5346_CR5","doi-asserted-by":"crossref","first-page":"208","DOI":"10.14778\/1938545.1938546","volume":"4","author":"V Rastogi","year":"2011","unstructured":"Rastogi V, Dalvi N, Garofalakis M. Large-scale collective entity matching. Proceedings of the VLDB Endowment, 2011, 4(4): 208\u2013218","journal-title":"Proceedings of the VLDB Endowment"},{"key":"5346_CR6","first-page":"87","volume-title":"Proceedings of the 6th IEEE International Conference on Data Mining.","author":"M Bilenko","year":"2006","unstructured":"Bilenko M, Kamath B, Mooney R J. Adaptive blocking: learning to scale up record linkage. In: Proceedings of the 6th IEEE International Conference on Data Mining. 2006, 87\u201396"},{"issue":"9","key":"5346_CR7","doi-asserted-by":"crossref","first-page":"1537","DOI":"10.1109\/TKDE.2011.127","volume":"24","author":"P Christen","year":"2012","unstructured":"Christen P. A survey of indexing techniques for scalable record linkage and deduplication. IEEE Transactions on Knowledge and Data Engineering, 2012, 24(9): 1537\u20131555","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"2","key":"5346_CR8","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1145\/1921632.1921635","volume":"5","author":"T Vries De","year":"2011","unstructured":"De Vries T, Ke H, Chawla S, Christen P. Robust record linkage blocking using suffix arrays and bloom filters. ACM Transactions on Knowledge Discovery from Data, 2011, 5(2): 9","journal-title":"ACM Transactions on Knowledge Discovery from Data"},{"key":"5346_CR9","first-page":"440","volume-title":"Proceedings of the National Conference on Artificial Intelligence.","author":"M Michelson","year":"2006","unstructured":"Michelson M, Knoblock C A. Learning blocking schemes for record linkage. In: Proceedings of the National Conference on Artificial Intelligence. 2006, 440\u2013445"},{"issue":"328","key":"5346_CR10","doi-asserted-by":"crossref","first-page":"1183","DOI":"10.1080\/01621459.1969.10501049","volume":"64","author":"I P Fellegi","year":"1969","unstructured":"Fellegi I P, Sunter A B. A theory for record linkage. Journal of the American Statistical Association, 1969, 64(328): 1183\u20131210","journal-title":"Journal of the American Statistical Association"},{"issue":"2","key":"5346_CR11","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1145\/568271.223807","volume":"24","author":"M A Hern\u00e1ndez","year":"1995","unstructured":"Hern\u00e1ndez M A, Stolfo S J. The merge\/purge problem for large databases. ACM SIGMOD Record, 1995, 24(2): 127\u2013138","journal-title":"ACM SIGMOD Record"},{"issue":"6","key":"5346_CR12","first-page":"518","volume":"99","author":"A Gionis","year":"1999","unstructured":"Gionis A, Indyk P, Motwani R. Similarity search in high dimensions via hashing. The VLDB Journal \u2014 The International Journal on Very Large Data Bases, 1999, 99(6): 518\u2013529","journal-title":"The VLDB Journal \u2014 The International Journal on Very Large Data Bases"},{"key":"5346_CR13","first-page":"604","volume-title":"Proceedings of the 30th Annual ACM Symposium on Theory of Computing.","author":"P Indyk","year":"1998","unstructured":"Indyk P, Motwani R. Approximate nearest neighbors: towards removing the curse of dimensionality. In: Proceedings of the 30th Annual ACM Symposium on Theory of Computing. 1998, 604\u2013613"},{"issue":"1","key":"5346_CR14","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1007\/s00450-011-0177-x","volume":"27","author":"L Kolb","year":"2012","unstructured":"Kolb L, Thor A, Rahm E. Multi-pass sorted neighborhood blocking with MapReduce. Computer Science-Research and Development, 2012, 27(1): 45\u201363","journal-title":"Computer Science-Research and Development"},{"key":"5346_CR15","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1145\/1559845.1559870","volume-title":"Proceedings of the 2009 ACM SIGMOD International Conference on Management of Data.","author":"S E Whang","year":"2009","unstructured":"Whang S E, Menestrina D, Koutrika G, Theobald M, Garcia-Molina H. Entity resolution with iterative blocking. In: Proceedings of the 2009 ACM SIGMOD International Conference on Management of Data. 2009, 219\u2013232"},{"key":"5346_CR16","first-page":"618","volume-title":"Proceedings of the 28th IEEE International Conference on Data Engineering.","author":"L Kolb","year":"2012","unstructured":"Kolb L, Thor A, Rahm E. Load balancing for MapReduce-based entity resolution. In: Proceedings of the 28th IEEE International Conference on Data Engineering. 2012, 618\u2013629"},{"issue":"1\u20132","key":"5346_CR17","doi-asserted-by":"crossref","first-page":"484","DOI":"10.14778\/1920841.1920904","volume":"3","author":"H K\u00f6pcke","year":"2010","unstructured":"K\u00f6pcke H, Thor A, Rahm E. Evaluation of entity resolution approaches on real-world match problems. Proceedings of the VLDB Endowment, 2010, 3(1\u20132): 484\u2013493","journal-title":"Proceedings of the VLDB Endowment"},{"key":"5346_CR18","first-page":"1","volume-title":"Proceedings of the 2nd Workshop on Data Analytics in the Cloud.","author":"L Kolb","year":"2013","unstructured":"Kolb L, Thor A, Rahm E. Don\u2019t match twice:redundancy-free similarity computation with MapReduce. In: Proceedings of the 2nd Workshop on Data Analytics in the Cloud. 2013, 1\u20135"},{"issue":"1","key":"5346_CR19","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1007\/s13222-012-0110-x","volume":"13","author":"L Kolb","year":"2013","unstructured":"Kolb L, Rahm E. Parallel entity resolution with dedoop. Datenbank-Spektrum, 2013, 13(1): 23\u201332","journal-title":"Datenbank-Spektrum"},{"issue":"1","key":"5346_CR20","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean J, Ghemawat S. MapReduce: simplified data processing on large clusters. Communications of the ACM, 2008, 51(1): 107\u2013113","journal-title":"Communications of the ACM"},{"key":"5346_CR21","volume-title":"Hadoop: The Definitive Guide","author":"T White","year":"2012","unstructured":"White T. Hadoop: The Definitive Guide. 3rd ed. O\u2019Reilly Media, Inc., 2012","edition":"3"},{"issue":"5","key":"5346_CR22","doi-asserted-by":"crossref","first-page":"604","DOI":"10.1109\/TNET.2002.803864","volume":"10","author":"M Mitzenmacher","year":"2002","unstructured":"Mitzenmacher M. Compressed bloom filters. IEEE\/ACM Transactions on Networking, 2002, 10(5): 604\u2013612","journal-title":"IEEE\/ACM Transactions on Networking"},{"key":"5346_CR23","doi-asserted-by":"crossref","first-page":"495","DOI":"10.1145\/1807167.1807222","volume-title":"Proceedings of the 2010 ACM SIGMOD International Conference on Management of Data.","author":"R Vernica","year":"2010","unstructured":"Vernica R, CareyMJ, Li C. Efficient parallel set-similarity joins using MapReduce. In: Proceedings of the 2010 ACM SIGMOD International Conference on Management of Data. 2010, 495\u2013506"},{"key":"5346_CR24","first-page":"25","volume":"3","author":"R Baxter","year":"2003","unstructured":"Baxter R, Christen P, Churches T. A comparison of fast blocking methods for record linkage. ACM SIGKDD, 2003, 3: 25\u201327","journal-title":"ACM SIGKDD"},{"key":"5346_CR25","first-page":"475","volume-title":"Proceedings of the 8th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining.","author":"W W Cohen","year":"2002","unstructured":"Cohen W W, Richman J. Learning to match and cluster large highdimensional data sets for data integration. In: Proceedings of the 8th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2002, 475\u2013480"},{"key":"5346_CR26","first-page":"137","volume-title":"Proceedings of the 8th International Conference on Database Systems for Advanced Applications.","author":"L Jin","year":"2003","unstructured":"Jin L, Li C, Mehrotra S. Efficient record linkage in large data sets. In: Proceedings of the 8th International Conference on Database Systems for Advanced Applications. 2003, 137\u2013146"},{"issue":"1","key":"5346_CR27","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1007\/s11704-013-3158-3","volume":"8","author":"Y B He","year":"2014","unstructured":"He Y B, Tan H Y, Luo WM, Feng S Z, Fan J P. MR-DBSCAN: a scalable MapReduce-based DBSCAN algorithm for heavily skewed data. Frontiers of Computer Science, 2014, 8(1): 83\u201399","journal-title":"Frontiers of Computer Science"},{"issue":"12","key":"5346_CR28","doi-asserted-by":"crossref","first-page":"1059","DOI":"10.14778\/2732977.2732981","volume":"7","author":"A Sarma Das","year":"2014","unstructured":"Das Sarma A, He Y Y, Chaudhuri S. Clusterjoin: a similarity joins framework using map-reduce. Proceedings of the VLDB Endowment, 2014, 7(12): 1059\u20131070","journal-title":"Proceedings of the VLDB Endowment"},{"key":"5346_CR29","first-page":"340","volume-title":"proceedings of the 30th IEEE International Conference on Data Engineering.","author":"D Deng","year":"2014","unstructured":"Deng D, Li G L, Hao S, Wang J N, Feng J H. Massjoin: a MapReducebased method for scalable string similarity joins. In: proceedings of the 30th IEEE International Conference on Data Engineering. 2014, 340\u2013351"},{"key":"5346_CR30","first-page":"510","volume-title":"Proceedings of the 28th IEEE International Conference on Data Engineering.","author":"Y Kim","year":"2012","unstructured":"Kim Y, Shim K. Parallel top-k similarity join algorithms using MapReduce. In: Proceedings of the 28th IEEE International Conference on Data Engineering. 2012, 510\u2013521"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11704-016-5346-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-016-5346-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-016-5346-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-016-5346-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T16:27:27Z","timestamp":1749054447000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11704-016-5346-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,8,9]]},"references-count":30,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2017,10]]}},"alternative-id":["5346"],"URL":"https:\/\/doi.org\/10.1007\/s11704-016-5346-4","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"type":"print","value":"2095-2228"},{"type":"electronic","value":"2095-2236"}],"subject":[],"published":{"date-parts":[[2016,8,9]]}}}