{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T23:34:09Z","timestamp":1743118449597,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319252544"},{"type":"electronic","value":"9783319252551"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-25255-1_42","type":"book-chapter","created":{"date-parts":[[2015,9,24]],"date-time":"2015-09-24T05:39:46Z","timestamp":1443073186000},"page":"509-521","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Random-Based Algorithm for Efficient Entity Matching"],"prefix":"10.1007","author":[{"given":"Pingfu","family":"Chao","sequence":"first","affiliation":[]},{"given":"Zhu","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Yuming","family":"Li","sequence":"additional","affiliation":[]},{"given":"Junhua","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Rong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Aoying","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,11,13]]},"reference":[{"key":"42_CR1","doi-asserted-by":"crossref","unstructured":"Baraglia, R., De Francisci Morales, G., Lucchese, C.: Document similarity self-join with mapreduce. In: 2010 IEEE 10th International Conference on Data Mining (ICDM), pp. 731\u2013736. IEEE (2010)","DOI":"10.1109\/ICDM.2010.70"},{"key":"42_CR2","doi-asserted-by":"crossref","unstructured":"Charikar, M.S.: Similarity estimation techniques from rounding algorithms. In: Proceedings of the Thiry-Fourth Annual ACM symposium on Theory of Computing, pp. 380\u2013388. ACM (2002)","DOI":"10.1145\/509907.509965"},{"issue":"1","key":"42_CR3","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J. Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: Mapreduce: simplified data processing on large clusters. Communications of the ACM\u00a051(1), 107\u2013113 (2008)","journal-title":"Communications of the ACM"},{"issue":"328","key":"42_CR4","doi-asserted-by":"publisher","first-page":"1183","DOI":"10.1080\/01621459.1969.10501049","volume":"64","author":"I.P. Fellegi","year":"1969","unstructured":"Fellegi, I.P., Sunter, A.B.: A theory for record linkage. Journal of the American Statistical Association\u00a064(328), 1183\u20131210 (1969)","journal-title":"Journal of the American Statistical Association"},{"issue":"6","key":"42_CR5","doi-asserted-by":"publisher","first-page":"1115","DOI":"10.1145\/227683.227684","volume":"42","author":"M.X. Goemans","year":"1995","unstructured":"Goemans, M.X., Williamson, D.P.: Improved approximation algorithms for maximum cut and satisfiability problems using semidefinite programming. Journal of the ACM (JACM)\u00a042(6), 1115\u20131145 (1995)","journal-title":"Journal of the ACM (JACM)"},{"key":"42_CR6","doi-asserted-by":"crossref","unstructured":"Indyk, P., Motwani, R.: Approximate nearest neighbors: towards removing the curse of dimensionality. In: Proceedings of the Thirtieth Annual ACM Symposium on Theory of Computing, pp. 604\u2013613. ACM (1998)","DOI":"10.1145\/276698.276876"},{"key":"42_CR7","doi-asserted-by":"crossref","unstructured":"Kiefer, T., Volk, P.B., Lehner, W.: Pairwise element computation with mapreduce. In: Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing, pp. 826\u2013833. ACM (2010)","DOI":"10.1145\/1851476.1851595"},{"key":"42_CR8","doi-asserted-by":"crossref","unstructured":"Kim, Y., Shim, K.: Parallel top-k similarity join algorithms using mapreduce. In: 2012 IEEE 28th International Conference on Data Engineering (ICDE), pp. 510\u2013521. IEEE (2012)","DOI":"10.1109\/ICDE.2012.87"},{"key":"42_CR9","doi-asserted-by":"crossref","unstructured":"Kolb, L., Thor, A., Rahm, E.: Parallel sorted neighborhood blocking with mapreduce. arXiv preprint arXiv:1010.3053 (2010)","DOI":"10.1007\/s00450-011-0177-x"},{"issue":"12","key":"42_CR10","doi-asserted-by":"publisher","first-page":"1878","DOI":"10.14778\/2367502.2367527","volume":"5","author":"L. Kolb","year":"2012","unstructured":"Kolb, L., Thor, A., Rahm, E.: Dedoop: efficient deduplication with hadoop. Proceedings of the VLDB Endowment\u00a05(12), 1878\u20131881 (2012)","journal-title":"Proceedings of the VLDB Endowment"},{"key":"42_CR11","doi-asserted-by":"crossref","unstructured":"Kolb, L., Thor, A., Rahm, E.: Load balancing for mapreduce-based entity resolution. In: 2012 IEEE 28th International Conference on Data Engineering (ICDE), pp. 618\u2013629. IEEE (2012)","DOI":"10.1109\/ICDE.2012.22"},{"issue":"1","key":"42_CR12","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1007\/s00450-011-0177-x","volume":"27","author":"L. Kolb","year":"2012","unstructured":"Kolb, L., Thor, A., Rahm, E.: Multi-pass sorted neighborhood blocking with mapreduce. Computer Science-Research and Development\u00a027(1), 45\u201363 (2012)","journal-title":"Computer Science-Research and Development"},{"key":"42_CR13","doi-asserted-by":"crossref","unstructured":"Kolb, L., Thor, A., Rahm, E.: Don\u2019t match twice: redundancy-free similarity computation with mapreduce. In: Proceedings of the Second Workshop on Data Analytics in the Cloud, pp. 1\u20135. ACM (2013)","DOI":"10.1145\/2486767.2486768"},{"issue":"10","key":"42_CR14","doi-asserted-by":"publisher","first-page":"1016","DOI":"10.14778\/2336664.2336674","volume":"5","author":"W. Lu","year":"2012","unstructured":"Lu, W., Shen, Y., Chen, S., Ooi, B.C.: Efficient processing of k nearest neighbor joins using mapreduce. Proceedings of the VLDB Endowment\u00a05(10), 1016\u20131027 (2012)","journal-title":"Proceedings of the VLDB Endowment"},{"key":"42_CR15","doi-asserted-by":"crossref","unstructured":"Newcombe, H., Kennedy, J., Axford, S., James, A.: Automatic linkage of vital records (1959)","DOI":"10.1126\/science.130.3381.954"},{"key":"42_CR16","doi-asserted-by":"crossref","unstructured":"Ravichandran, D., Pantel, P., Hovy, E.: Randomized algorithms and nlp: using locality sensitive hash function for high speed noun clustering. In: Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics, pp. 622\u2013629. Association for Computational Linguistics (2005)","DOI":"10.3115\/1219840.1219917"},{"key":"42_CR17","doi-asserted-by":"crossref","unstructured":"Toutanova, K., Klein, D., Manning, C.D., Singer, Y.: Feature-rich part-of-speech tagging with a cyclic dependency network. In: Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology, vol.\u00a01, pp. 173\u2013180. Association for Computational Linguistics (2003)","DOI":"10.3115\/1073445.1073478"},{"key":"42_CR18","doi-asserted-by":"crossref","unstructured":"Toutanova, K., Manning, C.D.: Enriching the knowledge sources used in a maximum entropy part-of-speech tagger. In: Proceedings of the 2000 Joint SIGDAT Conference on Empirical Methods in Natural Language Processing and Very Large Corpora: Held in Conjunction with the 38th Annual Meeting of the Association for Computational Linguistics, vol.\u00a013, pp. 63\u201370. Association for Computational Linguistics (2000)","DOI":"10.3115\/1117794.1117802"},{"key":"42_CR19","doi-asserted-by":"crossref","unstructured":"Vernica, R., Carey, M.J., Li, C.: Efficient parallel set-similarity joins using mapreduce. In: Proceedings of the 2010 ACM SIGMOD International Conference on Management of data, pp. 495\u2013506. ACM (2010)","DOI":"10.1145\/1807167.1807222"},{"issue":"1","key":"42_CR20","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1145\/281250.281256","volume":"32","author":"J. Zobel","year":"1998","unstructured":"Zobel, J., Moffat, A.: Exploring the similarity space. SIGIR Forum\u00a032(1), 18\u201334 (1998)","journal-title":"SIGIR Forum"}],"container-title":["Lecture Notes in Computer Science","Web Technologies and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-25255-1_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,22]],"date-time":"2022-05-22T05:40:14Z","timestamp":1653198014000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-25255-1_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319252544","9783319252551"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-25255-1_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"13 November 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}