{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T05:24:38Z","timestamp":1742966678948,"version":"3.40.3"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319232003"},{"type":"electronic","value":"9783319232010"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-23201-0_19","type":"book-chapter","created":{"date-parts":[[2015,8,27]],"date-time":"2015-08-27T11:57:35Z","timestamp":1440676655000},"page":"165-174","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Cross-Checking Data Sources in MapReduce"],"prefix":"10.1007","author":[{"given":"Foto","family":"Afrati","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zaid","family":"Momani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nikos","family":"Stasinopoulos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,8,28]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Dong, X.L., Gabrilovich, E., Heitz, G., Horn, W., Murphy, K., Sun, S., Zhang, W.: From data fusion to knowledge fusion. Proceedings of the VLDB Endowment 7(10) (2014)","DOI":"10.14778\/2732951.2732962"},{"issue":"1","key":"19_CR2","doi-asserted-by":"publisher","first-page":"550","DOI":"10.14778\/1687627.1687690","volume":"2","author":"XL Dong","year":"2009","unstructured":"Dong, X.L., Berti-Equille, L., Srivastava, D.: Integrating conflicting data: the role of source dependence. Proceedings of the VLDB Endowment 2(1), 550\u2013561 (2009)","journal-title":"Proceedings of the VLDB Endowment"},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Afrati, F.N., Ullman, J.D.: Optimizing joins in a map-reduce environment. In: Proceedings of the 13th International Conference on Extending Database Technology, pp. 99\u2013110. ACM (2010)","DOI":"10.1145\/1739041.1739056"},{"issue":"440","key":"19_CR4","doi-asserted-by":"publisher","first-page":"1268","DOI":"10.1080\/01621459.1997.10473648","volume":"92","author":"FT Juster","year":"1997","unstructured":"Juster, F.T., Smith, J.P.: Improving the quality of economic data: Lessons from the hrs and ahead. Journal of the American Statistical Association 92(440), 1268\u20131278 (1997)","journal-title":"Journal of the American Statistical Association"},{"key":"19_CR5","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1146\/annurev.psych.58.110405.085530","volume":"60","author":"JW Graham","year":"2009","unstructured":"Graham, J.W.: Missing data analysis: Making it work in the real world. Annual Review of Psychology 60, 549\u2013576 (2009)","journal-title":"Annual Review of Psychology"},{"issue":"4","key":"19_CR6","doi-asserted-by":"publisher","first-page":"1012","DOI":"10.1111\/j.1741-3737.2005.00191.x","volume":"67","author":"AC Acock","year":"2005","unstructured":"Acock, A.C.: Working with missing values. Journal of Marriage and Family 67(4), 1012\u20131028 (2005)","journal-title":"Journal of Marriage and Family"},{"key":"19_CR7","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1007\/3-540-45554-X_46","volume-title":"Rough Sets and Current Trends in Computing","author":"JW Grzyma\u0142a-Busse","year":"2001","unstructured":"Grzyma\u0142a-Busse, J.W., Hu, M.: A comparison of several approaches to missing attribute values in data mining. In: Ziarko, W.P., Yao, Y. (eds.) RSCTC 2000. LNCS (LNAI), vol. 2005, pp. 378\u2013385. Springer, Heidelberg (2001)"},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"Padmanabhan, B., Zheng, Z., Kimbrough, S.O.: Personalization from incomplete data: what you don\u2019t know can hurt. In: Proceedings of the Seventh ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 154\u2013163. ACM (2001)","DOI":"10.1145\/502512.502535"},{"issue":"01","key":"19_CR9","first-page":"2007","volume":"15","author":"M Magnani","year":"2004","unstructured":"Magnani, M.: Techniques for dealing with missing data in knowledge discovery tasks. Obtido 15(01), 2007 (2004). http:\/\/magnanim.web.cs.unibo.it\/index.html","journal-title":"Obtido"},{"issue":"2","key":"19_CR10","doi-asserted-by":"publisher","first-page":"97","DOI":"10.14778\/2535568.2448943","volume":"6","author":"X Li","year":"2012","unstructured":"Li, X., Dong, X.L., Lyons, K., Meng, W., Srivastava, D.: Truth finding on the deep web: is the problem solved? Proceedings of the VLDB Endowment 6(2), 97\u2013108 (2012)","journal-title":"Proceedings of the VLDB Endowment"},{"key":"19_CR11","doi-asserted-by":"crossref","unstructured":"Dempster, A.P., Laird, N.M., Rubin, D.B.: Maximum likelihood from incomplete data via the em algorithm. Journal of the Royal Statistical Society. Series B (methodological), 1\u201338 (1977)","DOI":"10.1111\/j.2517-6161.1977.tb01600.x"},{"issue":"12","key":"19_CR12","first-page":"1131","volume":"7","author":"FN Afrati","year":"2014","unstructured":"Afrati, F.N., Delorey, D., Pasumansky, M., Ullman, J.D.: Storing and querying tree-structured records in dremel. PVLDB 7(12), 1131\u20131142 (2014)","journal-title":"PVLDB"},{"issue":"6","key":"19_CR13","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1145\/1953122.1953148","volume":"54","author":"S Melnik","year":"2011","unstructured":"Melnik, S., Gubarev, A., Long, J.J., Romer, G., Shivakumar, S., Tolton, M., Vassilakis, T.: Dremel: interactive analysis of web-scale datasets. Commun. ACM 54(6), 114\u2013123 (2011)","journal-title":"Commun. ACM"},{"issue":"1","key":"19_CR14","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: Mapreduce: simplified data processing on large clusters. Communications of the ACM 51(1), 107\u2013113 (2008)","journal-title":"Communications of the ACM"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Vernica, R., Carey, M.J., Li, C.: Efficient parallel set-similarity joins using mapreduce. In: Proceedings of the 2010 ACM SIGMOD International Conference on Management of Data, pp. 495\u2013506. ACM (2010)","DOI":"10.1145\/1807167.1807222"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Kolb, L., Thor, A., Rahm, E.: Load balancing for mapreduce-based entity resolution. In: 2012 IEEE 28th International Conference on Data Engineering (ICDE), pp. 618\u2013629. IEEE (2012)","DOI":"10.1109\/ICDE.2012.22"},{"key":"19_CR17","unstructured":"McNeill, N., Kardes, H., Borthwick, A.: Dynamic record blocking: efficient linking of massive databases in mapreduce. In: Proceedings of the 10th International Workshop on Quality in Databases (QDB) (2012)"},{"key":"19_CR18","unstructured":"Mestre, D.G., Pires, C.E.: An adaptive blocking approach for entity matching with mapreduce"},{"issue":"1","key":"19_CR19","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1007\/s13222-012-0110-x","volume":"13","author":"L Kolb","year":"2013","unstructured":"Kolb, L., Rahm, E.: Parallel entity resolution with dedoop. Datenbank-Spektrum 13(1), 23\u201332 (2013)","journal-title":"Datenbank-Spektrum"},{"key":"19_CR20","doi-asserted-by":"crossref","unstructured":"Kolb, L., Thor, A., Rahm, E.: Don\u2019t match twice: redundancy-free similarity computation with mapreduce. In: Proceedings of the Second Workshop on Data Analytics in the Cloud, pp. 1\u20135. ACM (2013)","DOI":"10.1145\/2486767.2486768"},{"key":"19_CR21","unstructured":"U.S. General Services Administration: U.S. government\u2018s open data (2013). http:\/\/www.data.gov\/ (accessed June 19, 2015)"}],"container-title":["Communications in Computer and Information Science","New Trends in Databases and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-23201-0_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,21]],"date-time":"2023-02-21T06:28:05Z","timestamp":1676960885000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-23201-0_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319232003","9783319232010"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-23201-0_19","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"28 August 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}