{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:29:16Z","timestamp":1763458156803,"version":"3.45.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,6,14]],"date-time":"2017-06-14T00:00:00Z","timestamp":1497398400000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF IIS","award":["1514491","1453171"],"award-info":[{"award-number":["1514491","1453171"]}]},{"name":"Air Force","award":["FA9550-15-1-0144"],"award-info":[{"award-number":["FA9550-15-1-0144"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,6,14]]},"DOI":"10.1145\/2882903.2882909","type":"proceedings-article","created":{"date-parts":[[2016,6,16]],"date-time":"2016-06-16T15:01:52Z","timestamp":1466089312000},"page":"861-876","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Estimating the Impact of Unknown Unknowns on Aggregate Query Results"],"prefix":"10.1145","author":[{"given":"Yeounoh","family":"Chung","sequence":"first","affiliation":[{"name":"Brown University, Providence, RI, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael Lind","family":"Mortensen","sequence":"additional","affiliation":[{"name":"Aarhus University, Aarhus, Denmark"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Carsten","family":"Binnig","sequence":"additional","affiliation":[{"name":"Brown University, Providence, RI, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tim","family":"Kraska","sequence":"additional","affiliation":[{"name":"Brown University, Providence, RI, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2016,6,14]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"1","volume-title":"SAS global forum","author":"Allison P. D.","year":"2012","unstructured":"P. D. Allison. Handling missing data by maximum likelihood. In SAS global forum, pages 1--21, 2012."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807341"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1993.10594330"},{"key":"e_1_3_2_1_4_1","volume-title":"Estimation of the Size of a Closed Population when Capture Probabilities vary Among Animals. Biometrika, 65(3)","author":"Burnham K. P.","year":"1978","unstructured":"K. P. Burnham and W. S. Overton. Estimation of the Size of a Closed Population when Capture Probabilities vary Among Animals. Biometrika, 65(3), 1978."},{"key":"e_1_3_2_1_5_1","volume-title":"Nonparametric Estimation of the Number of Classes in a Population. SJS, 11(4)","author":"Chao A.","year":"1984","unstructured":"A. Chao. Nonparametric Estimation of the Number of Classes in a Population. SJS, 11(4), 1984."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1002\/0471667196.ess5051"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1992.10475194"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/335168.335230"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.2000.10474263"},{"key":"e_1_3_2_1_10_1","series-title":"Series B (methodological)","first-page":"1","volume-title":"Maximum likelihood from incomplete data via the em algorithm. Journal of the royal statistical society","author":"Dempster A. P.","year":"1977","unstructured":"A. P. Dempster, N. M. Laird, and D. B. Rubin. Maximum likelihood from incomplete data via the em algorithm. Journal of the royal statistical society. Series B (methodological), pages 1--38, 1977."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1924421.1924442"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/645923.673654"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1989323.1989331"},{"key":"e_1_3_2_1_14_1","volume-title":"The Population Frequencies of Species and the Estimation of Population Parameters. Biometrika, 40(3\/4)","author":"Good I. J.","year":"1953","unstructured":"I. J. Good. The Population Frequencies of Species and the Estimation of Population Parameters. Biometrika, 40(3\/4), 1953."},{"volume-title":"https:\/\/www.freebase.com","year":"2015","key":"e_1_3_2_1_15_1","unstructured":"Google. Freebase. https:\/\/www.freebase.com, 2015. Accessed: 2015-07-08."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1609\/hcomp.v1i1.13112"},{"key":"e_1_3_2_1_17_1","volume-title":"IBM","author":"Haas P. J.","year":"1996","unstructured":"P. J. Haas. Hoeffding Inequalities for Join Selectivity Estimation and Online Aggregation. IBM, 1996."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/645921.673295"},{"key":"e_1_3_2_1_19_1","volume-title":"CIDR","author":"Halevy A. Y.","year":"2013","unstructured":"A. Y. Halevy. Data publishing and sharing using fusion tables. In CIDR, 2013."},{"key":"e_1_3_2_1_20_1","volume-title":"Survey sampling","author":"Kish L.","year":"1965","unstructured":"L. Kish. Survey sampling. John Wiley and Sons, 1965."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2588555.2612176"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7908-1834-5_8"},{"key":"e_1_3_2_1_23_1","volume-title":"Houston","author":"Lexa M.","year":"2004","unstructured":"M. Lexa. Useful facts about the kullback-leibler discrimination distance. Houston, Texas, 2004."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.14778\/2535568.2448943"},{"key":"e_1_3_2_1_25_1","volume-title":"Estimation Methods for the Size of Deep Web Textural Data Source: A Survey. cs.uwindsor.ca\/richard\/cs510\/survey_jie_liang.pdf","author":"Liang J.","year":"2008","unstructured":"J. Liang. Estimation Methods for the Size of Deep Web Textural Data Source: A Survey. cs.uwindsor.ca\/richard\/cs510\/survey_jie_liang.pdf, 2008."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-009-9107-y"},{"key":"e_1_3_2_1_27_1","volume-title":"InterStat","author":"Lynch R.","year":"2010","unstructured":"R. Lynch and B. Kim. Sample size, the margin of error and the coefficient of variation. InterStat, 2010."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1805286.1805291"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1989323.1989486"},{"key":"e_1_3_2_1_30_1","first-page":"211","volume-title":"CIDR 2011, Fifth Biennial Conference on Innovative Data Systems Research, Asilomar, CA, USA, January 9--12, 2011, Online Proceedings","author":"Marcus A.","year":"2011","unstructured":"A. Marcus, E. Wu, S. Madden, and R. C. Miller. Crowdsourced databases: Query processing with people. In CIDR 2011, Fifth Biennial Conference on Innovative Data Systems Research, Asilomar, CA, USA, January 9--12, 2011, Online Proceedings, pages 211--214, 2011."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/648299.755182"},{"key":"e_1_3_2_1_32_1","volume-title":"Pearson","author":"McClave J.","year":"2013","unstructured":"J. McClave and T. Sincich. Statistics. Pearson, 2013."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/846218.847280"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.is.2003.12.005"},{"key":"e_1_3_2_1_35_1","volume-title":"In Proceedings of European Conference on Information Systems","author":"Neiling M. T.","year":"2000","unstructured":"M. T. Neiling and H.-J. Lenz. Data integration by means of object identification in information systems. In In Proceedings of European Conference on Information Systems, 2000."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/645913.671474"},{"key":"e_1_3_2_1_37_1","volume-title":"Best practices in data cleaning: A complete guide to everything you need to do before and after collecting your data","author":"Osborne J. W.","year":"2012","unstructured":"J. W. Osborne. Best practices in data cleaning: A complete guide to everything you need to do before and after collecting your data. Sage, 2012."},{"key":"e_1_3_2_1_38_1","volume-title":"Algorithms and Databases. In Proc. of CIDR","author":"Parameswaran A.","year":"2011","unstructured":"A. Parameswaran and N. Polyzotis. Answering Queries using Humans, Algorithms and Databases. In Proc. of CIDR, 2011."},{"volume-title":"How u.s. tech-sector jobs have grown, changed in 15 years","author":"Pew Research Center","key":"e_1_3_2_1_39_1","unstructured":"Pew Research Center. How u.s. tech-sector jobs have grown, changed in 15 years. http:\/\/pewrsr.ch\/PtqZDA, 2014. Accessed: 2015-07-08."},{"issue":"4","key":"e_1_3_2_1_40_1","first-page":"3","article-title":"Problems and current approaches","volume":"23","author":"Rahm E.","year":"2000","unstructured":"E. Rahm and H. H. Do. Data cleaning: Problems and current approaches. IEEE Data Eng. Bull., 23(4):3--13, 2000.","journal-title":"IEEE Data Eng. Bull."},{"key":"e_1_3_2_1_41_1","unstructured":"S. Razniewski F. Korn W. Nutt and D. Srivastava. Identifying the extent of completeness of query answers over partially complete databases."},{"key":"e_1_3_2_1_42_1","volume-title":"Cengage Learning","author":"Rice J.","year":"2006","unstructured":"J. Rice. Mathematical statistics and data analysis. Cengage Learning, 2006."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/63.3.581"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2014.6816764"},{"key":"e_1_3_2_1_45_1","volume-title":"Survey Research","author":"Sapsford R.","year":"1999","unstructured":"R. Sapsford. Survey Research. SAGE Publications, 1999."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2013.6544865"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1046\/j.1365-2656.2003.00748.x"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/1993636.1993727"},{"volume-title":"https:\/\/en.wikipedia.org\/wiki\/68--95--99.7_rule","year":"2015","key":"e_1_3_2_1_49_1","unstructured":"Wikipedia. 68-95-99.7 rule. https:\/\/en.wikipedia.org\/wiki\/68--95--99.7_rule, 2015. Accessed: 2015-07-08."},{"volume-title":"List of u.s. states by gdp. https:\/\/en.wikipedia.org\/wiki\/List_of_U.S._states_by_GDP","year":"2015","key":"e_1_3_2_1_50_1","unstructured":"Wikipedia. List of u.s. states by gdp. https:\/\/en.wikipedia.org\/wiki\/List_of_U.S._states_by_GDP, 2015. Accessed: 2015-07-08."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/1814433.1814443"},{"key":"e_1_3_2_1_52_1","volume-title":"Multiple imputation for missing data: Concepts and new development (version 9.0)","author":"Yuan Y. C.","year":"2010","unstructured":"Y. C. Yuan. Multiple imputation for missing data: Concepts and new development (version 9.0). SAS Institute Inc, Rockville, MD, 2010."}],"event":{"name":"SIGMOD\/PODS'16: International Conference on Management of Data","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"San Francisco California USA","acronym":"SIGMOD\/PODS'16"},"container-title":["Proceedings of the 2016 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2882903.2882909","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2882903.2882909","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2882903.2882909","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:18:19Z","timestamp":1763457499000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2882903.2882909"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,6,14]]},"references-count":52,"alternative-id":["10.1145\/2882903.2882909","10.1145\/2882903"],"URL":"https:\/\/doi.org\/10.1145\/2882903.2882909","relation":{},"subject":[],"published":{"date-parts":[[2016,6,14]]},"assertion":[{"value":"2016-06-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}