{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:31:31Z","timestamp":1750307491723,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2010,6,6]],"date-time":"2010-06-06T00:00:00Z","timestamp":1275782400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2010,6,6]]},"DOI":"10.1145\/1807167.1807177","type":"proceedings-article","created":{"date-parts":[[2010,6,8]],"date-time":"2010-06-08T12:37:34Z","timestamp":1276000654000},"page":"63-74","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["Sampling dirty data for matching attributes"],"prefix":"10.1145","author":[{"given":"Henning","family":"K\u00f6hler","sequence":"first","affiliation":[{"name":"The University of Queensland, Brisbane, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaofang","family":"Zhou","sequence":"additional","affiliation":[{"name":"The University of Queensland and NICTA, Brisbane, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shazia","family":"Sadiq","sequence":"additional","affiliation":[{"name":"The University of Queensland, Brisbane, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanfeng","family":"Shu","sequence":"additional","affiliation":[{"name":"CSIRO - Tasmanian ICT Centre, Hobart, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kerry","family":"Taylor","sequence":"additional","affiliation":[{"name":"CSIRO - ICT Centre, Canberra, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2010,6,6]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"9","article-title":"Data integration: The teenage years","author":"Halevy A. Y.","year":"2006","unstructured":"A. Y. Halevy , A. Rajaraman , and J. J. Ordille , \" Data integration: The teenage years ,\" in VLDB , 2006 , pp. 9 -- 16 . A. Y. Halevy, A. Rajaraman, and J. J. Ordille, \"Data integration: The teenage years,\" in VLDB, 2006, pp. 9--16.","journal-title":"VLDB"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/s007780100057"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1107499.1107502"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1142351.1142352"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/564691.564719"},{"key":"e_1_3_2_1_6_1","first-page":"21","volume-title":"SEQUENCES: Proceedings of the Compression and Complexity of Sequences. IEEE Computer Society","author":"Broder A.","year":"1997","unstructured":"A. Broder , On the resemblance and containment of documents , in SEQUENCES: Proceedings of the Compression and Complexity of Sequences. IEEE Computer Society , 1997 , p. 21 . A. Broder, On the resemblance and containment of documents, in SEQUENCES: Proceedings of the Compression and Complexity of Sequences. IEEE Computer Society, 1997, p. 21."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0169-7552(97)00031-7"},{"key":"e_1_3_2_1_8_1","first-page":"1","volume-title":"USENIX","author":"Manber U.","year":"1994","unstructured":"U. Manber , Finding similar files in a large file system , in USENIX Winter, 1994 , pp. 1 -- 10 . U. Manber, Finding similar files in a large file system, in USENIX Winter, 1994, pp. 1--10."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/647819.736184"},{"key":"e_1_3_2_1_10_1","volume-title":"A Mathematical Theory of Communication","author":"Shannon C. E.","year":"1948","unstructured":"C. E. Shannon , A Mathematical Theory of Communication . CSLI Publications , 1948 . C. E. Shannon, A Mathematical Theory of Communication. CSLI Publications, 1948."},{"key":"e_1_3_2_1_11_1","first-page":"303","article-title":"Vgram: Improving performance of approximate queries on string collections using variable-length grams","author":"Li C.","year":"2007","unstructured":"C. Li , B. Wang , and X. Yang , Vgram: Improving performance of approximate queries on string collections using variable-length grams , in VLDB , 2007 , pp. 303 -- 314 . C. Li, B. Wang, and X. Yang, Vgram: Improving performance of approximate queries on string collections using variable-length grams, in VLDB, 2007, pp. 303--314.","journal-title":"VLDB"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2007.369032"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2007.9"},{"key":"e_1_3_2_1_14_1","first-page":"3","article-title":"The New Jersey data reduction report","volume":"20","author":"Barbar'a D.","year":"1997","unstructured":"D. Barbar'a , W. Dumouchel , C. Faloutsos , P. J. Haas , J. M. Hellerstein , Y. Ioannidis , H. V. Jagadish , T. Johnson , R. Ng , V. Poosala , K. A. Ross , and K. C. Sevcik , The New Jersey data reduction report , IEEE Data Engineering Bulletin , vol. 20 , pp. 3 -- 45 , 1997 . D. Barbar'a, W. Dumouchel, C. Faloutsos, P. J. Haas, J. M. Hellerstein, Y. Ioannidis, H. V. Jagadish, T. Johnson, R. Ng, V. Poosala, K. A. Ross, and K. C. Sevcik, The New Jersey data reduction report, IEEE Data Engineering Bulletin, vol. 20, pp. 3--45, 1997.","journal-title":"IEEE Data Engineering Bulletin"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00140664"},{"key":"e_1_3_2_1_16_1","first-page":"77","article-title":"Schema mapping as query discovery","author":"Miller R. J.","year":"2000","unstructured":"R. J. Miller , L. M. Haas , and M. A. Hern\u00e1andez , Schema mapping as query discovery , in VLDB , 2000 , pp. 77 -- 88 . R. J. Miller, L. M. Haas, and M. A. Hern\u00e1andez, Schema mapping as query discovery, in VLDB, 2000, pp. 77--88.","journal-title":"VLDB"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1007568.1007612"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2008.4497420"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/276304.276323"},{"key":"e_1_3_2_1_20_1","first-page":"491","article-title":"Approximate string joins in a database (almost) for free","author":"Gravano L.","year":"2001","unstructured":"L. Gravano , P. G. Ipeirotis , H. V. Jagadish , N. Koudas , S. Muthukrishnan , and D. Srivastava , Approximate string joins in a database (almost) for free , in VLDB , 2001 , pp. 491 -- 500 . L. Gravano, P. G. Ipeirotis, H. V. Jagadish, N. Koudas, S. Muthukrishnan, and D. Srivastava, Approximate string joins in a database (almost) for free, in VLDB, 2001, pp. 491--500.","journal-title":"VLDB"},{"key":"e_1_3_2_1_21_1","first-page":"160","article-title":"Simple random sampling from relational databases","author":"Olken F.","year":"1986","unstructured":"F. Olken and D. Rotem , Simple random sampling from relational databases , in VLDB , 1986 , pp. 160 -- 169 . F. Olken and D. Rotem, Simple random sampling from relational databases, in VLDB, 1986, pp. 160--169.","journal-title":"VLDB"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/233269.233340"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/304182.304206"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/342009.335450"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1007568.1007602"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1007568.1007601"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1007568.1007664"}],"event":{"name":"SIGMOD\/PODS '10: International Conference on Management of Data","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Indianapolis Indiana USA","acronym":"SIGMOD\/PODS '10"},"container-title":["Proceedings of the 2010 ACM SIGMOD International Conference on Management of data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1807167.1807177","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1807167.1807177","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T12:17:34Z","timestamp":1750249054000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1807167.1807177"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,6,6]]},"references-count":27,"alternative-id":["10.1145\/1807167.1807177","10.1145\/1807167"],"URL":"https:\/\/doi.org\/10.1145\/1807167.1807177","relation":{},"subject":[],"published":{"date-parts":[[2010,6,6]]},"assertion":[{"value":"2010-06-06","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}