{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T01:44:17Z","timestamp":1755999857055},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,5,1]],"date-time":"2017-05-01T00:00:00Z","timestamp":1493596800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. Comput. Sci. Technol."],"published-print":{"date-parts":[[2017,5]]},"DOI":"10.1007\/s11390-017-1731-1","type":"journal-article","created":{"date-parts":[[2017,5,10]],"date-time":"2017-05-10T01:46:46Z","timestamp":1494380806000},"page":"644-662","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["EntityManager: Managing Dirty Data Based on Entity Resolution"],"prefix":"10.1007","volume":"32","author":[{"given":"Xue-Li","family":"Liu","sequence":"first","affiliation":[]},{"given":"Hong-Zhi","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jian-Zhong","family":"Li","sequence":"additional","affiliation":[]},{"given":"Hong","family":"Gao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,5,12]]},"reference":[{"key":"1731_CR1","doi-asserted-by":"crossref","unstructured":"Andritsos P, Fuxman A, Miller R J. Clean answers over dirty databases: A probabilistic approach. In Proc. the 22nd ICDE, April 2006, Article No. 30.","DOI":"10.1109\/ICDE.2006.35"},{"key":"1731_CR2","doi-asserted-by":"crossref","unstructured":"Fuxman A D, Miller R J. First-order query rewriting for inconsistent databases. In Proc. the 10th ICDT, January 2005, pp.337-351.","DOI":"10.1007\/978-3-540-30570-5_23"},{"key":"1731_CR3","doi-asserted-by":"crossref","unstructured":"Fuxman A, Fazli E, Miller R J. 
Conquer: Efficient management of inconsistent databases. In Proc. SIGMOD, June 2005, pp.155-166.","DOI":"10.1145\/1066157.1066176"},{"key":"1731_CR4","doi-asserted-by":"crossref","unstructured":"Boulos J, Dalvi N, Mandhani B, Mathur S, R\u00e9 C, Suciu D. MYSTIQ: A system for finding more answers by using probabilities. In Proc. SIGMOD, June 2005, pp.891-893.","DOI":"10.1145\/1066157.1066277"},{"issue":"5","key":"1731_CR5","doi-asserted-by":"crossref","first-page":"1141","DOI":"10.1007\/s00778-009-0161-2","volume":"18","author":"O Hassanzadeh","year":"2009","unstructured":"Hassanzadeh O, Miller R J. Creating probabilistic databases from duplicated data. VLDB J., 2009, 18(5): 1141-1166.","journal-title":"VLDB J."},{"key":"1731_CR6","unstructured":"Widom J. Trio: A system for integrated management of data, accuracy, and lineage. In Proc. CIDR, Jan. 2005, pp.262-276."},{"issue":"12","key":"1731_CR7","first-page":"2018","volume":"5","author":"L Getoor","year":"2012","unstructured":"Getoor L, Machanavajjhala A. Entity resolution: Theory, practice & open challenges. PVLDB, 2012, 5(12): 2018-2019.","journal-title":"PVLDB"},{"key":"1731_CR8","unstructured":"Waguih D A, Berti-Equille L. Truth discovery algorithms: An experimental evaluation. arXiv: 1409.6428, May 2014. https:\/\/arxiv.org\/abs\/1409.6428 , Mar. 2017."},{"key":"1731_CR9","unstructured":"Lipner S B, Balenson D M, Ellison C M, Walker S T. System and method for data recovery, September 1996. US Patent 5,557,765. https:\/\/www.google.com\/patents\/us5557765 , Apr. 2017."},{"key":"1731_CR10","unstructured":"Miles M B, Huberman A M. Qualitative Data Analysis: An Expanded Sourcebook. Sage Publications, Inc., 1994."},{"issue":"4","key":"1731_CR11","first-page":"3","volume":"23","author":"E Rahm","year":"2000","unstructured":"Rahm E, Do H H. Data cleaning: Problems and current approaches. IEEE Data Eng. Bull., 2000, 23(4): 3-13.","journal-title":"IEEE Data Eng. 
Bull."},{"key":"1731_CR12","unstructured":"Arasu A, Ganti V, Kaushik R. Efficient exact set-similarity joins. In Proc. the 32nd VLDB, September 2006, pp.918-929."},{"key":"1731_CR13","doi-asserted-by":"crossref","unstructured":"Behm A, Ji S, Li C, Lu J. Space-constrained gram-based indexing for efficient approximate string search. In Proc. ICDE, March 29-April 2, 2009, pp.604-615.","DOI":"10.1109\/ICDE.2009.32"},{"issue":"6","key":"1731_CR14","doi-asserted-by":"crossref","first-page":"1115","DOI":"10.1145\/227683.227684","volume":"42","author":"MX Goemans","year":"1995","unstructured":"Goemans M X, Williamson D P. Improved approximation algorithms for maximum cut and satisfiability problems using semidefinite programming. Journal of the ACM, 1995, 42(6): 1115-1145.","journal-title":"Journal of the ACM"},{"key":"1731_CR15","doi-asserted-by":"crossref","unstructured":"Hadjieleftheriou M, Chandel A, Koudas N, Srivastava D. Fast indexes and algorithms for set similarity selection queries. In Proc. the 24th ICDE, April 2008, pp.267-276.","DOI":"10.1109\/ICDE.2008.4497435"},{"key":"1731_CR16","doi-asserted-by":"crossref","unstructured":"Hadjieleftheriou M, Koudas N, Srivastava D. Incremental maintenance of length normalized indexes for approximate string matching. In Proc. ACM SIGMOD, June 29-July 2, 2009, pp.429-440.","DOI":"10.1145\/1559845.1559891"},{"issue":"1","key":"1731_CR17","first-page":"933","volume":"1","author":"C Xiao","year":"2008","unstructured":"Xiao C, Wang W, Lin X. Ed-Join: An efficient algorithm for similarity joins with edit distance constraints. PVLDB, 2008, 1(1): 933-944.","journal-title":"PVLDB"},{"key":"1731_CR18","doi-asserted-by":"crossref","unstructured":"Xiao C, Wang W, Lin X, Yu J X, Wang G. Efficient similarity joins for near-duplicate detection. 
ACM Transactions on Database Systems, 2011, 36(3): 15:1-15:15.","DOI":"10.1145\/2000824.2000825"},{"key":"1731_CR19","doi-asserted-by":"crossref","unstructured":"Zhang Z, Hadjieleftheriou M, Ooi B C, Srivastava D. Bedtree: An all-purpose index structure for string similarity search based on edit distance. In Proc. SIGMOD, June 2010, pp.915-926.","DOI":"10.1145\/1807167.1807266"},{"key":"1731_CR20","doi-asserted-by":"crossref","unstructured":"Bayardo R J, Ma Y, Srikant R. Scaling up all pairs similarity search. In Proc. the 16th WWW, May 2007, pp.131-140.","DOI":"10.1145\/1242572.1242591"},{"issue":"1","key":"1731_CR21","first-page":"1219","volume":"3","author":"J Wang","year":"2010","unstructured":"Wang J, Li G, Feng J. Trie-join: Efficient trie-based string similarity joins with edit-distance constraints. PVLDB, 2010, 3(1): 1219-1230.","journal-title":"PVLDB"},{"key":"1731_CR22","doi-asserted-by":"crossref","unstructured":"Sarawagi S, Kirpal A. Efficient set joins on similarity predicates. In Proc. ACM SIGMOD, June 2004, pp.743-754.","DOI":"10.1145\/1007568.1007652"},{"key":"1731_CR23","doi-asserted-by":"crossref","unstructured":"Vernica R, Carey M J, Li C. Efficient parallel set-similarity joins using mapreduce. In Proc. ACM SIGMOD, June 2010, pp.495-506.","DOI":"10.1145\/1807167.1807222"},{"key":"1731_CR24","unstructured":"Li C, Wang B, Yang X. VGRAM: Improving performance of approximate queries on string collections using variable-length grams. In Proc. the 33rd VLDB, September 2007, pp.303-314."},{"key":"1731_CR25","doi-asserted-by":"crossref","unstructured":"Wang J, Li G, Feng J. Can we beat the prefix filtering?: An adaptive framework for similarity join and search. In Proc. ACM SIGMOD, May 2012, pp.85-96.","DOI":"10.1145\/2213836.2213847"},{"key":"1731_CR26","doi-asserted-by":"crossref","unstructured":"Ioannidis Y E. The history of histograms (abridged). In Proc. the 29th VLDB, Sept. 
2003, pp.19-30.","DOI":"10.1016\/B978-012722442-8\/50011-2"},{"issue":"3","key":"1731_CR27","doi-asserted-by":"crossref","first-page":"550","DOI":"10.1006\/jcss.1996.0041","volume":"52","author":"PJ Haas","year":"1996","unstructured":"Haas P J, Naughton J F, Seshadri S, Swami A N. Selectivity and cost estimation for joins based on random sampling. Journal of Computer and System Sciences, 1996, 52(3): 550-569.","journal-title":"Journal of Computer and System Sciences"},{"issue":"2","key":"1731_CR28","doi-asserted-by":"crossref","first-page":"278","DOI":"10.1145\/119995.115837","volume":"20","author":"WC Hou","year":"1991","unstructured":"Hou W C, Ozsoyoglu G, Dogdu E. Error-constrained COUNT query evaluation in relational databases. ACM SIGMOD Record, 1991, 20(2): 278-287.","journal-title":"ACM SIGMOD Record"},{"key":"1731_CR29","unstructured":"Olken F. Random sampling from databases [Ph.D. Thesis]. University of California, 1993."},{"issue":"3","key":"1731_CR30","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1023\/B:DAPD.0000018573.35050.25","volume":"15","author":"AH Ngu","year":"2004","unstructured":"Ngu A H, Harangsri B, Shepherd J. Query size estimation for joins using systematic sampling. Distributed and Parallel Databases, 2004, 15(3): 237-275.","journal-title":"Distributed and Parallel Databases"},{"issue":"6","key":"1731_CR31","first-page":"338","volume":"4","author":"H Lee","year":"2011","unstructured":"Lee H, Ng R T, Shim K. Similarity join size estimation using locality sensitive hashing. PVLDB, 2011, 4(6): 338-349.","journal-title":"PVLDB"},{"key":"1731_CR32","doi-asserted-by":"crossref","unstructured":"Tong X, Wang H. Fgram-Tree: An index structure based on feature grams for string approximate search. In Proc. the 13th WAIM, August 2012, pp.241-253.","DOI":"10.1007\/978-3-642-32281-5_24"},{"key":"1731_CR33","unstructured":"Liu X, Wang H, Li J, Gao H. Similarity join algorithm based on entity. Journal of Software, 2015, 26(6): 1421-1437. 
(in Chinese)"},{"key":"1731_CR34","doi-asserted-by":"crossref","unstructured":"Zhang Y, Yang L, Wang H. Range query estimation for dirty data management system. In Proc. the 13th WAIM, August 2012, pp.152-164.","DOI":"10.1007\/978-3-642-32281-5_15"},{"issue":"10","key":"1731_CR35","first-page":"865","volume":"6","author":"X Liu","year":"2012","unstructured":"Liu X, Wang H, Li J, Gao H. Multi-similarity join order selection in entity database. Journal of Frontiers of Computer Science and Technology, 2012, 6(10): 865-876.","journal-title":"Journal of Frontiers of Computer Science and Technology"},{"key":"1731_CR36","unstructured":"Garcia-Molina H, Ullman J D, Widom J. Database System Implementation. Prentice-Hall, 2000."},{"key":"1731_CR37","unstructured":"Abiteboul S, Hull R, Vianu V. Foundations of Databases. Addison-Wesley, 1995."},{"key":"1731_CR38","doi-asserted-by":"crossref","unstructured":"Ilyas I F, Beskales G, Soliman M A. A survey of top-k query processing techniques in relational database systems. ACM Computing Surveys (CSUR), 2008, 40(4): 11:1-11:58.","DOI":"10.1145\/1391729.1391730"},{"key":"1731_CR39","doi-asserted-by":"crossref","unstructured":"Zhang Y, Yang L, Wang H. Similarity join size estimation with threshold for dirty data. Journal of Computers, 2012, 35(10): 2159-2168. (in Chinese)","DOI":"10.3724\/SP.J.1016.2012.02159"},{"issue":"3","key":"1731_CR40","doi-asserted-by":"crossref","first-page":"645","DOI":"10.1109\/TNN.2005.845141","volume":"16","author":"R Xu","year":"2005","unstructured":"Xu R, Wunsch D. Survey of clustering algorithms. IEEE Transactions on Neural Networks, 2005, 16(3): 645-678.","journal-title":"IEEE Transactions on Neural Networks"},{"issue":"6","key":"1731_CR41","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1103\/PhysRevE.70.066111","volume":"70","author":"A Clauset","year":"2004","unstructured":"Clauset A, Newman M E, Moore C. Finding community structure in very large networks. 
Physical Review E, 2004, 70(6): 66-111.","journal-title":"Physical Review E"},{"key":"1731_CR42","doi-asserted-by":"crossref","unstructured":"Li Y, Wang H, Gao H. Efficient entity resolution based on sequence rules. In Proc. CSIE, May 2011, pp.381-388.","DOI":"10.1007\/978-3-642-21402-8_61"},{"key":"1731_CR43","unstructured":"Kuang D, Li X, Ling C X. A new search engine integrating hierarchical browsing and keyword search. In Proc. the 22nd IJCAI, July 2011, pp.2464-2469."}],"container-title":["Journal of Computer Science and Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-017-1731-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11390-017-1731-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-017-1731-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,23]],"date-time":"2019-09-23T23:57:23Z","timestamp":1569283043000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11390-017-1731-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5]]},"references-count":43,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2017,5]]}},"alternative-id":["1731"],"URL":"https:\/\/doi.org\/10.1007\/s11390-017-1731-1","relation":{},"ISSN":["1000-9000","1860-4749"],"issn-type":[{"value":"1000-9000","type":"print"},{"value":"1860-4749","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,5]]}}}