{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,6,19]],"date-time":"2024-06-19T22:10:11Z","timestamp":1718835011672},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2016,10,11]],"date-time":"2016-10-11T00:00:00Z","timestamp":1476144000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2017,4]]},"DOI":"10.1007\/s11704-016-5231-1","type":"journal-article","created":{"date-parts":[[2016,10,11]],"date-time":"2016-10-11T13:19:36Z","timestamp":1476191976000},"page":"307-319","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["String similarity join with different similarity thresholds based on novel indexing techniques"],"prefix":"10.1007","volume":"11","author":[{"given":"Chuitian","family":"Rong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yasin N.","family":"Silva","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunqing","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,10,11]]},"reference":[{"key":"5231_CR1","first-page":"267","volume-title":"Proceedings of ACM SIGKDD International Conference on Knowledge Discovery and Data Mining.","author":"A Monge","year":"1996","unstructured":"Monge A, Elkan C. The field matching problem: algorithms and applications. In: Proceedings of ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 1996, 267\u2013270"},{"key":"5231_CR2","doi-asserted-by":"crossref","first-page":"915","DOI":"10.1145\/1807167.1807266","volume-title":"Proceedings of ACM SIGMOD International Conference on Management of Data.","author":"Z J Zhang","year":"2010","unstructured":"Zhang Z J, Hadjieleftheriou M, Ooi B, Srivastava D. Bed-tree: an allpurpose index structure for string similarity search based on edit distance. In: Proceedings of ACM SIGMOD International Conference on Management of Data. 2010, 915\u2013926"},{"issue":"12","key":"5231_CR3","doi-asserted-by":"crossref","first-page":"2983","DOI":"10.1109\/TKDE.2014.2309131","volume":"26","author":"W Lu","year":"2014","unstructured":"Lu W, Du X Y, Hadjieleftheriou M, Ooi B C. Efficiently supporting edit distance based string similarity search using b+-trees. IEEE Transactions on Knowledge and Data Engineering, 2014, 26(12): 2983\u20132996","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"1\u20132","key":"5231_CR4","doi-asserted-by":"crossref","first-page":"1219","DOI":"10.14778\/1920841.1920992","volume":"3","author":"J N Wang","year":"2010","unstructured":"Wang J N, Feng J H, Li G L. Trie-join: efficient trie-based string similarity joins with edit-distance constraints. Proceedings of the VLDB Endowment, 2010, 3(1\u20132): 1219\u20131230","journal-title":"Proceedings of the VLDB Endowment"},{"key":"5231_CR5","doi-asserted-by":"crossref","first-page":"743","DOI":"10.1145\/1007568.1007652","volume-title":"Proceedings of ACM SIGMOD International Conference on Management of Data.","author":"S Sarawagi","year":"2004","unstructured":"Sarawagi S, Kirpal A. Efficient set joins on similarity predicates. In: Proceedings of ACM SIGMOD International Conference on Management of Data. 2004, 743\u2013754"},{"key":"5231_CR6","first-page":"61","volume-title":"Proceedings of the 22nd IEEE International Conference on Data Engineering.","author":"S Chaudhuri","year":"2006","unstructured":"Chaudhuri S, Ganti V, Kaushik R. A primitive operator for similarity joins in data cleaning. In: Proceedings of the 22nd IEEE International Conference on Data Engineering. 2006, 61\u201372"},{"key":"5231_CR7","doi-asserted-by":"crossref","first-page":"131","DOI":"10.1145\/1242572.1242591","volume-title":"Proceedings of the 16th ACM International Conference on World Wide Web.","author":"R Bayardo","year":"2007","unstructured":"Bayardo R, Ma Y, Srikant R. Scaling up all pairs similarity search. In: Proceedings of the 16th ACM International Conference on World Wide Web. 2007, 131\u2013140"},{"key":"5231_CR8","first-page":"563","volume-title":"Proceedings of ACM International Conference on World Wide Web.","author":"C Xiao","year":"2008","unstructured":"Xiao C, Wang W, Lin X M, Yu J. Efficient similarity joins for near duplicate detection. In: Proceedings of ACM International Conference on World Wide Web. 2008, 563\u2013574"},{"key":"5231_CR9","first-page":"127","volume-title":"Proceedings of ACM SIGMOD International Conference on Management of Data.","author":"M Hern\u00e1ndez","year":"1995","unstructured":"Hern\u00e1ndez M, Stolfo S. The merge\/purge problem for large databases. In: Proceedings of ACM SIGMOD International Conference on Management of Data. 1995, 127\u2013138"},{"key":"5231_CR10","volume-title":"Technical Report, Statistical Research Division, U.S. Census Bureau","author":"W E Winkler","year":"1999","unstructured":"Winkler W E. The state of record linkage and current research problems. Technical Report, Statistical Research Division, U.S. Census Bureau. 1999"},{"key":"5231_CR11","first-page":"1470","volume-title":"Video google: a text retrieval approach to object matching in videos","author":"J Sivic","year":"2003","unstructured":"Sivic J, Zisserman A. Video google: a text retrieval approach to object matching in videos. 2003, 1470\u20131477"},{"key":"5231_CR12","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1145\/1066157.1066168","volume-title":"Proceedings of ACM SIGMOD International Conference on Management of Data.","author":"X Dong","year":"2005","unstructured":"Dong X, Halevy A, Madhavan J. Reference reconciliation in complex information spaces. In: Proceedings of ACM SIGMOD International Conference on Management of Data. 2005, 85\u201396"},{"key":"5231_CR13","first-page":"269","volume-title":"Proceedings of ACM SIGKDD International Conference on Knowledge Discovery and Data Mining.","author":"S Sarawagi","year":"2002","unstructured":"Sarawagi S, Bhamidipaty A. Interactive deduplication using active learning. In: Proceedings of ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2002, 269\u2013278"},{"key":"5231_CR14","first-page":"952","volume-title":"Proceedings of the 25th IEEE International Conference on Data Engineering.","author":"A R C Arasu","year":"2009","unstructured":"Arasu A, R\u00e9 C, Suciu D. Large-scale deduplication with constraints using dedupalog. In: Proceedings of the 25th IEEE International Conference on Data Engineering. 2009, 952\u2013963"},{"key":"5231_CR15","first-page":"491","volume-title":"Proceedings of the VLDB Endowment.","author":"L Gravano","year":"2001","unstructured":"Gravano L, Ipeirotis P G, Jagadish H V, Koudas N, Muthukrishnan S, Srivastava D. Approximate string joins in a database (almost) for free. In: Proceedings of the VLDB Endowment. 2001, 491\u2013500"},{"issue":"1","key":"5231_CR16","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TKDE.2007.250581","volume":"19","author":"A K Elmagarmid","year":"2007","unstructured":"Elmagarmid A K, Ipeirotis P G, Verykios V S. Duplicate record detection: a survey. IEEE Transactions on Knowledge and Data Engineering, 2007, 19(1): 1\u201316","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"1","key":"5231_CR17","doi-asserted-by":"crossref","first-page":"1","DOI":"10.2200\/S00262ED1V01Y201003DTM003","volume":"2","author":"F Naumann","year":"2010","unstructured":"Naumann F, Herschel M. An Introduction to duplicate detection. Synthesis Lectures on Data Management, 2010, 2(1): 1\u201387","journal-title":"Synthesis Lectures on Data Management"},{"issue":"8","key":"5231_CR18","doi-asserted-by":"crossref","first-page":"625","DOI":"10.14778\/2732296.2732299","volume":"7","author":"Y Jiang","year":"2014","unstructured":"Jiang Y, Li G L, Feng J H, Li W S. String similarity joins: an experimental evaluation. Proceedings of the VLDB Endowment, 2014, 7(8): 625\u2013636","journal-title":"Proceedings of the VLDB Endowment"},{"key":"5231_CR19","doi-asserted-by":"crossref","first-page":"707","DOI":"10.1145\/1559845.1559919","volume-title":"Proceedings of ACM SIGMOD International Conference on Management of Data.","author":"S Chaudhuri","year":"2009","unstructured":"Chaudhuri S, Kaushik R. Extending autocompletion to tolerate errors. In: Proceedings of ACM SIGMOD International Conference on Management of Data. 2009, 707\u2013718"},{"key":"5231_CR20","doi-asserted-by":"crossref","first-page":"673","DOI":"10.1145\/2588555.2593675","volume-title":"Proceedings of ACM SIGMOD International Conference on Management of Data.","author":"D Deng","year":"2014","unstructured":"Deng D, Li G L, Feng J H. A pivotal prefix based filtering algorithm for string similarity search. In: Proceedings of ACM SIGMOD International Conference on Management of Data. 2014, 673\u2013684"},{"key":"5231_CR21","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1145\/2213836.2213847","volume-title":"Proceedings of ACM SIGMOD International Conference on Management of Data.","author":"J N Wang","year":"2012","unstructured":"Wang J N, Li G L, Feng J H. Can we beat the prefix filtering?: an adaptive framework for similarity join and search. In: Proceedings of ACM SIGMOD International Conference on Management of Data. 2012, 85\u201396"},{"issue":"10","key":"5231_CR22","doi-asserted-by":"crossref","first-page":"2217","DOI":"10.1109\/TKDE.2012.195","volume":"25","author":"C T L Rong","year":"2013","unstructured":"Rong C T, LuW, Wang X L, Du X Y, Chen Y G, Tung A K H. Efficient and scalable processing of string similarity join. IEEE Transactions on Knowledge and Data Engineering, 2013, 25(10): 2217\u20132230","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"5231_CR23","doi-asserted-by":"crossref","first-page":"373","DOI":"10.1145\/2463676.2465313","volume-title":"Proceedings of ACM SIGMOD International Conference on Management of Data.","author":"J H Lu","year":"2013","unstructured":"Lu J H, Lin C B, Wang W, Li C, Wang H Y. String similarity measures and joins with synonyms. In: Proceedings of ACM SIGMOD International Conference on Management of Data. 2013, 373\u2013384"},{"key":"5231_CR24","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1145\/2723372.2723733","volume-title":"Proceedings of ACM SIGMOD International Conference on Management of Data.","author":"G L Li","year":"2015","unstructured":"Li G L, He J, Deng D, Li J. Efficient similarity join and search on multi-attribute data. In: Proceedings of ACM SIGMOD International Conference on Management of Data. 2015, 1137\u20131151"},{"key":"5231_CR25","volume-title":"Introduction to Modern Information Retrieval","author":"G Salton","year":"1986","unstructured":"Salton G, McGill M J. Introduction to Modern Information Retrieval. New York: McGraw-Hill, Inc., 1986"},{"key":"5231_CR26","volume-title":"Managing Gigabytes: Compressing and Indexing Documents and Images","author":"I H Witten","year":"1999","unstructured":"Witten I H, Moffat A, Bell T C. Managing Gigabytes: Compressing and Indexing Documents and Images. 2nd ed. San Francisco, CA: Morgan Kaufmann, 1999"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-016-5231-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11704-016-5231-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-016-5231-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,19]],"date-time":"2024-06-19T21:34:38Z","timestamp":1718832878000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11704-016-5231-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10,11]]},"references-count":26,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2017,4]]}},"alternative-id":["5231"],"URL":"https:\/\/doi.org\/10.1007\/s11704-016-5231-1","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,10,11]]}}}