{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,28]],"date-time":"2025-10-28T00:29:18Z","timestamp":1761611358419,"version":"3.40.4"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2014,8,5]],"date-time":"2014-08-05T00:00:00Z","timestamp":1407196800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2015,2]]},"DOI":"10.1007\/s00778-014-0367-9","type":"journal-article","created":{"date-parts":[[2014,8,4]],"date-time":"2014-08-04T09:54:32Z","timestamp":1407146072000},"page":"143-167","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["A unified framework for approximate dictionary-based entity extraction"],"prefix":"10.1007","volume":"24","author":[{"given":"Dong","family":"Deng","sequence":"first","affiliation":[]},{"given":"Guoliang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jianhua","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Zhiguo","family":"Gong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,8,5]]},"reference":[{"issue":"1","key":"367_CR1","first-page":"945","volume":"1","author":"S Agrawal","year":"2008","unstructured":"Agrawal, S., Chakrabarti, K., Chaudhuri, S., Ganti, V.: Scalable ad-hoc entity extraction from text collections. PVLDB 1(1), 945\u2013957 (2008)","journal-title":"PVLDB"},{"key":"367_CR2","unstructured":"Arasu, A., Ganti, V., Kaushik, R.: Efficient exact setsimilarity joins. In: VLDB, pp. 918\u2013929 (2006)"},{"key":"367_CR3","doi-asserted-by":"crossref","unstructured":"Bayardo, R.J., Ma, Y., Srikant, R.: Scaling up all pairs similarity search. In WWW, pp. 131\u2013140 (2007)","DOI":"10.1145\/1242572.1242591"},{"key":"367_CR4","doi-asserted-by":"crossref","unstructured":"Chakrabarti, K., Chaudhuri, S., Ganti, V., Xin, D.: An efficient filter for approximate membership checking. In: SIGMOD Conference, pp. 805\u2013818 (2008)","DOI":"10.1145\/1376616.1376697"},{"key":"367_CR5","doi-asserted-by":"crossref","unstructured":"Chandel, A., Nagesh, P. C., Sarawagi, S.: Efficient batch top-k search for dictionary-based entity recognition. In: ICDE, pp. 28 (2006)","DOI":"10.1109\/ICDE.2006.55"},{"key":"367_CR6","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Ganjam, K., Ganti, V., Motwani, R.: Robust and efficient fuzzy match for online data cleaning. In: SIGMOD Conference, pp. 313\u2013324 (2003)","DOI":"10.1145\/872757.872796"},{"key":"367_CR7","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Ganti, V., Kaushik, R.: A primitive operator for similarity joins in data cleaning. In ICDE, pp. 5\u201316 (2006)","DOI":"10.1109\/ICDE.2006.9"},{"key":"367_CR8","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Ganti, V., Motwani, R.: Robust identification of fuzzy duplicates. In: ICDE, pp. 865\u2013876 (2005)","DOI":"10.1109\/ICDE.2005.125"},{"issue":"1","key":"367_CR9","first-page":"395","volume":"2","author":"S Chaudhuri","year":"2009","unstructured":"Chaudhuri, S., Ganti, V., Xin, D.: Mining document collections to facilitate accurate approximate entity matching. PVLDB 2(1), 395\u2013406 (2009)","journal-title":"PVLDB"},{"key":"367_CR10","doi-asserted-by":"crossref","unstructured":"Deng, D., Li, G., Feng, J.: An efficient trie-based method for approximate entity extraction with editdistance constraints. In: ICDE, pp. 762\u2013773 (2012)","DOI":"10.1109\/ICDE.2012.29"},{"key":"367_CR11","unstructured":"Deng, D., Li, G., Feng, J., Li, W.-S.: Top-k string similarity search with edit-distance constraints. In: ICDE, pp. 925\u2013936 (2013)"},{"issue":"4","key":"367_CR12","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1007\/s00778-011-0252-8","volume":"21","author":"J Feng","year":"2012","unstructured":"Feng, J., Wang, J., Li, G.: Trie-join: a trie-based method for efficient string similarity joins. VLDB J. 21(4), 437\u2013461 (2012)","journal-title":"VLDB J."},{"key":"367_CR13","unstructured":"Gravano, L., Ipeirotis, P.G., Jagadish, H.V., Koudas, N., Muthukrishnan, S., Srivastava, D.: Approximate string joins in a database (almost) for free. In: VLDB, pp. 491\u2013500 (2001)"},{"key":"367_CR14","doi-asserted-by":"crossref","unstructured":"Hadjieleftheriou, M., Chandel, A., Koudas, N., Srivastava, D.: Fast indexes and algorithms for set similarity selection queries. In: ICDE, pp. 267\u2013276 (2008)","DOI":"10.1109\/ICDE.2008.4497435"},{"key":"367_CR15","doi-asserted-by":"crossref","unstructured":"Hadjieleftheriou, M., Koudas, N., Srivastava, D.: Incremental maintenance of length normalized indexes for approximate string matching. In: SIGMOD Conference, pp. 429\u2013440 (2009)","DOI":"10.1145\/1559845.1559891"},{"issue":"1","key":"367_CR16","first-page":"201","volume":"1","author":"M Hadjieleftheriou","year":"2008","unstructured":"Hadjieleftheriou, M., Yu, X., Koudas, N., Srivastava, D.: Hashed samples: selectivity estimators for set similarity selection queries. PVLDB 1(1), 201\u2013212 (2008)","journal-title":"PVLDB"},{"key":"367_CR17","unstructured":"Kim, M.-S., Whang, K.-Y., Lee, J.-G., Lee, M.-J.: ngram\/ 2l: a space and time efficient two-level n-gram inverted index structure. In: VLDB, pp. 325\u2013336 (2005)"},{"key":"367_CR18","unstructured":"Koudas, N., Li, C., Tung, A.K.H., Vernica, R.: Relaxing join and selection queries. In: VLDB, pp. 199\u2013210 (2006)"},{"key":"367_CR19","unstructured":"Lee, H., Ng, R.T., Shim, K.: Extending q-grams to estimate selectivity of string matching with low edit distance. In: VLDB, pp. 195\u2013206 (2007)"},{"issue":"1","key":"367_CR20","first-page":"658","volume":"2","author":"H Lee","year":"2009","unstructured":"Lee, H., Ng, R.T., Shim, K.: Power-law based estimation of set similarity join size. PVLDB 2(1), 658\u2013669 (2009)","journal-title":"PVLDB"},{"key":"367_CR21","doi-asserted-by":"crossref","unstructured":"Li, C., Lu, J., Lu, Y.: Efficient merging and filtering algorithms for approximate string searches. In: ICDE, pp. 257\u2013266 (2008)","DOI":"10.1109\/ICDE.2008.4497434"},{"key":"367_CR22","unstructured":"Li, C., Wang, B., Yang, X.: Vgram: Improving performance of approximate queries on string collections using variable-length grams. In: VLDB, pp. 303\u2013314 (2007)"},{"key":"367_CR23","doi-asserted-by":"crossref","unstructured":"Li, G., Deng, D., Feng, J.: Faerie: efficient filtering algorithms for approximate dictionary-based entity extraction. In: SIGMOD Conference, pp. 529\u2013540 (2011)","DOI":"10.1145\/1989323.1989379"},{"issue":"2","key":"367_CR24","first-page":"9","volume":"38","author":"G Li","year":"2013","unstructured":"Li, G., Deng, D., Feng, J.: A partition-based method for string similarity joins with edit-distance constraints. ACM Trans. Database Syst. 38(2), 9 (2013)","journal-title":"ACM Trans. Database Syst."},{"issue":"3","key":"367_CR25","first-page":"253","volume":"5","author":"G Li","year":"2011","unstructured":"Li, G., Deng, D., Wang, J., Feng, J.: Pass-join: a partition-based method for similarity joins. PVLDB 5(3), 253\u2013264 (2011)","journal-title":"PVLDB"},{"key":"367_CR26","doi-asserted-by":"crossref","unstructured":"Lu, J., Han, J., Meng, X.: Efficient algorithms for approximate member extraction using signature-based inverted lists. In: CIKM, pp. 315\u2013324 (2009)","DOI":"10.1145\/1645953.1645995"},{"key":"367_CR27","doi-asserted-by":"crossref","unstructured":"Sarawagi, S., Kirpal, A.: Efficient set joins on similarity predicates. In: SIGMOD Conference, pp. 743\u2013754 (2004)","DOI":"10.1145\/1007568.1007652"},{"issue":"1","key":"367_CR28","first-page":"1219","volume":"3","author":"J Wang","year":"2010","unstructured":"Wang, J., Li, G., Feng, J.: Trie-join: efficient trie-based string similarity joins with edit-distance constraints. PVLDB 3(1), 1219\u20131230 (2010)","journal-title":"PVLDB"},{"key":"367_CR29","doi-asserted-by":"crossref","unstructured":"Wang, J., Li, G., Feng, J.: Fast-join: an efficient method for fuzzy token matching based string similarity join. In: ICDE, pp. 458\u2013469 (2011)","DOI":"10.1109\/ICDE.2011.5767865"},{"key":"367_CR30","doi-asserted-by":"crossref","unstructured":"Wang, J., Li, G., Feng, J.: Can we beat the prefix filtering?: an adaptive framework for similarity join and search. In: SIGMOD conference, pp. 85\u201396 (2012)","DOI":"10.1145\/2213836.2213847"},{"key":"367_CR31","doi-asserted-by":"crossref","unstructured":"Wang, W., Xiao, C., Lin, X., Zhang, C.: Efficient approximate entity extraction with edit distance constraints. In: SIGMOD Conference (2009)","DOI":"10.1145\/1559845.1559925"},{"issue":"1","key":"367_CR32","first-page":"933","volume":"1","author":"C Xiao","year":"2008","unstructured":"Xiao, C., Wang, W., Lin, X.: Ed-join: an efficient algorithm for similarity joins with edit distance constraints. PVLDB 1(1), 933\u2013944 (2008)","journal-title":"PVLDB"},{"key":"367_CR33","doi-asserted-by":"crossref","unstructured":"Xiao, C., Wang, W., Lin, X., Shang, H.: Top-k set similarity joins. In: ICDE, pp. 916\u2013927 (2009)","DOI":"10.1109\/ICDE.2009.111"},{"key":"367_CR34","doi-asserted-by":"crossref","unstructured":"Xiao, C., Wang, W., Lin, X. and Yu, J.X.: Efficient similarity joins for near duplicate detection. In: WWW (2008)","DOI":"10.1145\/1367497.1367516"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-014-0367-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00778-014-0367-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-014-0367-9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T01:56:53Z","timestamp":1746323813000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00778-014-0367-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,8,5]]},"references-count":34,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,2]]}},"alternative-id":["367"],"URL":"https:\/\/doi.org\/10.1007\/s00778-014-0367-9","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"type":"print","value":"1066-8888"},{"type":"electronic","value":"0949-877X"}],"subject":[],"published":{"date-parts":[[2014,8,5]]}}}