{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T15:25:16Z","timestamp":1776093916496,"version":"3.50.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2008,3,11]],"date-time":"2008-03-11T00:00:00Z","timestamp":1205193600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2009,1]]},"DOI":"10.1007\/s00778-008-0098-x","type":"journal-article","created":{"date-parts":[[2008,3,10]],"date-time":"2008-03-10T13:44:40Z","timestamp":1205156680000},"page":"255-276","source":"Crossref","is-referenced-by-count":288,"title":["Swoosh: a generic approach to entity resolution"],"prefix":"10.1007","volume":"18","author":[{"given":"Omar","family":"Benjelloun","sequence":"first","affiliation":[]},{"given":"Hector","family":"Garcia-Molina","sequence":"additional","affiliation":[]},{"given":"David","family":"Menestrina","sequence":"additional","affiliation":[]},{"given":"Qi","family":"Su","sequence":"additional","affiliation":[]},{"given":"Steven Euijong","family":"Whang","sequence":"additional","affiliation":[]},{"given":"Jennifer","family":"Widom","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2008,3,11]]},"reference":[{"key":"98_CR1","doi-asserted-by":"crossref","unstructured":"Ananthakrishna, R., Chaudhuri, S., Ganti, V.: Eliminating fuzzy duplicates in data warehouses. In: Proceedings of VLDB, pp. 586\u2013597 (2002)","DOI":"10.1016\/B978-155860869-6\/50058-5"},{"key":"98_CR2","unstructured":"Arasu, A., Ganti, V., Kaushik, R.: Efficient exact set-similarity joins. In: VLDB, pp. 918\u2013929 (2006)"},{"key":"98_CR3","doi-asserted-by":"crossref","unstructured":"Bansal, N., Blum, A., Chawla, S.: Correlation clustering. In: FOCS, p. 238 (2002)","DOI":"10.1109\/SFCS.2002.1181947"},{"key":"98_CR4","unstructured":"Baxter, R., Christen, P., Churches, T.: A comparison of fast blocking methods for record linkage. In: Proceedings of ACM SIGKDD\u201903 Workshop on Data Cleaning, Record Linkage, and Object Consolidation (2003). http:\/\/citeseer.ist.psu.edu\/article\/baxter03comparison.html"},{"key":"98_CR5","doi-asserted-by":"crossref","unstructured":"Bekkerman, R., McCallum, A.: Disambiguating web appearances of people in a social network. In: WWW, pp. 463\u2013470 (2005)","DOI":"10.1145\/1060745.1060813"},{"key":"98_CR6","unstructured":"Benjelloun, O., Garcia-Molina, H., Jonas, J., Menestrina, D., Whang, S., Su, Q., Widom, J.: Swoosh : a generic approach to entity resolution. Technical Report, Stanford University (2006). http:\/\/dbpubs.stanford.edu\/pub\/2005-5"},{"key":"98_CR7","doi-asserted-by":"crossref","unstructured":"Benjelloun, O., Garcia-Molina, H., Kawai, H., Larson, T.E., Menestrina, D., Thavisomboon, S.: D-Swoosh : a family of algorithms for generic, distributed entity resolution. In: ICDCS (2007)","DOI":"10.1109\/ICDCS.2007.96"},{"key":"98_CR8","doi-asserted-by":"crossref","unstructured":"Bhattacharya, I., Getoor, L.: Iterative record linkage for cleaning and integration. In: Proceedings of SIGMOD Workshop on Research Issues on Data Mining and Knowledge Discovery (2004)","DOI":"10.1145\/1008694.1008697"},{"key":"98_CR9","doi-asserted-by":"crossref","unstructured":"Bhattacharya, I., Getoor, L.: A latent dirichlet model for unsupervised entity resolution. In: Sixth SIAM Conference on Data Mining (2006)","DOI":"10.1137\/1.9781611972764.5"},{"key":"98_CR10","unstructured":"Blume, M.: Automatic entity disambiguation: benefits to NER, relation extraction, link analysis, and inference. In: International Conference on Intelligence Analysis (2005). https:\/\/analysis.mitre.org\/"},{"key":"98_CR11","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Ganjam, K., Ganti, V., Motwani, R.: Robust and efficient fuzzy match for online data cleaning. In: Proceedings of ACM SIGMOD, pp. 313\u2013324 (2003)","DOI":"10.1145\/872757.872796"},{"key":"98_CR12","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Ganti, V., Motwani, R.: Robust identification of fuzzy duplicates. In: Proceedings of ICDE, Tokyo, Japan (2005)","DOI":"10.1109\/ICDE.2005.125"},{"key":"98_CR13","doi-asserted-by":"crossref","first-page":"288","DOI":"10.1145\/352595.352598","volume":"18","author":"W. Cohen","year":"2000","unstructured":"Cohen, W.: Data integration using similarity joins and a word-based information representation language. ACM Trans. Inf. Syst. 18, 288\u2013321 (2000)","journal-title":"ACM Trans. Inf. Syst."},{"key":"98_CR14","doi-asserted-by":"crossref","unstructured":"Dong, X., Halevy, A.Y., Madhavan, J.: Reference reconciliation in complex information spaces. In: Proceedings of ACM SIGMOD (2005)","DOI":"10.1145\/1066157.1066168"},{"issue":"328","key":"98_CR15","doi-asserted-by":"crossref","first-page":"1183","DOI":"10.1080\/01621459.1969.10501049","volume":"64","author":"I.P. Fellegi","year":"1969","unstructured":"Fellegi, I.P., Sunter, A.B.: A theory for record linkage. J. Am. Stat. Assoc. 64(328), 1183\u20131210 (1969)","journal-title":"J. Am. Stat. Assoc."},{"key":"98_CR16","unstructured":"Galhardas, H., Florescu, D., Shasha, D., Simon, E., Saita, C.A.: Declarative data cleaning: Language, model, and algorithms. In: Proceedings of VLDB, pp. 371\u2013380 (2001)"},{"key":"98_CR17","unstructured":"Gravano, L., Ipeirotis, P.G., Jagadish, H.V., Koudas, N., Muthukrishnan, S., Srivastava, D.: Approximate string joins in a database (almost) for free. In: VLDB, pp. 491\u2013500 (2001)"},{"key":"98_CR18","unstructured":"Gu, L., Baxter, R., Vickers, D., Rainsford, C.: Record linkage: current practice and future directions. Technical Report 03\/83, CSIRO Mathematical and Information Sciences (2003)"},{"key":"98_CR19","doi-asserted-by":"crossref","unstructured":"Hern\u00e1ndez, M.A., Stolfo, S.J.: The merge\/purge problem for large databases. In: Proceedings of ACM SIGMOD, pp. 127\u2013138 (1995)","DOI":"10.1145\/568271.223807"},{"issue":"1","key":"98_CR20","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1023\/A:1009761603038","volume":"2","author":"M.A. Hern\u00e1ndez","year":"1998","unstructured":"Hern\u00e1ndez, M.A., Stolfo, S.J.: Real-world data is dirty: data cleansing and the merge\/purge problem. Data Min. Knowl. Discov. 2(1), 9\u201337 (1998)","journal-title":"Data Min. Knowl. Discov."},{"key":"98_CR21","unstructured":"IBM: DB2 Entity Analytic Solutions. http:\/\/www-306.ibm.com\/software\/data\/db2\/eas\/"},{"issue":"406","key":"98_CR22","doi-asserted-by":"crossref","first-page":"414","DOI":"10.1080\/01621459.1989.10478785","volume":"84","author":"M.A. Jaro","year":"1989","unstructured":"Jaro, M.A.: Advances in record-linkage methodology as applied to matching the 1985 census of tampa, florida. J. Am. Stat. Assoc. 84(406), 414\u2013420 (1989)","journal-title":"J. Am. Stat. Assoc."},{"key":"98_CR23","doi-asserted-by":"crossref","unstructured":"Jin, L., Li, C., Mehrotra, S.: Efficient record linkage in large data sets. In: Proceedings of International Conference on Database Systems for Advanced Applications, p. 137 (2003)","DOI":"10.1109\/DASFAA.2003.1192377"},{"key":"98_CR24","doi-asserted-by":"crossref","unstructured":"Kalashnikov, D.V., Mehrotra, S., Chen, Z.: Exploiting relationships for domain-independent data cleaning. In: Proceedings of the SIAM International Conference on Data Mining, Newport Beach, CA (2005)","DOI":"10.1137\/1.9781611972757.24"},{"key":"98_CR25","doi-asserted-by":"crossref","unstructured":"McCallum, A.K., Nigam, K., Ungar, L.: Efficient clustering of high-dimensional data sets with application to reference matching. In: Proceedings of KDD, pp. 169\u2013178, Boston, MA (2000)","DOI":"10.1145\/347090.347123"},{"key":"98_CR26","unstructured":"Menestrina, D., Benjelloun, O., Garcia-Molina, H.: Generic entity resolution with data confidences. In: CleanDB (2006)"},{"key":"98_CR27","unstructured":"Monge, A.E., Elkan, C.: An efficient domain-independent algorithm for detecting approximately duplicate database records. In: Proceedings of SIGMOD Workshop on Research Issues on Data Mining and Knowledge Discovery, pp. 23\u201329 (1997)"},{"issue":"2","key":"98_CR28","doi-asserted-by":"crossref","first-page":"176","DOI":"10.1016\/j.inffus.2004.10.001","volume":"7","author":"A. Motro","year":"2006","unstructured":"Motro, A., Anokhin, P.: Fusionplex: resolution of data inconsistencies in the integration of heterogeneous information sources. Inf. Fusion 7(2), 176\u2013196 (2006)","journal-title":"Inf. Fusion"},{"issue":"3381","key":"98_CR29","doi-asserted-by":"crossref","first-page":"954","DOI":"10.1126\/science.130.3381.954","volume":"130","author":"H.B. Newcombe","year":"1959","unstructured":"Newcombe, H.B., Kennedy, J.M., Axford, S.J., James, A.P.: Automatic linkage of vital records. Science 130(3381), 954\u2013959 (1959)","journal-title":"Science"},{"key":"98_CR30","unstructured":"Parag, D.P.: Multi-relational record linkage. In: Proceedings of the KDD-2004 Workshop on Multi-Relational Data Mining, pp. 31\u201348 (2004)"},{"key":"98_CR31","doi-asserted-by":"crossref","unstructured":"Sarawagi, S., Bhamidipaty, A.: Interactive deduplication using active learning. In: Proceedings of ACM SIGKDD, Edmonton, Alberta (2002)","DOI":"10.1145\/775047.775087"},{"key":"98_CR32","doi-asserted-by":"crossref","unstructured":"Schallehn, E., Sattler, K.U., Saake, G.: Extensible and similarity-based grouping for data integratio. In: ICDE, p. 277 (2002)","DOI":"10.1109\/ICDE.2002.994731"},{"key":"98_CR33","doi-asserted-by":"crossref","unstructured":"Singla, P., Domingos, P.: Object identification with attribute-mediated dependences. In: Proceedings of PKDD, pp. 297 \u2013 308 (2005)","DOI":"10.1007\/11564126_31"},{"key":"98_CR34","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1016\/0022-2836(81)90087-5","volume":"147","author":"T.F. Smith","year":"1981","unstructured":"Smith, T.F., Waterman, M.S.: Identification of common molecular subsequences. J. Mol. Biol. 147, 195\u2013197 (1981)","journal-title":"J. Mol. Biol."},{"issue":"2","key":"98_CR35","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1145\/321879.321884","volume":"22","author":"R.E. Tarjan","year":"1975","unstructured":"Tarjan, R.E.: Efficiency of a good but not linear set union algorithm. J. ACM. 22(2), 215\u2013225 (1975)","journal-title":"J. ACM."},{"issue":"8","key":"98_CR36","doi-asserted-by":"crossref","first-page":"635","DOI":"10.1016\/S0306-4379(01)00043-6","volume":"26","author":"S. Tejada","year":"2001","unstructured":"Tejada, S., Knoblock, C.A., Minton, S.: Learning object identification rules for information integration. Inf. Syst. J. 26(8), 635\u2013656 (2001)","journal-title":"Inf. Syst. J."},{"key":"98_CR37","doi-asserted-by":"crossref","unstructured":"Verykios, V.S., Moustakides, G.V., Elfeky, M.G.: A bayesian decision model for cost optimal record matching. VLDB J. 12(1), 28\u201340(2003). http:\/\/www.cs.purdue.edu\/homes\/mgelfeky\/Papers\/vldbj12(1 ).pdf","DOI":"10.1007\/s00778-002-0072-y"},{"key":"98_CR38","unstructured":"Winkler, W.: Overview of record linkage and current research directions. Technical Report, Statistical Research Division, U.S. Bureau of the Census, Washington, DC (2006)"},{"key":"98_CR39","unstructured":"Winkler, W.E.: Using the EM algorithm for weight computation in the Fellegi\u2013Sunter model of record linkage. In: American Statistical Association, Proceedings of the Section on Survey Research Methods, pp. 667\u2013671 (1988)"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-008-0098-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00778-008-0098-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-008-0098-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,29]],"date-time":"2025-01-29T00:59:05Z","timestamp":1738112345000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00778-008-0098-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,3,11]]},"references-count":39,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2009,1]]}},"alternative-id":["98"],"URL":"https:\/\/doi.org\/10.1007\/s00778-008-0098-x","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2008,3,11]]}}}