{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T07:53:41Z","timestamp":1777017221445,"version":"3.51.4"},"reference-count":31,"publisher":"Elsevier BV","issue":"8","license":[{"start":{"date-parts":[[2001,12,1]],"date-time":"2001-12-01T00:00:00Z","timestamp":1007164800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Information Systems"],"published-print":{"date-parts":[[2001,12]]},"DOI":"10.1016\/s0306-4379(01)00041-2","type":"journal-article","created":{"date-parts":[[2002,10,14]],"date-time":"2002-10-14T17:55:05Z","timestamp":1034618105000},"page":"585-606","source":"Crossref","is-referenced-by-count":53,"title":["A knowledge-based approach for duplicate elimination in data cleaning"],"prefix":"10.1016","volume":"26","author":[{"given":"Wai","family":"Lup Low","sequence":"first","affiliation":[]},{"given":"Mong","family":"Li Lee","sequence":"additional","affiliation":[]},{"given":"Tok","family":"Wang Ling","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/S0306-4379(01)00041-2_BIB1","unstructured":"R. Kimball, Dealing with dirty data, DBMS Online, Available at URL http:\/\/www.dbmsmag.com\/9609d14.htm, 1996."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB2","unstructured":"Infoshare Limited, Best value guide to data standardising, InfoDB, Available at URL http:\/\/www.infoshare.ltd.uk, 1998."},{"issue":"1","key":"10.1016\/S0306-4379(01)00041-2_BIB3","first-page":"52","article-title":"Database research: achievements and opportunities into the 21st century","volume":"25","author":"Silberschatz","year":"1996","journal-title":"SIGMOD Record (ACM Special Interest Group on Management of Data)"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB4","unstructured":"D. Calvanese, G. de Giacomo, M. Lenzerini, D. Nardi, R. Rosati, A principled approach to data integration and reconciliation in data warehousing, in: Proceedings of the International Workshop on Design and Management of Data Warehouses (DMDW\u201999), 1999, Heidelberg, Germany."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB5","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1016\/S0169-023X(97)00004-9","article-title":"Using linguistic knowledge in view integration: toward a third generation of tools","volume":"23","author":"Metais","year":"1997","journal-title":"Data Knowledge Eng."},{"issue":"2","key":"10.1016\/S0306-4379(01)00041-2_BIB6","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1023\/A:1008683107812","article-title":"The TSIMMIS approach to mediation: data models and languages","volume":"8","author":"Papakonstantinou","year":"1997","journal-title":"J. Intell. Inform. Systems"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB7","doi-asserted-by":"crossref","unstructured":"S. Bressan, Cheng Hian Goh, K. Fynn, M. Jakobisiak, K. Hussein, H.B. Kon, T. Lee, S.E. Madnick, T. Pena, J. Qu, A.W. Shum, M. Siegel, The COntext INterchange Mediator Prototype, in: J. Peckham, (Ed.), Proceedings of the 1997 ACM SIGMOD International Conference on Management of Data, Tucson, Arizona, 1997, pp. 525\u2013527.","DOI":"10.1145\/253260.253389"},{"issue":"4","key":"10.1016\/S0306-4379(01)00041-2_BIB8","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1145\/27633.27634","article-title":"A comparative analysis of methodologies for database schema integration","volume":"18","author":"Batini","year":"1986","journal-title":"Comput. Surveys"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB9","doi-asserted-by":"crossref","unstructured":"Mong Li Lee, Tok Wang Ling, Resolving constraint conflicts in the integration of entity-relationship schemas, in: Proceedings of the 16th International Conference on Conceptual Modeling, Los Angeles, California, USA, November 1997, pp. 394\u2013407.","DOI":"10.1007\/3-540-63699-4_32"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB10","unstructured":"B. Kilss, W. Alvey, Record linkage techniques, in: Proceedings of the Workshop on Exact Matching Methodologies, Arlington, Virginia, Dept of the Treasury, Internal Revenue Service, Statistics of Income Division, 1985."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB11","unstructured":"L. Moss, Data cleansing: a dichotomy of data warehousing? DM Review, Available at URL http:\/\/www.dmreview.com\/editorial\/dmreview\/print_action.cfm?EdID=8284, 1998."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB12","doi-asserted-by":"crossref","unstructured":"G. Wiederhold, Intelligent integration of information, in: P. Buneman, S. Jajodia (Eds.), Proceedings of the 1993 ACM SIGMOD International Conference on Management of Data, Washington, DC 1993, pp. 434\u2013437.","DOI":"10.1145\/170035.170118"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB13","doi-asserted-by":"crossref","unstructured":"Mong Li Lee, Hongjun Lu, Tok Wang Ling, Yee Teng Ko, Cleansing data for mining and warehousing, in: Proceedings of the 10th International Conference on Database and Expert Systems Applications (DEXA99), 1999, pp. 751\u2013760.","DOI":"10.1007\/3-540-48309-8_70"},{"issue":"2","key":"10.1016\/S0306-4379(01)00041-2_BIB14","doi-asserted-by":"crossref","first-page":"255","DOI":"10.1145\/319983.319987","article-title":"Duplicate record elimination in large data files","volume":"8","author":"Bitton","year":"1983","journal-title":"ACM Trans. Database Systems"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB15","doi-asserted-by":"crossref","unstructured":"M.A. Hern\u00e1ndez, S.J. Stolfo, The merge\/purge problem for large databases, in: M.J. Carey, D.A. Schneider (Eds.), Proceedings of the 1995 ACM SIGMOD International Conference on Management of Data, San Jose, California, 1995, pp. 127\u2013138.","DOI":"10.1145\/223784.223807"},{"issue":"1","key":"10.1016\/S0306-4379(01)00041-2_BIB16","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1023\/A:1009761603038","article-title":"Real-world data is dirty: data cleansing and the merge\/purge problem","volume":"2","author":"Hern\u00e1ndez","year":"1998","journal-title":"Data Mining Knowledge Discovery"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB17","unstructured":"M. Hernandez, A generalization of band joins and the merge\/purge Problem, Technical Report CUCS-005-1995, Department of Computer Science, Columbia University, 1996."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB18","unstructured":"A.E. Monge, C.P. Elkan, An efficient domain-independent algorithm for detecting approximately duplicate database records, in: Proceedings of the ACM-SIGMOD Workshop on Research Issues on Knowledge Discovery and Data Mining. Tucson, AZ, 1997."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB19","unstructured":"M. Jo Waller, A comparison of two incremental merge\/purge strategies, Master Thesis, University of Illinois, 1998."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB20","doi-asserted-by":"crossref","unstructured":"Y.R. Wang, S.E. Madnick, The inter-database instance identification problem in integrating autonomous systems, in: Proceedings of the Fifth International Conference on Data Engineering, February 6\u201310, 1989, Los Angeles, California, USA, IEEE Computer Society, Silver Spring, MD, 1999, pp. 46\u201355.","DOI":"10.1109\/ICDE.1989.47199"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB21","unstructured":"A.E. Monge, C.P. Elkan, The field matching problem: algorithms and applications, in: E. Simoudis, Jia Wei Han, U. Fayyad (Eds.), Proceedings of the Second International Conference on Knowledge Discovery and Data Mining (KDD-96), AAAI Press, 1996, p. 267, Portland, Oregon, USA."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB22","doi-asserted-by":"crossref","unstructured":"W.W. Cohen, Integration of heterogeneous databases without common domains using queries based on textual similarity, in: L.M. Haas, A. Tiwary (Eds.), SIGMOD 1998, Proceedings ACM SIGMOD International Conference on Management of Data, June 2\u20134, 1998, Seattle, Washington, USA, ACM Press, New York, 1998, pp. 201\u2013212.","DOI":"10.1145\/276304.276323"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB23","unstructured":"V. Raman, J.M. Hellerstein, Potters wheel: an interactive framework for data cleaning and transformation, http:\/\/control.cs.berkeley.edu\/abc\/index.html, 2000."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB24","unstructured":"D.S.H. Galhardas, D. Florescu, E. Simon, An extensible framework for data cleaning, INRIA Technical Report, 1999."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB25","unstructured":"E.J. Friedman-Hill, JESS, the Java Expert System Shell, Available at URL http:\/\/herzberg.ca.sandia.gov\/jess\/, 1999."},{"issue":"1","key":"10.1016\/S0306-4379(01)00041-2_BIB26","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1016\/0004-3702(82)90020-0","article-title":"Rete: a fast algorithm for the many patterns\/many objects match problem","volume":"19","author":"Forgy","year":"1982","journal-title":"Artif. Intell."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB27","series-title":"Expert Systems: Principles and Programming","author":"Giarratano","year":"1998"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB28","unstructured":"G. Riley, CLIPS: A tool for building expert systems, Available at URL http:\/\/www.ghg.net\/clips\/CLIPS.html, 1999."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB29","doi-asserted-by":"crossref","unstructured":"H. Hinrichs, K. Panienski, Experiences with knowledge-based data cleansing at the epidemiological cancer registry of lower-saxony, in: F. Puppe (Ed.), XPS-99: Knowledge-Based Systems. Survey and Future Directions, Springer-Verlag, Germany, March 1999, pp. 218\u2013225.","DOI":"10.1007\/10703016_18"},{"key":"10.1016\/S0306-4379(01)00041-2_BIB30","unstructured":"A. Maydanchik, Challenges of efficient data cleansing, DM Review, Available at URL http:\/\/www.dmreview.com\/editorial\/dmreview\/print_action.cfm?EdID=1403, 1999."},{"key":"10.1016\/S0306-4379(01)00041-2_BIB31","unstructured":"E.-P. Lim, J. Srivastava, S. Shekhar, Resolving attribute incompatibility in database integration: an evidential reasoning approach, in: A.K. Elmagarmid, E. Neuhold, (Eds.), Proceedings of the 10th International Conference on Data Engineering, Houston, TX, IEEE Computer Society Press, Silver Spring, MD, 1994, pp. 154\u2013165."}],"container-title":["Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0306437901000412?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0306437901000412?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T13:09:07Z","timestamp":1733404147000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0306437901000412"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2001,12]]},"references-count":31,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2001,12]]}},"alternative-id":["S0306437901000412"],"URL":"https:\/\/doi.org\/10.1016\/s0306-4379(01)00041-2","relation":{},"ISSN":["0306-4379"],"issn-type":[{"value":"0306-4379","type":"print"}],"subject":[],"published":{"date-parts":[[2001,12]]}}}