{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T06:58:49Z","timestamp":1775890729846,"version":"3.50.1"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2003,1,1]],"date-time":"2003-01-01T00:00:00Z","timestamp":1041379200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2003,1,1]],"date-time":"2003-01-01T00:00:00Z","timestamp":1041379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Data Mining and Knowledge Discovery"],"published-print":{"date-parts":[[2003,1]]},"DOI":"10.1023\/a:1021564703268","type":"journal-article","created":{"date-parts":[[2003,3,20]],"date-time":"2003-03-20T20:44:11Z","timestamp":1048193051000},"page":"81-99","source":"Crossref","is-referenced-by-count":213,"title":["A Taxonomy of Dirty Data"],"prefix":"10.1007","volume":"7","author":[{"given":"Won","family":"Kim","sequence":"first","affiliation":[]},{"given":"Byoung-Ju","family":"Choi","sequence":"additional","affiliation":[]},{"given":"Eui-Kyeong","family":"Hong","sequence":"additional","affiliation":[]},{"given":"Soo-Kyung","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Doheon","family":"Lee","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"5107142_CR1","unstructured":"The Applied Technology Group. 1998. Building a successful CRM environment. White Paper, The Applied Technology Group, available at http:\/\/www.techguide.com\/."},{"key":"5107142_CR2","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1145\/291469.291471","volume":"42","author":"D. Ballou","year":"1999","unstructured":"Ballou, D. and Tayi, G.K. 1999. Enhancing data quality in data warehouse environments. Communications of the ACM, 42(1):73\u201378.","journal-title":"Communications of the ACM"},{"key":"5107142_CR3","volume-title":"Data Mining Techniques for Marketing, Sales and Customer Support","author":"M. Berry","year":"1997","unstructured":"Berry, M. and Linoff, G. 1997. Data Mining Techniques for Marketing, Sales and Customer Support. New York: John Wiley and Sons."},{"key":"5107142_CR4","unstructured":"Berson, A. and Smith, S. 1997. Data Warehousing, Data Mining, and OLAP (Data Warehousing\/Data Management). Computing, McGraw-Hill."},{"key":"5107142_CR5","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1016\/0165-0114(82)90052-5","volume":"7","author":"B. Buckles","year":"1982","unstructured":"Buckles, B. and Petry, E. 1982. A fuzzy representation of data for relational databases. Fuzzy Sets and Systems, 7:213\u2013226.","journal-title":"Fuzzy Sets and Systems"},{"key":"5107142_CR6","doi-asserted-by":"crossref","unstructured":"Codd, E.F. 1979. Extending the database relational model to capture more meaning. ACM Transaction on Database Systems, 4(4).","DOI":"10.1145\/320107.320109"},{"key":"5107142_CR7","first-page":"150","volume":"31","author":"Cutter Information Corporation.","year":"1998","unstructured":"Cutter Information Corporation. 1998. Data management strategies newsletter on the state of the data warehousing industry. Management Science, 31:150\u2013162.","journal-title":"Management Science"},{"key":"5107142_CR8","volume-title":"Relational Database Writing 1994\u20131997","author":"C. Date","year":"1998","unstructured":"Date, C. 1998. Faults and defaults. In Relational Database Writing 1994\u20131997 (tin five parts). (C.J. Date, H. Darwen, and D. McGoveran (Eds.)). Reading, MA: Addison-Wesley."},{"key":"5107142_CR9","volume-title":"An Introduction to Database Systems","author":"C. Date","year":"2000","unstructured":"Date, C. 2000.An Introduction to Database Systems, 7th edn. Reading, MA: Addison-Wesley.","edition":"7th"},{"key":"5107142_CR10","doi-asserted-by":"crossref","unstructured":"Dey, D. and Sarkar, S. 1996.Aprobabilistic relational model and algebra. ACM Transactions on Database Systems, 21(3).","DOI":"10.1145\/232753.232796"},{"key":"5107142_CR11","volume-title":"Improving Data Warehouse and Business Information Quality-Method for Reducing Costs and Increasing Profits","author":"L. English","year":"1999","unstructured":"English, L. 1999. Improving Data Warehouse and Business Information Quality-Method for Reducing Costs and Increasing Profits. New York: Wiley."},{"key":"5107142_CR12","doi-asserted-by":"crossref","unstructured":"Etzion, O., Jajodia, S., and Sripada, S. (Eds.). 1998. Temporal Databases: Research and Practice, Lecture Notes in Computer Science, Vol. 1399. Berlin: Springer-Verlag.","DOI":"10.1007\/BFb0053695"},{"key":"5107142_CR13","unstructured":"First Logic Inc. Customer data quality\u2014Building the foundation for a one-to-one customer relationship. White Paper, available at http:\/\/www.firstlogic.com\/."},{"key":"5107142_CR14","doi-asserted-by":"crossref","first-page":"471","DOI":"10.1016\/S0165-0114(99)00156-6","volume":"121","author":"J. Galindo","year":"2001","unstructured":"Galindo, J., Medina, J.M., and Aranda-Garrido, M. 2001. Fuzzy division in fuzzy relational databases: An approach. Fuzzy Sets and Systems, 121:471\u2013490.","journal-title":"Fuzzy Sets and Systems"},{"key":"5107142_CR15","unstructured":"Golfarelli, M. and Rizzi, S. 1999. Designing the data warehouse: Key steps and crucial issues. Journal of Computer Science and Information Management, 2(3)."},{"key":"5107142_CR16","volume-title":"Transaction Processing: Concepts and Techniques","author":"J. Gray","year":"1993","unstructured":"Gray, J. and Reuter, A. 1993. Transaction Processing: Concepts and Techniques. San Mateo, CA: Morgan Kaufmann."},{"key":"5107142_CR17","unstructured":"IBM NUMA-Q. 1999. Modeling customer relationship. White Paper, available at http:\/\/www.sequent.com\/solutions\/crm\/whitepapers\/mcr wp.html."},{"key":"5107142_CR18","volume-title":"Building the Data Warehouse","author":"W.H. Inmon","year":"1996","unstructured":"Inmon, W.H. 1996. Building the Data Warehouse. New York: John Wiley."},{"key":"5107142_CR19","volume-title":"Data Warehouse Performance","author":"W.H. Inmon","year":"1999","unstructured":"Inmon, W.H. 1999. Data Warehouse Performance. New York: John Wiley."},{"key":"5107142_CR20","doi-asserted-by":"crossref","unstructured":"Kim, W. and Seo, J.Y. 1991. On classifying schematic and data heterogeneity in multi database systems. IEEE Computer, 24(12).","DOI":"10.1109\/2.116884"},{"issue":"1","key":"5107142_CR21","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1007\/BF01263333","volume":"1","author":"W. Kim","year":"1993","unstructured":"Kim, W., Choi, I.J., Gala, S., and Scheevel, M. 1993. On resolving schema heterogeneity in multi database systems.Distributed and Parallel Databases, 1(1):251\u2013279.","journal-title":"Distributed and Parallel Databases"},{"key":"5107142_CR22","unstructured":"Kim, W. 1995. Modern Database Systems. ACM Press, 1995."},{"issue":"6","key":"5107142_CR23","first-page":"40","volume":"12","author":"W. Kim","year":"1999","unstructured":"Kim, W., Chae, K.J., Cho, D.S., Choi, B.J., Kim, M., Lee, K.H., Lee, M.J., Lee, S.H., Park, S.S., and Yong, H.S. 1999. A component-based knowledge engineering architecture. Journal of Object-Oriented Programming, 12(6):40\u201348.","journal-title":"Journal of Object-Oriented Programming"},{"key":"5107142_CR24","volume-title":"The Data Warehouse Lifecycle Toolkit: Expert Methods for Designing, Developing, and Deploying Data Warehouses","author":"R. Kimball","year":"1998","unstructured":"Kimball, R. et al. 1998. The Data Warehouse Lifecycle Toolkit: Expert Methods for Designing, Developing, and Deploying Data Warehouses. New York: John Wiley."},{"key":"5107142_CR25","series-title":"A.P.I.C. Series","volume-title":"Fundamentals of Spatial Information Systems","author":"R. Laurini","year":"1993","unstructured":"Laurini, R. and Thompson, D. 1993. Fundamentals of Spatial Information Systems, A.P.I.C. Series no. 37. SanDiego, CA: Academic Press."},{"key":"5107142_CR26","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1016\/S0165-0114(98)00294-2","volume":"117","author":"O. Maimon","year":"2001","unstructured":"Maimon, O., Kandel, A., and Last, M. 2001. Information-theoretic fuzzy approach to data reliability and data mining. Fuzzy Sets and Systems, 117:183\u2013194.","journal-title":"Fuzzy Sets and Systems"},{"key":"5107142_CR27","unstructured":"Olson, J. Data profiling. White Paper, Evoke Software Corporation, available at http:\/\/www.evokesoft.com\/products\/ProdWPDP.html."},{"key":"5107142_CR28","unstructured":"Ooi, B. 1990. Efficient Query Processing in Geographic Information Systems, Lecture Notes in Computer Science. Berlin: Springer-Verlag."},{"key":"5107142_CR29","unstructured":"SAS Institute Inc. 1999. Finding the solution to data mining\u2014A map of the features and components of SAS enterprise miner software version 3. White Paper, available at http:\/\/www.sas.com."},{"key":"5107142_CR30","volume-title":"Lecture Notes in Computer Science","author":"M. Schneider","year":"1997","unstructured":"Schneider, M. 1997. Spatial Data Types for Database Systems: Finite Resolution Geometry for Geographic Information Systems, Lecture Notes in Computer Science, Vol. 1288. Berlin: Springer-Verlag."},{"key":"5107142_CR31","volume-title":"Database System Concepts","author":"A. Silberschatz","year":"1997","unstructured":"Silberschatz, A., Korth, H., and Sudarchan, S. 1997. Database System Concepts. New York: McGraw-Hill."},{"key":"5107142_CR32","volume-title":"The TSQL2 Temporal Query Language","year":"1995","unstructured":"Snodgrass, R. (Ed). 1995. The TSQL2 Temporal Query Language. Boston, MA: Kluwer Academic Publishers."},{"key":"5107142_CR33","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1016\/S0165-0114(98)00152-3","volume":"117","author":"M.I. Sozat","year":"2001","unstructured":"Sozat, M.I. and Yazici, A. 2001. A complete axiomatization for fuzzy functional and multivalued dependencies in fuzzy database relations. Fuzzy Sets and Systems, 117:161\u2013181.","journal-title":"Fuzzy Sets and Systems"},{"key":"5107142_CR34","unstructured":"Stokes, M.E., Davis, C.S., and Koch, G.G. 1995. Categorical Data Analysis Using the SAS System. SAS Institute."},{"key":"5107142_CR35","volume-title":"Object-Relational DBMSs: The Next Great Wave","author":"M. Stonebraker","year":"1996","unstructured":"Stonebraker, M. 1996. Object-Relational DBMSs: The Next Great Wave. San Mateo, CA: Morgan Kaufmann Publishers."},{"key":"5107142_CR36","unstructured":"TechGuide-1. The Technology Guide Series. A practical guide to achieving enterprise data quality\u2014Trillium software. White Paper, available at http:\/\/www.techguide.com\/."},{"key":"5107142_CR37","unstructured":"TechGuide-2. The Technology Guide Series. Achieving business success through customer relationship management (CRM)\u2014Mosaix. White Paper, available at http:\/\/www.techguide.com\/."},{"issue":"3","key":"5107142_CR38","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1145\/319732.319734","volume":"7","author":"I. Traiger","year":"1982","unstructured":"Traiger, I., Gray, J., Galtieri, C.A., and Lindsay, B. 1982. Transactions and consistency in distributed database systems. ACM Trans. Database Systems, 7(3):323\u2013342.","journal-title":"ACM Trans. Database Systems"},{"key":"5107142_CR39","unstructured":"Trillium Software System. 1998. A practical guide to achieving enterprise data quality. White Paper, available at http:\/\/www.trilliumsoft.com\/."},{"key":"5107142_CR40","unstructured":"Vality Technology Inc. The five legacy data contaminants you will encounter in your warehouse migration. White Paper, available at http:\/\/www.vality.com\/."},{"issue":"4","key":"5107142_CR41","doi-asserted-by":"crossref","first-page":"623","DOI":"10.1109\/69.404034","volume":"7","author":"R. Wang","year":"1995","unstructured":"Wang, R., Storey, V., and Firth, C. 1995. A framework for analysis of data quality research. IEEE Transactions on Knowledge and Engineering, 7(4):623\u2013640.","journal-title":"IEEE Transactions on Knowledge and Engineering"},{"key":"5107142_CR42","volume-title":"Data Mining Solutions: Methods and Tools for Solving Real-World Problems","author":"C. Westphal","year":"1998","unstructured":"Westphal, C. and Blaxton, T. 1998. Data Mining Solutions: Methods and Tools for Solving Real-World Problems. New York: John Wiley."},{"key":"5107142_CR43","unstructured":"Williams, J. 1997. Tools for traveling data. In DBMS. Miller Freeman."},{"key":"5107142_CR44","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1016\/0020-0255(85)90008-8","volume":"37","author":"M. Zemankova","year":"1985","unstructured":"Zemankova, M. and Kandel, A. 1985. Implementing imprecision in information systems. Information Sciences, 37:107\u2013141.","journal-title":"Information Sciences"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1021564703268.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1021564703268\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1021564703268.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T03:02:50Z","timestamp":1752462170000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1021564703268"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003,1]]},"references-count":44,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2003,1]]}},"alternative-id":["5107142"],"URL":"https:\/\/doi.org\/10.1023\/a:1021564703268","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2003,1]]}}}