{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,14]],"date-time":"2025-10-14T11:22:16Z","timestamp":1760440936344},"publisher-location":"Boston, MA","reference-count":50,"publisher":"Springer US","isbn-type":[{"type":"print","value":"9780387098227"},{"type":"electronic","value":"9780387098234"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-0-387-09823-4_2","type":"book-chapter","created":{"date-parts":[[2010,9,9]],"date-time":"2010-09-09T11:02:00Z","timestamp":1284030120000},"page":"19-32","source":"Crossref","is-referenced-by-count":16,"title":["Data Cleansing: A Prelude to Knowledge Discovery"],"prefix":"10.1007","author":[{"given":"Jonathan I.","family":"Maletic","sequence":"first","affiliation":[]},{"given":"Andrian","family":"Marcus","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,7,7]]},"reference":[{"key":"2_CR1","first-page":"37","volume-title":"Outlier detection for high dimensional data","author":"C. C. Aggarwal","year":"2001","unstructured":"Aggarwal, C. C. & Yu, P. S. Outlier detection for high dimensional data. Proceedings of ACM SIGMOD international Conference on Management of Data; 2001 May 21-24; Santa Barbara, CA. 37-46."},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"Agrawal, R., Imielinski, T., & Swami, A. Mining Association rules between Sets of Items in Large Databases. Proceedings of ACM SIGMOD International Conference on Management of Data; 1993 May; Washington D.C. 207-216.","DOI":"10.1145\/170035.170072"},{"issue":"1","key":"2_CR5","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1145\/291469.291471","volume":"42","author":"D. P. Ballou","year":"1999","unstructured":"Ballou, D. P. & Tayi, G. K. Enhancing Data Quality in DataWarehouse Environments, Communications of the ACM 1999; 42(1):73-78.","journal-title":"Communications of the ACM"},{"key":"2_CR6","unstructured":"Barnett, V. & Lewis, T., Outliers in Statistical Data. John Wiley and Sons, 1994."},{"key":"2_CR7","first-page":"464","volume-title":"Data Cleansing for Fiscal Services: The Taviano Project","author":"M. A. Bochicchio","year":"2003","unstructured":"Bochicchio, M. A. & Longo, A. Data Cleansing for Fiscal Services: The Taviano Project. Proceedings of 5th International Conference on Enterprise Information Systems; 2003 April 22-26; Angers, France. 464-467."},{"key":"2_CR8","unstructured":"Brachman, R. J., Anand, T., The Process of Knowledge Discovery in Databases \u2014 A Human\u2013Centered Approach. In Advances in Knowledge Discovery and Data Mining, Fayyad, U. M., Piatetsky-Shapiro, G., Smyth, P., & Uth-urasamy, R., eds. MIT Press\/AAAI Press, 1996."},{"issue":"2","key":"2_CR12","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1145\/980972.980996","volume":"5","author":"M. Cadot","year":"2003","unstructured":"Cadot, M. & di Martion, J. A data cleaning solution by Perl scripts for the KDD Cup 2003 task 2, ACM SIGKDD Explorations Newsletter 2003; 5(2):158-159.","journal-title":"ACM SIGKDD Explorations Newsletter"},{"key":"2_CR13","first-page":"313","volume-title":"Robust and efficient fuzzy match for online data cleaning","author":"S. Chaudhuri","year":"2003","unstructured":"Chaudhuri, S., Ganjam, K., Ganti, V., & Motwani, R. Robust and efficient fuzzy match for online data cleaning. Proceedings of ACM SIGMOD International Conference on Management of Data; 2003 june 9-12; San Diego, CA. 313-324."},{"key":"2_CR14","unstructured":"Dasu, T., Vesonder, G. T., & Wright, J. R. Data quality through knowledge engineering."},{"key":"2_CR15","unstructured":"Proceedings of ACM SIGKDD International Conference on Knowledge Discovery and Data Mining; 2003 August 24-27; Washington, D.C. 705-710."},{"key":"2_CR17","unstructured":"Fayyad, U. M., Piatetsky-Shapiro, G., & Smyth, P., From Data Mining to Knowledge Discovery: An Overview. In Advances in Knowledge Discovery and Data Mining, Fayyad,"},{"key":"2_CR19","unstructured":"U. M., Piatetsky-Shapiro, G., Smyth, P., & Uthurasamy, R., eds. MIT Press\/AAAI Press, 1996."},{"issue":"2","key":"2_CR21","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1145\/980972.981004","volume":"5","author":"U. M. Fayyad","year":"2003","unstructured":"Fayyad, U. M., Piatetsky-Shapiro, G., & Uthurasamy, R. Summary from the KDD-03 Panel - Data Mining: The Next 10 Years, ACM SIGKDD Explorations Newsletter 2003; 5(2):191-196.","journal-title":"ACM SIGKDD Explorations Newsletter"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Feekin, A. & Chen, Z. Duplicate detection using k-way sorting method. Proceedings of ACM Symposium on Applied Computing; 2000 Como, Italy. 323-327.","DOI":"10.1145\/335603.335778"},{"issue":"1","key":"2_CR24","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1016\/0306-4573(94)90020-5","volume":"30","author":"C. Fox","year":"1994","unstructured":"Fox, C., Levitin, A., & Redman, T. The Notion of Data and Its Quality Dimensions, InformationProcessing and Management 1994; 30(1):9-19.","journal-title":"InformationProcessing and Management"},{"key":"2_CR25","volume-title":"Data Cleaning: Model, Language and Algoritmes","author":"H. Galhardas","year":"2001","unstructured":"Galhardas, H. Data Cleaning: Model, Language and Algoritmes. University of Versailles, Saint-Quentin-En-Yvelines, Ph.D., 2001."},{"key":"2_CR26","unstructured":"Guyon, I., Matic, N., & Vapnik, V., Discovering Information Patterns and Data Cleaning. In Advances in Knowledge Discovery and Data Mining, Fayyad, U. M., Piatetsky-Shapiro, G., Smyth, P., & Uthurasamy, R., eds. MIT Press\/AAAI Press, 1996."},{"key":"2_CR29","volume-title":"Coding and Information Theory","author":"R. W. Hamming","year":"1980","unstructured":"Hamming, R. W., Coding and Information Theory. New Jersey, Prentice-Hall, 1980."},{"key":"2_CR30","doi-asserted-by":"crossref","first-page":"170","DOI":"10.1007\/3-540-46145-0_17","volume":"04-06","author":"S. Hawkins","year":"2002","unstructured":"Hawkins, S., He, H., Williams, G. J., & Baxter, R. A. Outlier Detection Using Replicator Neural Networks. Proceedings of 4th International Conference on Data Warehousing and Knowledge Discovery; 2002 September 04-06; 170-180.","journal-title":"Proceedings of 4th International Conference on Data Warehousing and Knowledge Discovery"},{"issue":"1","key":"2_CR31","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1023\/A:1009761603038","volume":"2","author":"M. Hernandez","year":"1998","unstructured":"Hernandez, M. & Stolfo, S. Real-world Data is Dirty: Data Cleansing and The Merge\/Purge Problem, Data Mining and Knowledge Discovery 1998; 2(1):9-37.","journal-title":"Data Mining and Knowledge Discovery"},{"key":"2_CR32","doi-asserted-by":"crossref","unstructured":"Johnson, R. A. & Wichern, D. W., Applied Multivariate Statistical Analysis. Prentice Hall, 1998.","DOI":"10.2307\/2533879"},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Kaufman, L. & Rousseauw, P. J., Finding Groups in Data: An Introduction to Cluster Analysis. John Wiley & Sons, 1990.","DOI":"10.1002\/9780470316801"},{"issue":"1","key":"2_CR36","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1023\/A:1021564703268","volume":"7","author":"W. Kim","year":"2003","unstructured":"Kim, W., Choi, B.-J., Hong, E.-K., Kim, S.-K., & Lee, D. A taxonomy of dirty data, Data Mining and Knowledge Discovery 2003; 7(1):81-99.","journal-title":"Data Mining and Knowledge Discovery"},{"issue":"10","key":"2_CR37","first-page":"55","volume":"9","author":"R. Kimball","year":"1996","unstructured":"Kimball, R. Dealing with Dirty Data, DBMS 1996; 9(10):55-60.","journal-title":"DBMS"},{"key":"2_CR38","unstructured":"Knorr, E. M. & Ng, R. T. Algorithms for Mining Distance-Based Outliers in Large Datasets. Proceedings of 24th International Conference on Very Large Data Bases; 1998 New York. 392-403."},{"issue":"3-4","key":"2_CR42","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1007\/s007780050006","volume":"8","author":"E. M. Knorr","year":"2000","unstructured":"Knorr, E. M., Ng, R. T., & Tucakov, V. Distance-based outliers: algorithms and applications, The International Journal on Very Large Data Bases 2000; 8(3-4):237-253.","journal-title":"The International Journal on Very Large Data Bases"},{"key":"2_CR43","unstructured":"Korn, F., Labrinidis, A., Yannis, K., & Faloustsos, C. Ratio Rules: A New Paradigm for Fast, Quantifiable Data Mining. Proceedings of 24th VLDB Conference; 1998 New York. 582\u2013593."},{"key":"2_CR46","doi-asserted-by":"crossref","unstructured":"Lee, M. L., Ling, T. W., & Low, W. L. IntelliClean: a knowledge-based intelligent data cleaner. Proceedings of Sixth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining; 2000 August 20-23; Boston, MA. 290-294.","DOI":"10.1145\/347090.347154"},{"issue":"4","key":"2_CR49","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1016\/0950-5849(93)90069-F","volume":"35","author":"A. Levitin","year":"1995","unstructured":"Levitin, A. & Redman, T. A Model of the Data (Life) Cycles with Application to Quality, Information and Software Technology 1995; 35(4):217-223.","journal-title":"Information and Software Technology"},{"key":"2_CR50","doi-asserted-by":"crossref","unstructured":"Li, Z., Sung, S. Y., Peng, S., & Ling, T. W. A New Efficient Data cleansing Method. Proceedings of Database and Expert Systems Applications (DEXA 2002); 2002 September 2-6; Aix-en-Provence, France. 484-493.","DOI":"10.1007\/3-540-46146-9_48"},{"key":"2_CR53","doi-asserted-by":"crossref","unstructured":"Maimon, O. and Rokach, L. Improving supervised learning by feature decomposition, Proceedings of the Second International Symposium on Foundations of Information and Knowledge Systems, Lecture Notes in Computer Science, Springer, 2002, 178-196","DOI":"10.1007\/3-540-45758-5_12"},{"key":"2_CR56","unstructured":"Maletic, J. I. & Marcus, A. Data Cleansing: Beynod Integrity Analysis. Proceedings of The Conference on Information Quality (IQ2000); 2000 October 20-22; Massachusetts Institute of Technology. 200-209."},{"key":"2_CR59","doi-asserted-by":"crossref","unstructured":"Marcus, A., Maletic, J. I., & Lin, K. I. Ordinal Association Rules for Error Identification in Data Sets. Proceedings of Tenth International Conference on Information and Knowledge Management (CIKM 2001); 2001 November 3-5; Atlanta, GA. to appear.","DOI":"10.1145\/502585.502700"},{"issue":"4","key":"2_CR62","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1093\/comjnl\/26.4.354","volume":"26","author":"F. Murtagh","year":"1983","unstructured":"Murtagh, F. A Survey of Recent Advances in Hierarchical Clustering Algorithms, The Computer Journal 1983; 26(4):354-359.","journal-title":"The Computer Journal"},{"issue":"2","key":"2_CR63","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1145\/269012.269023","volume":"41","author":"K. Orr","year":"1998","unstructured":"Orr, K. Data Quality and Systems Theory, Communications of the ACM 1998; 41(2):66-71.","journal-title":"Communications of the ACM"},{"key":"2_CR64","unstructured":"Raman, V. & Hellerstein, J. M. Potter\u2019s wheel an interactive data cleaning system. Proceedings of 27th International Conference on Very Large Databases 2001 September 11-14; Rome, Italy. 381\u2013391."},{"key":"2_CR67","doi-asserted-by":"crossref","unstructured":"Ramaswamy, S., Rastogi, R., & Shim, K. Efficient Algorithms for Mining Outliers from Large Data Sets. Proceedings of ACM SIGMOD International Conference on Management of Data; 2000 Dallas. 427-438.","DOI":"10.1145\/342009.335437"},{"issue":"2","key":"2_CR70","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1145\/269012.269025","volume":"41","author":"T. Redman","year":"1998","unstructured":"Redman, T. The Impact of Poor Data Quality on the Typical Enterprise, Communications of the ACM 1998; 41(2):79-82.","journal-title":"Communications of the ACM"},{"key":"2_CR71","doi-asserted-by":"crossref","unstructured":"Rokach, L., Maimon, O. (2005), Clustering Methods, Data Mining and Knowledge Discovery Handbook, Springer, pp. 321-352.","DOI":"10.1007\/0-387-25465-X_15"},{"key":"2_CR73","unstructured":"Simoudis, E., Livezey, B., & Kerber, R., Using Recon for Data Cleaning. In Advances in Knowledge Discovery and Data Mining, Fayyad, U. M., Piatetsky-Shapiro, G., Smyth, P., & Uthurasamy, R., eds. MIT Press\/AAAI Press, 1995."},{"key":"2_CR76","first-page":"1","volume-title":"Mining Association Rules with Item Constraints","author":"R. Srikant","year":"1996","unstructured":"Srikant, R., Vu, Q., & Agrawal, R. Mining Association Rules with Item Constraints. Proceedings of SIGMOD International Conference on Management of Data; 1996 June; Montreal, Canada. 1-12."},{"issue":"5","key":"2_CR77","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1145\/253769.253804","volume":"40","author":"D. Strong","year":"1997","unstructured":"Strong, D., Yang, L., & Wang, R. Data Quality in Context, Communications of the ACM 1997; 40(5):103-110.","journal-title":"Communications of the ACM"},{"key":"2_CR78","doi-asserted-by":"crossref","unstructured":"Sung, S. Y., Li, Z., & Sun, P. A fast filtering scheme for large database cleansing. Proceedings of Eleventh ACM International Conference on Information and Knowledge Management; 2002 November 04-09; McLean, VA. 76-83.","DOI":"10.1145\/584792.584808"},{"issue":"10","key":"2_CR81","first-page":"595","volume":"30","author":"M. Svanks","year":"1984","unstructured":"Svanks, M. Integrity Analysis: Methods for Automating Data Quality Assurance, EDP Auditors Foundation 1984; 30(10):595-605.","journal-title":"EDP Auditors Foundation"},{"issue":"4","key":"2_CR82","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1109\/69.404034","volume":"7","author":"R. Wang","year":"1995","unstructured":"Wang, R., Storey, V., & Firth, C. A Framework for Analysis of Data Quality Research, IEEE Transactions on Knowledge and Data Engineering 1995; 7(4):623-639.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"4","key":"2_CR83","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1080\/07421222.1996.11518099","volume":"12","author":"R. Wang","year":"1996","unstructured":"Wang, R., Strong, D., & Guarascio, L. Beyond Accuracy: What Data Quality Means to Data Consumers, Journal of Management Information Systems 1996; 12(4):5-34.","journal-title":"Journal of Management Information Systems"},{"key":"2_CR84","unstructured":"Wang, R., Ziad, M., & Lee, Y. W., Data Quality. Kluwer, 2001."},{"key":"2_CR85","doi-asserted-by":"crossref","unstructured":"Yang, Y., Carbonell, J., Brown, R., Pierce, T., Archibald, B. T., & Liu, X. Learning Approaches for Detecting and Tracking News Events, IEEE Intelligent Systems 1999; 14(4).","DOI":"10.1109\/5254.784083"},{"issue":"4","key":"2_CR88","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1007\/s101150200013","volume":"4","author":"D. Yu","year":"2002","unstructured":"Yu, D., Sheikholeslami, G., & Zhang, A. FindOut: Finding Outliers in Very Large Datasets, Knowledge and Information Systems 2002; 4(4):387-412.","journal-title":"Knowledge and Information Systems"},{"key":"2_CR89","doi-asserted-by":"crossref","unstructured":"Zhao, L., Yuan, S. S., Peng, S., & Ling, T. W. A new efficient data cleansing method. Proceedings of 13th International Conference on Database and Expert Systems Applications; 2002 September 02-06; 484-493.","DOI":"10.1007\/3-540-46146-9_48"}],"container-title":["Data Mining and Knowledge Discovery Handbook"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-0-387-09823-4_2.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T16:12:51Z","timestamp":1605629571000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-0-387-09823-4_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9780387098227","9780387098234"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-0-387-09823-4_2","relation":{},"subject":[],"published":{"date-parts":[[2009]]}}}