{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T13:15:18Z","timestamp":1773148518537,"version":"3.50.1"},"reference-count":68,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2006,3,28]],"date-time":"2006-03-28T00:00:00Z","timestamp":1143504000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2007,2,8]]},"DOI":"10.1007\/s10115-006-0006-x","type":"journal-article","created":{"date-parts":[[2006,3,27]],"date-time":"2006-03-27T15:26:47Z","timestamp":1143473207000},"page":"191-215","source":"Crossref","is-referenced-by-count":12,"title":["Data quality awareness: a case study for cost optimal association rule mining"],"prefix":"10.1007","volume":"11","author":[{"given":"Laure","family":"Berti-\u00c9quille","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2006,3,28]]},"reference":[{"key":"6_CR1","unstructured":"Avenali A, Batini C, Bertolazzi P, Missier P (2004) A formulation of the data quality optimization problem. In: Proceedings of the international CAiSE workhop on data and information quality (DIQ), Riga, Latvia, pp 49\u201363"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Ballou DP, Pazer H (1995) Designing information systems to optimize the accuracy-timeliness trade-off. Inf Syst Res 6(1)","DOI":"10.1287\/isre.6.1.51"},{"issue":"1","key":"6_CR3","first-page":"40","volume":"15","author":"DP Ballou","year":"2002","unstructured":"Ballou DP, Pazer H (2002) Modeling completeness versus consistency trade-offs in information decision contexts. IEEE Trans Knowl Data Eng (TDKE) 15(1):240\u2013243","journal-title":"IEEE Trans Knowl Data Eng (TDKE)"},{"key":"6_CR4","unstructured":"Batini C, Catarci T, Scannapiceco M (2004) A survey of data quality issues in cooperative information systems. In: Tutorial presented at the 23rd international conference on conceptual modeling (ER), Shanghai, China"},{"key":"6_CR5","unstructured":"Benjelloun O, Garcia-Molina H, Su Q, Widom J (2005) Swoosh: A generic approach to entity resolution. Technical Report, Stanford Database Group"},{"key":"6_CR6","unstructured":"Berti-\u00c9quille L, Moussouni F (2005) Quality-aware integration and warehousing of genomic data. In: Proceedings of the 10th international conference on information quality (IQ'05), MIT, Cambridge, USA"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Bilenko M, Mooney RJ (2003) Adaptive duplicate detection using learnable string similarity measures. In: Proceedings of the 9th ACM SIGKDD conference on knowledge discovery and data mining (KDD), Washington, DC, USA, pp 39\u201348","DOI":"10.1145\/956750.956759"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Bouzeghoub M, Peralta V (2004) A framework for analysis of data freshness. In: Proceedings of the 1st ACM SIGMOD workshop on information quality in information systems (IQIS), Paris, France, pp 59\u201367","DOI":"10.1145\/1012453.1012464"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Breunig M, Kriegel H, Ng R, Sander J (2000) LOF: Identifying density-based local outliers. In: Proceedings of 2000 ACM SIGMOD conference, Dallas, TX, USA, pp 93\u2013104","DOI":"10.1145\/342009.335388"},{"key":"6_CR10","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1016\/0378-7206(80)90035-X","volume":"3","author":"ML Brodie","year":"1980","unstructured":"Brodie ML (1980) Data quality in information systems. Inform Manage 3:245\u2013258","journal-title":"Inform Manage"},{"key":"6_CR11","unstructured":"Celko J, McDonald J (1995) Don't warehouse dirty data. Datamation 41(18)"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Chaudhuri S, Ganjam K, Ganti V, Motwani R (2003) Robust and efficient fuzzy match for online data cleaning. In: Proceedings of the 2003 ACM SIGMOD international conference on management of data, San Diego, CA, USA, pp 313\u2013324","DOI":"10.1145\/872757.872796"},{"key":"6_CR13","unstructured":"Cui Y, Widom J (2001) Lineage tracing for general data warehouse transformation. In: Proceedings of the 27th international conference on very large data bases (VLDB), Roma, Italy, September 11\u201314, pp 471\u2013480"},{"key":"6_CR14","doi-asserted-by":"crossref","DOI":"10.1002\/0471448354","volume-title":"Exploratory data mining and data cleaning","author":"T Dasu","year":"2003","unstructured":"Dasu T, Johnson T (2003) Exploratory data mining and data cleaning. Wiley, New York"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Dasu T, Johnson T, Muthukrishnan S, Shkapenyuk V (2002) Mining database structure or, how to build a data quality browser. In: Proceedings of the 2002 ACM SIGMOD international conference on management of data, Madison, WI, USA, pp 240\u2013251","DOI":"10.1145\/564691.564719"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"De Giacomo G, Lembo D, Lenzerini M, Rosati R (2004) Tackling inconsistencies in data integration through source preferences. In: Proceedings of the 1st ACM SIGMOD workshop on information quality in information systems (IQIS), Paris, France, pp 27\u201334","DOI":"10.1145\/1012453.1012459"},{"key":"6_CR17","doi-asserted-by":"crossref","first-page":"205","DOI":"10.1016\/0164-1212(92)90109-W","volume":"17","author":"G Delen","year":"1992","unstructured":"Delen G, Rijsenbrij D (1992) The specification, engineering and measurement of information systems quality. J Softw Syst 17:205\u2013217","journal-title":"J Softw Syst"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"Elfeky MG, Verykios VS, Elmagarmid AK (2002) Tailor: A record linkage toolbox. In: Proceedings of the 19th international conference on data engineering (ICDE), San Jose, CA, USA, pp 1\u201328","DOI":"10.1109\/ICDE.2002.994694"},{"key":"6_CR19","volume-title":"Improving data warehouse and business information quality","author":"L English","year":"1998","unstructured":"English L (1998) Improving data warehouse and business information quality. Wiley, New York"},{"issue":"8","key":"6_CR20","first-page":"235","volume":"26","author":"K Fan","year":"2001","unstructured":"Fan K, Lu H, Madnick S, Cheung D (2001) Discovering and reconciling value conflicts for numerical data integration. Inform Syst 26(8):235\u2013656","journal-title":"Inform Syst"},{"key":"6_CR21","doi-asserted-by":"crossref","first-page":"1183","DOI":"10.1080\/01621459.1969.10501049","volume":"64","author":"IP Fellegi","year":"1969","unstructured":"Fellegi IP, Sunter AB (1969) A theory for record linkage. J Am Stat Assoc 64:1183-1210","journal-title":"J Am Stat Assoc"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Fox C, Levitin A, Redman T (1994) The notion of data and its quality dimensions. Information Processing and Management 30(1)","DOI":"10.1016\/0306-4573(94)90020-5"},{"key":"6_CR23","doi-asserted-by":"crossref","unstructured":"Gravano L, Ipeirotis PG, Koudas N, Srivastava D (2003) Text joins in an RDBMS for web data integration. In: Proceedings of the 12th international world wide web conference (WWW), Budapest, Hungary, pp 90\u2013101","DOI":"10.1145\/775152.775166"},{"issue":"1","key":"6_CR24","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1023\/A:1009761603038","volume":"2","author":"M Hernandez","year":"1998","unstructured":"Hernandez M, Stolfo S (1998) Real-world data is dirty: Data cleansing and the merge\/purge problem. Data Min Knowl Discov 2(1):9\u201337","journal-title":"Data Min Knowl Discov"},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Hou WC, Zhang Z (1995) Enhancing database correctness: A statistical approach. In: Proceedings of the 1995 ACM SIGMOD international conference on management of data, San Jose, CA, USA","DOI":"10.1145\/223784.223840"},{"key":"6_CR26","volume-title":"Quality information and knowledge management","author":"K Huang","year":"1999","unstructured":"Huang K, Lee Y, Wang R (1999) Quality information and knowledge management. Prentice Hall, New Jersey"},{"key":"6_CR27","doi-asserted-by":"crossref","unstructured":"Jarke M, Jeusfeld MA, Quix C, Vassiliadis P (1998) Architecture and quality in data warehouses. In: Proceedings of the 10th international conference on advanced information systems engineering (CAiSE), Pisa, Italy, pp 93\u2013113","DOI":"10.1007\/BFb0054221"},{"key":"6_CR28","unstructured":"Johnson T, Dasu T (1998) Comparing massive high-dimensional data sets. In: Proceedings of the 4th international conference KDD, New York City, New York, USA, pp 229\u2013233"},{"issue":"4","key":"6_CR29","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1145\/505248.506007","volume":"45","author":"B Kahn","year":"2002","unstructured":"Kahn B, Strong D, Wang R (2002) Information quality benchmark: Product and service performance. Com. ACM 45(4):184\u2013192","journal-title":"Com. ACM"},{"key":"6_CR30","unstructured":"Knorr E, Ng R (1998) Algorithms for mining distance-based outliers in large datasets. In: Proceedings of the 24th international conference on very large data bases (VLDB), New York City, USA, pp 392\u2013403"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Lavra\u010d N, Flach PA, Zupan B (1999) Rule evaluation measures: A unifying view. In: Proceedings of the international workshop on inductive logic programming (ILP), Bled, Slovenia, pp 174\u2013185","DOI":"10.1007\/3-540-48751-4_17"},{"key":"6_CR32","volume-title":"Data quality control: Theory and pragmatics","author":"G Liepins","year":"1990","unstructured":"Liepins G, Uppuluri V (1990) Data quality control: Theory and pragmatics. Marcel Dekker, New York"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Lim L, Srivastava J, Prabhakar S, Richardson J (1993) Entity identification in database integration. In: Proceedings of the 9th international conference on data engineering (ICDE), Vienna, Austria, pp 294\u2013301","DOI":"10.1109\/ICDE.1993.344053"},{"key":"6_CR34","volume-title":"Statistical analysis with missing data","author":"RJ Little","year":"1987","unstructured":"Little RJ, Rubin DB (1987) Statistical analysis with missing data. Wiley, New York"},{"key":"6_CR35","unstructured":"Liu L, Chi L (2002) Evolutionary data quality. In: Proceedings of the 7th international conference on information quality (IQ), MIT, Cambridge, USA"},{"key":"6_CR36","doi-asserted-by":"crossref","unstructured":"McCallum A, Nigam K, Ungar LH (2000) Efficient clustering of high-dimensional data sets with application to reference matching. In: Proceedings of the 6th ACM SIGKDD conference on knowledge discovery and data mining (KDD), Boston, MA, USA, pp 169\u2013178","DOI":"10.1145\/347090.347123"},{"key":"6_CR37","unstructured":"Mihaila GA, Raschid L, Vidal M (2000) Using quality of data metadata for source selection and ranking. In: Proceedings of the 3rd international WebDB workshop, Dallas, TX, USA, pp 93\u201398"},{"key":"6_CR38","unstructured":"Missier P, Batini C (2003) A multidimensional model for information quality in CIS. In: Proceedings of the 8th international conference on information quality (IQ), MIT, Cambridge, MA, USA"},{"issue":"4","key":"6_CR39","first-page":"14","volume":"23","author":"A Monge","year":"2000","unstructured":"Monge A (2000) Matching algorithms within a duplicate detection system. IEEE Data Eng Bull 23(4):14\u201320","journal-title":"IEEE Data Eng Bull"},{"key":"6_CR40","unstructured":"M\u00fcller H, Leser U, Freytag JC (2004) Mining for patterns in contradictory data. In: Proceedings of the 1st ACM SIGMOD workshop on information quality in information systems (IQIS) in conjunction with ACM PODS\/SIGMOD, Paris, France, pp 51\u201358"},{"key":"6_CR41","unstructured":"Naumann F, Leser U, Freytag J (1999) Quality-driven integration of heterogeneous information systems. In: Proceedings of the 25th international conference on very large data bases (VLDB), Edinburgh, Scotland, pp 447\u2013458"},{"key":"6_CR42","doi-asserted-by":"crossref","DOI":"10.1007\/3-540-45921-9","volume-title":"Quality-driven query answering for integrated information systems","author":"F Naumann","year":"2002","unstructured":"Naumann F (2002) Quality-driven query answering for integrated information systems. LNCS 2261, Springer, Berlin Heidelberg New York"},{"key":"6_CR43","unstructured":"Pasula H, Marthi B, Milch B, Russell S, Shpitser I (2003) Identity uncertainty and citation matching. In: Proceedings of the international conference advances in neural information processing systems (NIPS), Vancouver, British Colombia, pp 1401\u20131408"},{"key":"6_CR44","unstructured":"Pearson RK (2002) Data mining in face of contaminated and incomplete records. In: Proceedings of SIAM international conference on data mining"},{"key":"6_CR45","volume-title":"Data mining on multimedia","author":"P Perner","year":"2002","unstructured":"Perner P (2002) Data mining on multimedia. LNCS 2558, Springer, Berlin Heidelberg New York"},{"key":"6_CR46","unstructured":"Piattini M, Genero M, Calero C, Polo C, Ruiz F (2000) Database quality. Chapter 14: Advanced database technology and design. Artech House, Norwood, MA, pp 485\u2013509"},{"key":"6_CR47","doi-asserted-by":"crossref","unstructured":"Piattini, M, Calero C, Genero M (eds)(2002) Information and database quality. The Kluwer International Series on Advances in Database Systems, 25","DOI":"10.1007\/978-1-4615-0831-1"},{"key":"6_CR48","unstructured":"Pyle D (1999) Data preparation for data mining. Morgan Kaufmann, San Mateo, CA"},{"issue":"4","key":"6_CR49","first-page":"3","volume":"23","author":"E Rahm","year":"2000","unstructured":"Rahm E, Do H (2000) Data cleaning: Problems and current approaches. IEEE Data Eng Bull 23(4):3\u201313","journal-title":"IEEE Data Eng Bull"},{"key":"6_CR50","unstructured":"Raman V, Hellerstein JM (2001) Potter's wheel: An interactive data cleaning system. In: Proceedings of the 26th international conference on very large data bases (VLDB), Roma, Italy, pp 381\u2013390"},{"key":"6_CR51","unstructured":"Redman T (2001) Data quality: The field guide. Digital Press, Elsevier"},{"key":"6_CR52","unstructured":"Rothenberg J (1996) Metadata to support data quality and longevity. In: Proceedings of the 1st IEEE metadata conference, Silver Spring, MD"},{"key":"6_CR53","doi-asserted-by":"crossref","unstructured":"Santis LD, Scannapieco M, Catarci T (2003) Trusting data quality in cooperative information systems. In: Proceedings of the international conference on cooperative information systems (CoopIS), Catania, Sicily, Italy, pp 354\u2013369","DOI":"10.1007\/978-3-540-39964-3_23"},{"key":"6_CR54","unstructured":"Scannapieco M, Pernici B, Pierce E (2004) IP-UML: A methodology for quality improvement based on IP-MAP and UML. Advances in Management Information Systems-Information Quality Monograph (AMIS-IQ), Sharpe"},{"key":"6_CR55","doi-asserted-by":"crossref","DOI":"10.1201\/9781439821862","volume-title":"Analysis of incomplete multivariate data","author":"JL Schafer","year":"1997","unstructured":"Schafer JL (1997) Analysis of incomplete multivariate data. Chapman & Hall, London"},{"key":"6_CR56","unstructured":"Schlimmer J (1991) Learning determinations and checking databases. In: Proceedings of AAAI workshop on knowledge discovery in databases, AAAI\u20131991 Anaheim California"},{"key":"6_CR57","doi-asserted-by":"crossref","unstructured":"Tan P-N, Kumar V, Srivastava J (2002) Selecting the right interestingness measure for association patterns. In: Proceedings of the 8th ACM SIGKDD conference on knowledge discovery and data mining (KDD), Edmonton, Canada, pp 32\u201341","DOI":"10.1145\/775047.775053"},{"issue":"3","key":"6_CR58","doi-asserted-by":"crossref","first-page":"299","DOI":"10.1142\/S0218843001000369","volume":"10","author":"D Theodoratos","year":"2001","unstructured":"Theodoratos D, Bouzeghoub M (2001) Data currency quality satisfaction in the design of a data warehouse. Special Issue on design and management of data warehouses. Int J Coop Inf Syst 10(3):299\u2013326","journal-title":"Special Issue on design and management of data warehouses. Int J Coop Inf Syst"},{"key":"6_CR59","doi-asserted-by":"crossref","unstructured":"Vassiliadis P, Bouzeghoub M, Quix C (1999) Towards quality-oriented data warehouse usage and evolution. In: Proceedings of the 11th international conference on advanced information systems engineering (CAiSE), Heidelberg, Germany, pp 164\u2013179","DOI":"10.1007\/3-540-48738-7_13"},{"key":"6_CR60","doi-asserted-by":"crossref","unstructured":"Vassiliadis P, Simitsis A, Georgantas P, Terrovitis M (2003) A framework for the design of ETL scenarios. In: Proceedings of the 15th international conference on advanced information systems engineering (CAiSE), Klagenfurt, Austria, pp 520\u2013535","DOI":"10.1007\/3-540-45017-3_35"},{"key":"6_CR61","unstructured":"Vassiliadis P (2000) Data warehouse modeling and quality issues. PhD thesis, Technical University of Athens, Greece"},{"key":"6_CR62","doi-asserted-by":"crossref","unstructured":"Wang R, Kon HB, Madnick SE (1993) Data quality requirements analysis and modeling. In: Proceedings of the 9th international conference on data engineering (ICDE), Vienna, Austria, pp 670\u2013677","DOI":"10.1109\/ICDE.1993.344012"},{"issue":"4","key":"6_CR63","first-page":"670","volume":"7","author":"R Wang","year":"1995","unstructured":"Wang R, Storey V, Firth C (1995) A framework for analysis of data quality research. IEEE Trans Knowl Data Eng (TDKE) 7(4):670\u2013677","journal-title":"IEEE Trans Knowl Data Eng (TDKE)"},{"issue":"2","key":"6_CR64","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1145\/269012.269022","volume":"41","author":"R Wang","year":"1998","unstructured":"Wang R (1998) A product perspective on total data quality management. Com. ACM 41(2):58\u201365","journal-title":"Com. ACM"},{"key":"6_CR65","volume-title":"Journey to data quality, vol 23 of Advances in database systems","author":"R Wang","year":"2002","unstructured":"Wang R (2002) Journey to data quality, vol 23 of Advances in database systems. Kluwer, Boston, MA, USA"},{"key":"6_CR66","doi-asserted-by":"crossref","unstructured":"Wang K, Zhou S, Yang Q, Yeung JMS (2005) Mining customer value: From association rules to direct marketing. J Data Min Knowl Discov","DOI":"10.1007\/s10618-005-1355-x"},{"key":"6_CR67","doi-asserted-by":"crossref","unstructured":"Weis M, Naumann F (2004) Detecting duplicate objects in XML documents. In: Proceedings of the 1st international ACM SIGMOD workshop on information quality in information systems (IQIS) in conjunction with ACM PODS\/SIGMOD, Paris, France, pp 10\u201319","DOI":"10.1145\/1012453.1012456"},{"key":"6_CR68","doi-asserted-by":"crossref","unstructured":"Winkler WE (2004) Methods for evaluating and creating data quality. Inf Syst 29(7)","DOI":"10.1016\/j.is.2003.12.003"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-006-0006-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10115-006-0006-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-006-0006-x","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,6]],"date-time":"2023-05-06T21:34:19Z","timestamp":1683408859000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10115-006-0006-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006,3,28]]},"references-count":68,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2007,2,8]]}},"alternative-id":["6"],"URL":"https:\/\/doi.org\/10.1007\/s10115-006-0006-x","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2006,3,28]]}}}