{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T09:22:08Z","timestamp":1778750528352,"version":"3.51.4"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2011,11,26]],"date-time":"2011-11-26T00:00:00Z","timestamp":1322265600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2013,1]]},"DOI":"10.1007\/s10618-011-0246-6","type":"journal-article","created":{"date-parts":[[2011,11,25]],"date-time":"2011-11-25T05:05:47Z","timestamp":1322197547000},"page":"130-173","source":"Crossref","is-referenced-by-count":18,"title":["Summarizing categorical data by clustering attributes"],"prefix":"10.1007","volume":"26","author":[{"given":"Michael","family":"Mampaey","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jilles","family":"Vreeken","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2011,11,26]]},"reference":[{"issue":"2","key":"246_CR1","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1109\/TCBB.2005.17","volume":"2","author":"W Au","year":"2005","unstructured":"Au W, Chan K, Wong A, Wang Y (2005) Attribute clustering for grouping, selection, and classification of gene expression data. IEEE\/ACM Trans Comput Biol Bioinform 2(2): 83\u2013101","journal-title":"IEEE\/ACM Trans Comput Biol Bioinform"},{"issue":"2","key":"246_CR2","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1016\/j.jbi.2004.08.009","volume":"38","author":"C Baumgartner","year":"2005","unstructured":"Baumgartner C, B\u00f6hm C, Baumgartner D (2005) Modelling of classification rules on metabolic patterns including machine learning and expert knowledge. Biomed Inform 38(2): 89\u201398","journal-title":"Biomed Inform"},{"key":"246_CR3","doi-asserted-by":"crossref","unstructured":"Bringmann B, Zimmermann A (2007) The chosen few: on identifying valuable patterns. In: Proceedings of the IEEE international conference on data mining (ICDM\u201907), IEEE, pp 63\u201372","DOI":"10.1109\/ICDM.2007.85"},{"issue":"1","key":"246_CR4","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1007\/s10618-006-0054-6","volume":"14","author":"T Calders","year":"2007","unstructured":"Calders T, Goethals B (2007) Non-derivable itemset mining. Data Min Knowl Discov 14(1): 171\u2013206","journal-title":"Data Min Knowl Discov"},{"key":"246_CR5","doi-asserted-by":"crossref","unstructured":"Chakrabarti D, Papadimitriou S, Modha DS, Faloutsos C (2004) Fully automatic cross-associations. In: Proceedings of the ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201904), pp 79\u201388","DOI":"10.21236\/ADA459025"},{"key":"246_CR6","doi-asserted-by":"crossref","unstructured":"Chandola V, Kumar V (2005) Summarization\u2014compressing data into an informative representation. In: Proceedings of the IEEE international conference on data mining (ICDM\u201905), IEEE, pp 98\u2013105","DOI":"10.1109\/ICDM.2005.137"},{"key":"246_CR7","unstructured":"Coenen F (2003) The LUCS-KDD discretised\/normalised ARM and CARM data library. http:\/\/www.csc.liv.ac.uk\/~frans\/KDD\/Software\/LUCS-KDD-DN\/DataSets\/dataSets.html . Accessed October 2010"},{"key":"246_CR8","volume-title":"Elements of information theory","author":"TM Cover","year":"2006","unstructured":"Cover TM, Thomas JA (2006) Elements of information theory, 2nd edn. Wiley, New York","edition":"2"},{"key":"246_CR9","unstructured":"Das G, Mannila H, Ronkainen P (1997) Similarity of attributes by external probes. In: Proceedings of the ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201997), pp 23\u201329"},{"issue":"3","key":"246_CR10","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1007\/s10618-010-0209-3","volume":"23","author":"T De Bie","year":"2011","unstructured":"De Bie T (2011) Maximum entropy models and subjective interestingness: an application to tiles in binary databases. Data Min Knowl Discov 23(3): 407\u2013446","journal-title":"Data Min Knowl Discov"},{"key":"246_CR11","first-page":"1265","volume":"3","author":"I Dhillon","year":"2003","unstructured":"Dhillon I, Mallela S, Kumar R (2003) A divisive information theoretic feature clustering algorithm for text classification. J Mach Learn Res 3: 1265\u20131287","journal-title":"J Mach Learn Res"},{"key":"246_CR12","unstructured":"Frank A, Asuncion A (2010) UCI machine learning repository. http:\/\/archive.ics.uci.edu\/ml . Accessed March 2011"},{"issue":"1","key":"246_CR13","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1007\/s10115-010-0319-7","volume":"28","author":"GC Garriga","year":"2011","unstructured":"Garriga GC, Junttila E, Mannila H (2011) Banded structure in binary matrices. Knowl Inf Syst (KAIS) 28(1): 197\u2013226","journal-title":"Knowl Inf Syst (KAIS)"},{"issue":"3","key":"246_CR14","first-page":"1556","volume":"1","author":"A Gionis","year":"2007","unstructured":"Gionis A, Mannila H, Mielik\u00e4inen T, Tsaparas P (2007) Assessing data mining results via swap randomization. Trans Knowl Discov Data 1(3): 1556\u20134681","journal-title":"Trans Knowl Discov Data"},{"key":"246_CR15","unstructured":"Goethals B, Zaki MJ (2003) Frequent itemset mining implementations repository (FIMI). http:\/\/fimi.ua.ac.be . Accessed October 2010"},{"key":"246_CR16","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4643.001.0001","volume-title":"The minimum description length principle","author":"PD Gr\u00fcnwald","year":"2007","unstructured":"Gr\u00fcnwald PD (2007) The minimum description length principle. MIT Press, Cambridge"},{"issue":"1","key":"246_CR17","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/s10618-006-0059-1","volume":"15","author":"J Han","year":"2007","unstructured":"Han J, Cheng H, Xin D, Yan X (2007) Frequent pattern mining: current status and future directions. Data Min Knowl Discov 15(1): 55\u201386","journal-title":"Data Min Knowl Discov"},{"key":"246_CR18","doi-asserted-by":"crossref","unstructured":"Hanhij\u00e4rvi S, Ojala M, Vuokko N, Puolam\u00e4ki K, Tatti N, Mannila H (2009) Tell me something I don\u2019t know: randomization strategies for iterative data mining. In: Proceedings of the ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201909). ACM, New York, pp 379\u2013388","DOI":"10.1145\/1557019.1557065"},{"key":"246_CR19","doi-asserted-by":"crossref","unstructured":"Heikinheimo H, Hinkkanen E, Mannila H, Mielik\u00e4inen T, Sepp\u00e4nen JK (2007) Finding low-entropy sets and trees from binary data. In: Proceedings of the ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201907). ACM, New York, pp 350\u2013359","DOI":"10.1145\/1281192.1281232"},{"key":"246_CR20","doi-asserted-by":"crossref","unstructured":"Heikinheimo H, Vreeken J, Siebes A, Mannila H (2009) Low-entropy set selection. In: Proceedings of the SIAM international conference on data mining (SDM\u201909). SIAM, New York, pp 569\u2013579","DOI":"10.1137\/1.9781611972795.49"},{"issue":"5","key":"246_CR21","doi-asserted-by":"crossref","first-page":"975","DOI":"10.1007\/BF01009452","volume":"34","author":"S Kirkpatrick","year":"1984","unstructured":"Kirkpatrick S (1984) Optimization by simulated annealing: quantitative studies. Stat Phys 34(5): 975\u2013986","journal-title":"Stat Phys"},{"key":"246_CR22","unstructured":"Knobbe AJ, Ho EKY (2006) Maximally informative k-itemsets and their efficient discovery. In: Proceedings of the ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201906). ACM, New York, pp 237\u2013244"},{"key":"246_CR23","unstructured":"Kontonasios KN, De Bie T (2010) An information-theoretic approach to finding noisy tiles in binary databases. In: Proceedings of the SIAM international conference on data mining (SDM\u201910). SIAM, New York, pp 153\u2013164"},{"key":"246_CR24","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-3860-5","volume-title":"An introduction to Kolmogorov complexity and its applications","author":"M Li","year":"1993","unstructured":"Li M, Vit\u00e1nyi P (1993) An introduction to Kolmogorov complexity and its applications. Springer, New York"},{"key":"246_CR25","doi-asserted-by":"crossref","unstructured":"Mampaey M, Vreeken J (2010) Summarising data by clustering items. In: Proceedings of the European conference on machine learning and principles and practice of knowledge discovery in databases (ECML PKDD\u201910). Springer, New York, pp 321\u2013336","DOI":"10.1007\/978-3-642-15883-4_21"},{"key":"246_CR26","doi-asserted-by":"crossref","unstructured":"Mampaey M, Tatti N, Vreeken J (2011) Tell me what I need to know: succinctly summarizing data with itemsets. In: Proceedings of the ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201911). ACM, New York, pp 573\u2013581","DOI":"10.1145\/2020408.2020499"},{"key":"246_CR27","volume-title":"The atlas of European mammals","author":"A Mitchell-Jones","year":"1999","unstructured":"Mitchell-Jones A, Amori G, Bogdanowicz W, Krystufek B, Reijnders PH, Spitzenberger F, Stubbe M, Thissen J, Vohralik V, Zima J (1999) The atlas of European mammals. Academic Press, London"},{"issue":"55","key":"246_CR28","doi-asserted-by":"crossref","first-page":"7324","DOI":"10.1038\/sj.onc.1209717","volume":"25","author":"S Myllykangas","year":"2006","unstructured":"Myllykangas S, Himberg J, B\u00f6hling T, Nagy B, Hollm\u00e9n J, Knuutila S (2006) DNA copy number amplification profiling of human neoplasms. Oncogene 25(55): 7324\u20137332","journal-title":"Oncogene"},{"key":"246_CR29","doi-asserted-by":"crossref","unstructured":"Pasquier N, Bastide Y, Taouil R, Lakhal L (1999) Discovering frequent closed itemsets for association rules. In: Proceedings of the ICDT international conference on database theory, pp 398\u2013416","DOI":"10.1007\/3-540-49257-7_25"},{"key":"246_CR30","doi-asserted-by":"crossref","unstructured":"Pensa R, Robardet C, Boulicaut JF (2005) A bi-clustering framework for categorical data. In: Proceedings of the European conference on principles and practice of knowledge discovery in databases (PKDD\u201905). Springer, New York, pp 643\u2013650","DOI":"10.1007\/11564126_68"},{"issue":"1","key":"246_CR31","doi-asserted-by":"crossref","first-page":"465","DOI":"10.1016\/0005-1098(78)90005-5","volume":"14","author":"J Rissanen","year":"1978","unstructured":"Rissanen J (1978) Modeling by shortest data description. Automatica 14(1): 465\u2013471","journal-title":"Automatica"},{"key":"246_CR32","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-68812-1","volume-title":"Information and complexity in statistical modeling","author":"J Rissanen","year":"2007","unstructured":"Rissanen J (2007) Information and complexity in statistical modeling. Springer, New York"},{"key":"246_CR33","doi-asserted-by":"crossref","first-page":"379","DOI":"10.1002\/j.1538-7305.1948.tb01338.x","volume":"27","author":"CE Shannon","year":"1948","unstructured":"Shannon CE (1948) A mathematical theory of communication. Bell Syst Tech J 27: 379\u2013423","journal-title":"Bell Syst Tech J"},{"key":"246_CR34","doi-asserted-by":"crossref","unstructured":"Siebes A, Vreeken J, van Leeuwen M (2006) Item sets that compress. In: Proceedings of the SIAM international conference on data mining (SDM\u201906). SIAM, New York, pp 393\u2013404","DOI":"10.1137\/1.9781611972764.35"},{"issue":"2","key":"246_CR35","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1016\/j.jbi.2010.12.001","volume":"44","author":"T Vanden Bulcke","year":"2011","unstructured":"Vanden Bulcke T, Vanden Broucke P, Van Hoof V, Wouters K, Vanden Broucke S, Smits G, Smits E, Proesmans S, Van Genechten T, Eyskens F (2011) Data mining methods for classification of Medium-Chain Acyl-CoA dehydrogenase deficiency (MCADD) using non-derivatized tandem MS neonatal screening data. J Biomed Inform 44(2): 319\u2013325","journal-title":"J Biomed Inform"},{"issue":"12","key":"246_CR36","doi-asserted-by":"crossref","first-page":"3265","DOI":"10.1109\/TIT.2004.838346","volume":"50","author":"N Vereshchagin","year":"2004","unstructured":"Vereshchagin N, Vitanyi P (2004) Kolmogorov\u2019s structure functions and model selection. IEEE Trans Inf Theory 50(12): 3265\u20133290","journal-title":"IEEE Trans Inf Theory"},{"key":"246_CR37","doi-asserted-by":"crossref","unstructured":"Vreeken J, van Leeuwen M, Siebes A (2007) Preserving privacy through data generation. In: Proceedings of the IEEE international conference on data mining (ICDM\u201907), IEEE, pp 685\u2013690","DOI":"10.1109\/ICDM.2007.25"},{"issue":"1","key":"246_CR38","doi-asserted-by":"crossref","first-page":"169","DOI":"10.1007\/s10618-010-0202-x","volume":"23","author":"J Vreeken","year":"2011","unstructured":"Vreeken J, van Leeuwen M, Siebes A (2011) Krimp: mining itemsets that compress. Data Min Knowl Discov 23(1): 169\u2013214","journal-title":"Data Min Knowl Discov"},{"key":"246_CR39","volume-title":"Statistical and inductive inference by minimum message length","author":"C Wallace","year":"2005","unstructured":"Wallace C (2005) Statistical and inductive inference by minimum message length. Springer, New York"},{"key":"246_CR40","doi-asserted-by":"crossref","unstructured":"Wang J, Karypis G (2004) SUMMARY: efficiently summarizing transactions for clustering. In: Proceedings of the IEEE international conference on data mining (ICDM\u201904), IEEE, pp 241\u2013248","DOI":"10.1109\/ICDM.2004.10105"},{"key":"246_CR41","doi-asserted-by":"crossref","unstructured":"Wang C, Parthasarathy S (2006) Summarizing itemset patterns using probabilistic models. In: Proceedings of the ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201906). ACM, New York, pp 730\u2013735","DOI":"10.1145\/1150402.1150495"},{"key":"246_CR42","doi-asserted-by":"crossref","unstructured":"Yan X, Cheng H, Han J, Xin D (2005) Summarizing itemset patterns: a profile-based approach. In: Proceedings of the ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201905). ACM, New York, pp 314\u2013323","DOI":"10.1145\/1081870.1081907"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-011-0246-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10618-011-0246-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-011-0246-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,19]],"date-time":"2019-06-19T21:59:48Z","timestamp":1560981588000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10618-011-0246-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,11,26]]},"references-count":42,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2013,1]]}},"alternative-id":["246"],"URL":"https:\/\/doi.org\/10.1007\/s10618-011-0246-6","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011,11,26]]}}}