{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T13:05:45Z","timestamp":1778591145665,"version":"3.51.4"},"reference-count":49,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T00:00:00Z","timestamp":1774483200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.patcog.2026.113580","type":"journal-article","created":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T07:26:26Z","timestamp":1774682786000},"page":"113580","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"PB","title":["A deterministic information bottleneck method for clustering mixed-type data"],"prefix":"10.1016","volume":"179","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1267-1165","authenticated-orcid":false,"given":"Efthymios","family":"Costa","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ioanna","family":"Papatsouma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Angelos","family":"Markos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113580_bib0001","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1385\/MB:31:1:055","article-title":"Data clustering in life sciences","volume":"31","author":"Zhao","year":"2005","journal-title":"Mol. Biotechnol."},{"issue":"1","key":"10.1016\/j.patcog.2026.113580_bib0002","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1093\/pan\/mpr039","article-title":"Model-based clustering and typologies in the social sciences","volume":"20","author":"Ahlquist","year":"2012","journal-title":"Polit. Anal."},{"issue":"2","key":"10.1016\/j.patcog.2026.113580_bib0003","doi-asserted-by":"crossref","first-page":"134","DOI":"10.1177\/002224378302000204","article-title":"Cluster analysis in marketing research: review and suggestions for application","volume":"20","author":"Punj","year":"1983","journal-title":"J. Mark. Res."},{"key":"10.1016\/j.patcog.2026.113580_bib0004","series-title":"Finite Mixture Models","first-page":"135","article-title":"Mixtures with nonnormal components","author":"McLachlan","year":"2000"},{"key":"10.1016\/j.patcog.2026.113580_bib0005","series-title":"Proceedings of the 1St Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)","first-page":"21","article-title":"Clustering large data sets with mixed numeric and categorical values","author":"Huang","year":"1997"},{"key":"10.1016\/j.patcog.2026.113580_bib0006","doi-asserted-by":"crossref","first-page":"590","DOI":"10.1016\/j.neucom.2013.04.011","article-title":"An improved k-prototypes clustering algorithm for mixed numeric and categorical data","volume":"120","author":"Ji","year":"2013","journal-title":"Neurocomputing"},{"key":"10.1016\/j.patcog.2026.113580_bib0007","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109815","article-title":"Mixed data clustering based on a number of similar features","volume":"143","author":"Rezaei","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113580_bib0008","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110674","article-title":"EDMD: An entropy based dissimilarity measure to cluster mixed-categorical data","author":"Kar","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113580_bib0009","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109353","article-title":"A generalized multi-aspect distance metric for mixed-type data clustering","volume":"138","author":"Mousavi","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113580_bib0010","series-title":"Multiple Factor Analysis by Example Using R","author":"Pag\u00e8s","year":"2014"},{"key":"10.1016\/j.patcog.2026.113580_bib0011","doi-asserted-by":"crossref","first-page":"419","DOI":"10.1007\/s10994-016-5575-7","article-title":"A semiparametric method for clustering mixed data","volume":"105","author":"Foss","year":"2016","journal-title":"Mach. Learn."},{"issue":"1","key":"10.1016\/j.patcog.2026.113580_bib0012","doi-asserted-by":"crossref","first-page":"4202","DOI":"10.1038\/s41598-021-83340-8","article-title":"Head-to-head comparison of clustering methods for heterogeneous data: a simulation-driven benchmark","volume":"11","author":"Preud\u2019Homme","year":"2021","journal-title":"Sci. Rep."},{"issue":"3","key":"10.1016\/j.patcog.2026.113580_bib0013","doi-asserted-by":"crossref","first-page":"701","DOI":"10.1007\/s11634-022-00521-7","article-title":"Benchmarking distance-based partitioning methods for mixed-type data","volume":"17","author":"Costa","year":"2023","journal-title":"Adv. Data Anal. Classif."},{"key":"10.1016\/j.patcog.2026.113580_bib0014","doi-asserted-by":"crossref","first-page":"31883","DOI":"10.1109\/ACCESS.2019.2903568","article-title":"Survey of state-of-the-art mixed data clustering algorithms","volume":"7","author":"Ahmad","year":"2019","journal-title":"IEEE Access"},{"key":"10.1016\/j.patcog.2026.113580_bib0015","series-title":"International Conference on Machine Learning","first-page":"478","article-title":"Unsupervised deep embedding for clustering analysis","author":"Xie","year":"2016"},{"key":"10.1016\/j.patcog.2026.113580_bib0016","doi-asserted-by":"crossref","first-page":"39501","DOI":"10.1109\/ACCESS.2018.2855437","article-title":"A survey of clustering with deep learning: from the perspective of network architecture","volume":"6","author":"Min","year":"2018","journal-title":"IEEE Access"},{"issue":"51","key":"10.1016\/j.patcog.2026.113580_bib0017","doi-asserted-by":"crossref","first-page":"18297","DOI":"10.1073\/pnas.0507432102","article-title":"Information-based clustering","volume":"102","author":"Slonim","year":"2005","journal-title":"Proceed. National Acad. Sci."},{"key":"10.1016\/j.patcog.2026.113580_bib0018","series-title":"Proceedings of the 37th Annual Allerton Conference on Communication, Control and Computing","first-page":"368","article-title":"The information bottleneck method","author":"Tishby","year":"1999"},{"issue":"6","key":"10.1016\/j.patcog.2026.113580_bib0019","doi-asserted-by":"crossref","first-page":"1611","DOI":"10.1162\/NECO_a_00961","article-title":"The Deterministic Information Bottleneck","volume":"29","author":"Strouse","year":"2017","journal-title":"Neural Comput."},{"key":"10.1016\/j.patcog.2026.113580_bib0020","article-title":"Agglomerative information bottleneck","volume":"12","author":"Slonim","year":"1999","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"3","key":"10.1016\/j.patcog.2026.113580_bib0021","doi-asserted-by":"crossref","first-page":"596","DOI":"10.1162\/neco_a_01136","article-title":"The information bottleneck and geometric clustering","volume":"31","author":"Strouse","year":"2019","journal-title":"Neural Comput."},{"issue":"2","key":"10.1016\/j.patcog.2026.113580_bib0022","doi-asserted-by":"crossref","first-page":"266","DOI":"10.1016\/S0047-259X(02)00025-8","article-title":"Nonparametric estimation of distributions with categorical and continuous data","volume":"86","author":"Li","year":"2003","journal-title":"J. Multivar. Anal."},{"key":"10.1016\/j.patcog.2026.113580_bib0023","series-title":"Density Estimation for Statistics and Data Analysis","article-title":"Density Estimation for Statistics and Data Analysis","author":"Silverman","year":"1998"},{"issue":"3","key":"10.1016\/j.patcog.2026.113580_bib0024","doi-asserted-by":"crossref","first-page":"413","DOI":"10.1093\/biomet\/63.3.413","article-title":"Multivariate binary discrimination by the kernel method","volume":"63","author":"Aitchison","year":"1976","journal-title":"Biometrika"},{"key":"10.1016\/j.patcog.2026.113580_bib0025","series-title":"An Introduction to the Advanced Theory and Practice of Nonparametric Econometrics: a Replicable Approach Using R","first-page":"49","article-title":"Continuous density and cumulative distribution functions","author":"Racine","year":"2019"},{"key":"10.1016\/j.patcog.2026.113580_bib0026","article-title":"Using the nystr\u00f6m method to speed up kernel machines","volume":"13","author":"Williams","year":"2000","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113580_bib0027","series-title":"Conference on Learning Theory","first-page":"185","article-title":"Sharp analysis of low-rank kernel matrix approximations","author":"Bach","year":"2013"},{"issue":"1","key":"10.1016\/j.patcog.2026.113580_bib0028","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1017\/S0020268100020102","article-title":"On the choice of bandwidth for kernel graduation","volume":"121","author":"Gavin","year":"1994","journal-title":"J. Inst. Actuar."},{"key":"10.1016\/j.patcog.2026.113580_bib0029","first-page":"1","article-title":"Mixed-type distance shrinkage and selection for clustering via kernel metric learning","author":"Ghashti","year":"2024","journal-title":"J. Classif."},{"issue":"3","key":"10.1016\/j.patcog.2026.113580_bib0030","doi-asserted-by":"crossref","first-page":"309","DOI":"10.1111\/j.1467-9876.2012.01066.x","article-title":"How to find an appropriate clustering for mixed-type variables with application to socio-economic stratification","volume":"62","author":"Hennig","year":"2013","journal-title":"J. Royal Statist. Society Series C: Appl. Statist."},{"key":"10.1016\/j.patcog.2026.113580_bib0031","series-title":"Finding Groups in Data: An Introduction to Cluster Analysis","first-page":"68","author":"Kaufman","year":"1990"},{"key":"10.1016\/j.patcog.2026.113580_bib0032","unstructured":"A. Markos, E. Costa, IBclust: Information Bottleneck Methods for Clustering Mixed-Type Data, 2025. R package version 1.2.1, https:\/\/cran.r-project.org\/web\/packages\/IBclust\/."},{"key":"10.1016\/j.patcog.2026.113580_bib0033","doi-asserted-by":"crossref","first-page":"1","DOI":"10.18637\/jss.v083.i13","article-title":"Kamila: clustering mixed-type data in r and hadoop","volume":"83","author":"Foss","year":"2018","journal-title":"J. Stat. Softw."},{"issue":"2","key":"10.1016\/j.patcog.2026.113580_bib0034","first-page":"200","article-title":"ClustMixType: user-friendly clustering of mixed-type data in r","volume":"10","author":"Szepannek","year":"2018","journal-title":"R Journal"},{"key":"10.1016\/j.patcog.2026.113580_bib0035","doi-asserted-by":"crossref","first-page":"1","DOI":"10.18637\/jss.v025.i01","article-title":"FactoMineR: an r package for multivariate analysis","volume":"25","author":"L\u00ea","year":"2008","journal-title":"J. Stat. Softw."},{"issue":"6","key":"10.1016\/j.patcog.2026.113580_bib0036","article-title":"A white paper on good research practices in benchmarking: the case of cluster analysis","volume":"13","author":"Van Mechelen","year":"2023","journal-title":"Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery"},{"key":"10.1016\/j.patcog.2026.113580_bib0037","doi-asserted-by":"crossref","first-page":"1","DOI":"10.18637\/jss.v051.i12","article-title":"MixSim: an R package for simulating data to study performance of clustering algorithms","volume":"51","author":"Melnykov","year":"2012","journal-title":"J. Stat. Softw."},{"issue":"2","key":"10.1016\/j.patcog.2026.113580_bib0038","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1198\/jcgs.2009.08054","article-title":"Simulating data to study performance of finite mixture modeling and clustering algorithms","volume":"19","author":"Maitra","year":"2010","journal-title":"J. Comput. Graph. Statist."},{"issue":"2","key":"10.1016\/j.patcog.2026.113580_bib0039","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1007\/BF01908075","article-title":"Comparing partitions","volume":"2","author":"Hubert","year":"1985","journal-title":"J. Classif."},{"issue":"95","key":"10.1016\/j.patcog.2026.113580_bib0040","first-page":"2837","article-title":"Information theoretic measures for clusterings comparison: variants, properties, normalization and correction for chance","volume":"11","author":"Vinh","year":"2010","journal-title":"J. Machine Learn. Res."},{"key":"10.1016\/j.patcog.2026.113580_bib0041","series-title":"Statistical Power Analysis for the Behavioral Sciences","first-page":"273","article-title":"F tests on means in the analysis of variance and covariance","author":"Cohen","year":"1977"},{"key":"10.1016\/j.patcog.2026.113580_bib0042","unstructured":"D. Dua, C. Graff, UCI Machine Learning Repository, 2019. http:\/\/archive.ics.uci.edu\/ml."},{"key":"10.1016\/j.patcog.2026.113580_bib0043","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1016\/j.patrec.2015.04.009","article-title":"What are the true clusters?","volume":"64","author":"Hennig","year":"2015","journal-title":"Pattern Recognit. Lett."},{"issue":"1","key":"10.1016\/j.patcog.2026.113580_bib0044","doi-asserted-by":"crossref","first-page":"9223","DOI":"10.1038\/s41598-025-90865-9","article-title":"Ground truth clustering is not the optimum clustering","volume":"15","author":"Bautista","year":"2025","journal-title":"Sci. Rep."},{"issue":"2","key":"10.1016\/j.patcog.2026.113580_bib0045","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1111\/1467-9868.00293","article-title":"Estimating the number of clusters in a data set via the gap statistic","volume":"63","author":"Tibshirani","year":"2001","journal-title":"J. Royal Statist. Society: Series B (Statist. Methodol.)"},{"issue":"1","key":"10.1016\/j.patcog.2026.113580_bib0046","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1016\/j.patcog.2012.07.021","article-title":"An extensive comparative study of cluster validity indices","volume":"46","author":"Arbelaitz","year":"2013","journal-title":"Pattern Recognit."},{"issue":"6\u20138","key":"10.1016\/j.patcog.2026.113580_bib0047","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1080\/10485250701733747","article-title":"Discrete triangular distributions and non-parametric estimation for probability mass function","volume":"19","author":"Kokonendji","year":"2007","journal-title":"J. Nonparametr. Stat."},{"key":"10.1016\/j.patcog.2026.113580_bib0048","article-title":"Discriminant adaptive nearest neighbor classification and regression","volume":"8","author":"Hastie","year":"1995","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"2","key":"10.1016\/j.patcog.2026.113580_bib0049","doi-asserted-by":"crossref","first-page":"553","DOI":"10.1214\/aos\/1031833664","article-title":"Trimmed k-Means: an attempt to robustify quantizers","volume":"25","author":"Cuesta-Albertos","year":"1997","journal-title":"Ann. Stat."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326005467?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326005467?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T13:02:24Z","timestamp":1776344544000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326005467"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":49,"alternative-id":["S0031320326005467"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113580","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A deterministic information bottleneck method for clustering mixed-type data","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113580","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"113580"}}