{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T13:55:29Z","timestamp":1767707729725,"version":"3.40.4"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T00:00:00Z","timestamp":1736121600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T00:00:00Z","timestamp":1736121600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62472064"],"award-info":[{"award-number":["62472064"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s10115-024-02317-x","type":"journal-article","created":{"date-parts":[[2025,1,6]],"date-time":"2025-01-06T13:37:30Z","timestamp":1736170650000},"page":"4113-4138","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Clusterability test for categorical data"],"prefix":"10.1007","volume":"67","author":[{"given":"Lianyu","family":"Hu","sequence":"first","affiliation":[]},{"given":"Junjie","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Mudi","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Yan","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Zengyou","family":"He","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,6]]},"reference":[{"key":"2317_CR1","unstructured":"Kleinberg J (2002) An impossibility theorem for clustering. Adv Neural Inf Process Syst 15"},{"key":"2317_CR2","unstructured":"Pelillo M (2009) What is a cluster? perspectives from game theory. In: Proceedings of the NIPS Workshop on Clustering Theory"},{"key":"2317_CR3","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/j.patrec.2015.04.009","volume":"64","author":"C Hennig","year":"2015","unstructured":"Hennig C (2015) What are the true clusters? Pattern Recogn Lett 64:53\u201362","journal-title":"Pattern Recogn Lett"},{"issue":"59","key":"2317_CR4","first-page":"1949","volume":"16","author":"P Thomann","year":"2015","unstructured":"Thomann P, Steinwart I, Schmid N (2015) Towards an axiomatic approach to hierarchical clustering of measures. J Mach Learn Res 16(59):1949\u20132002","journal-title":"J Mach Learn Res"},{"key":"2317_CR5","unstructured":"Ben-David S, Ackerman M (2008) Measures of clustering quality: a working set of axioms for clustering. Adv Neural Inf Process Syst 21"},{"issue":"4","key":"2317_CR6","doi-asserted-by":"publisher","first-page":"817","DOI":"10.1109\/TEVC.2022.3220187","volume":"27","author":"M Garza-Fabre","year":"2023","unstructured":"Garza-Fabre M, Handl J, Jos\u00e9-Garc\u00eda A (2023) Evolutionary multiobjective clustering over multiple conflicting data views. IEEE Trans Evol Comput 27(4):817\u2013831","journal-title":"IEEE Trans Evol Comput"},{"key":"2317_CR7","unstructured":"Von\u00a0Luxburg U, Williamson RC, Guyon I (2012) Clustering: science or art? In: Proceedings of ICML Workshop on Unsupervised and Transfer Learning, 65\u201379"},{"issue":"1","key":"2317_CR8","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1016\/j.patcog.2012.07.021","volume":"46","author":"O Arbelaitz","year":"2013","unstructured":"Arbelaitz O, Gurrutxaga I, Muguerza J, P\u00e9rez JM, Perona I (2013) An extensive comparative study of cluster validity indices. Pattern Recogn 46(1):243\u2013256","journal-title":"Pattern Recogn"},{"issue":"4","key":"2317_CR9","doi-asserted-by":"publisher","first-page":"1489","DOI":"10.1109\/TKDE.2023.3306024","volume":"36","author":"AF Diallo","year":"2024","unstructured":"Diallo AF, Patras P (2024) Deciphering clusters with a deterministic measure of clustering tendency. IEEE Trans Knowl Data Eng 36(4):1489\u20131501","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"2317_CR10","first-page":"1","volume":"5","author":"M Ackerman","year":"2009","unstructured":"Ackerman M, Ben-David S (2009) Clusterability: a theoretical study. Int Conf Artif Intell Stat 5:1\u20138","journal-title":"Int Conf Artif Intell Stat"},{"key":"2317_CR11","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/j.patcog.2018.10.026","volume":"88","author":"A Adolfsson","year":"2019","unstructured":"Adolfsson A, Ackerman M, Brownstein NC (2019) To cluster, or not to cluster: an analysis of clusterability methods. Pattern Recogn 88:13\u201326","journal-title":"Pattern Recogn"},{"key":"2317_CR12","unstructured":"Ahmadi S, Awasthi P, Khuller S, Kleindessner M, Morgenstern J, Sukprasert P, Vakilian A (2022) Individual preference stability for clustering. In: International Conference on Machine Learning, 197\u2013246"},{"issue":"1","key":"2317_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12859-023-05210-6","volume":"24","author":"J Laborde","year":"2023","unstructured":"Laborde J, Stewart PA, Chen Z, Chen YA, Brownstein NC (2023) Sparse clusterability: testing for cluster structure in high dimensions. BMC Bioinformatics 24(1):1\u201327","journal-title":"BMC Bioinformatics"},{"issue":"3","key":"2317_CR14","doi-asserted-by":"publisher","first-page":"579","DOI":"10.1111\/1467-9868.00141","volume":"60","author":"M-Y Cheng","year":"1998","unstructured":"Cheng M-Y, Hall P (1998) Calibrating the excess mass and dip tests of modality. J R Stat Soc: Ser B (Statistical Methodology) 60(3):579\u2013589","journal-title":"J R Stat Soc: Ser B (Statistical Methodology)"},{"issue":"1","key":"2317_CR15","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1111\/j.2517-6161.1981.tb01155.x","volume":"43","author":"BW Silverman","year":"1981","unstructured":"Silverman BW (1981) Using kernel density estimates to investigate multimodality. J Roy Stat Soc: Ser B (Methodol) 43(1):97\u201399","journal-title":"J Roy Stat Soc: Ser B (Methodol)"},{"issue":"5","key":"2317_CR16","doi-asserted-by":"publisher","first-page":"813","DOI":"10.1109\/TKDE.2011.33","volume":"24","author":"TC Havens","year":"2011","unstructured":"Havens TC, Bezdek JC (2011) An efficient formulation of the improved visual assessment of cluster tendency (IVAT) algorithm. IEEE Trans Knowl Data Eng 24(5):813\u2013822","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"2","key":"2317_CR17","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1198\/jcgs.2010.09139","volume":"20","author":"M Hahsler","year":"2011","unstructured":"Hahsler M, Hornik K (2011) Dissimilarity plots: a visual exploration tool for partitional clustering. J Comput Graph Stat 20(2):335\u2013354","journal-title":"J Comput Graph Stat"},{"key":"2317_CR18","volume-title":"Categorical Data Analysis","author":"A Agresti","year":"2012","unstructured":"Agresti A (2012) Categorical Data Analysis, vol 792. John Wiley & Sons, Hoboken"},{"key":"2317_CR19","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1146\/annurev-soc-060116-053613","volume":"43","author":"MP Couper","year":"2017","unstructured":"Couper MP (2017) New developments in survey data collection. Ann Rev Sociol 43:121\u2013145","journal-title":"Ann Rev Sociol"},{"issue":"1","key":"2317_CR20","doi-asserted-by":"publisher","first-page":"1684","DOI":"10.1038\/s41467-023-37432-w","volume":"14","author":"SV Vasaikar","year":"2023","unstructured":"Vasaikar SV, Savage AK, Gong Q, Swanson E, Talla A, Lord C, Heubeck AT, Reading J, Graybuck LT, Meijer P (2023) A comprehensive platform for analyzing longitudinal multi-omics data. Nat Commun 14(1):1684","journal-title":"Nat Commun"},{"key":"2317_CR21","doi-asserted-by":"crossref","unstructured":"Boriah S, Chandola V, Kumar V (2008) Similarity measures for categorical data: a comparative evaluation. In: SIAM International Conference on Data Mining, 243\u2013254","DOI":"10.1137\/1.9781611972788.22"},{"issue":"01","key":"2317_CR22","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1142\/S0219622019300064","volume":"19","author":"S Naouali","year":"2020","unstructured":"Naouali S, Ben Salem S, Chtourou Z (2020) Clustering categorical data: a survey. Int J Inf Technol Decis Mak 19(01):49\u201396","journal-title":"Int J Inf Technol Decis Mak"},{"issue":"7","key":"2317_CR23","first-page":"9149","volume":"45","author":"H Liu","year":"2023","unstructured":"Liu H, Chen J, Dy J, Fu Y (2023) Transforming complex problems into k-means solutions. IEEE Trans Pattern Anal Mach Intell 45(7):9149\u20139168","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2317_CR24","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s10618-013-0336-8","volume":"29","author":"M Bouguessa","year":"2015","unstructured":"Bouguessa M (2015) Clustering categorical data in projected spaces. Data Min Knowl Disc 29:3\u201338","journal-title":"Data Min Knowl Disc"},{"issue":"12","key":"2317_CR25","doi-asserted-by":"publisher","first-page":"1607","DOI":"10.1109\/TKDE.2007.190649","volume":"19","author":"E Cesario","year":"2007","unstructured":"Cesario E, Manco G, Ortale R (2007) Top-down parameter-free clustering of high-dimensional categorical data. IEEE Trans Knowl Data Eng 19(12):1607\u20131624","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"7","key":"2317_CR26","first-page":"3560","volume":"44","author":"Y Zhang","year":"2022","unstructured":"Zhang Y, Cheung Y-M (2022) Learnable weighting of intra-attribute distances for categorical data clustering with nominal and ordinal attributes. IEEE Trans Pattern Anal Mach Intell 44(7):3560\u20133576","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2317_CR27","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1016\/j.patcog.2019.01.042","volume":"90","author":"Y Xiao","year":"2019","unstructured":"Xiao Y, Huang C, Huang J, Kaku I, Xu Y (2019) Optimal mathematical programming and variable neighborhood search for k-modes categorical data clustering. Pattern Recogn 90:183\u2013195","journal-title":"Pattern Recogn"},{"issue":"5","key":"2317_CR28","doi-asserted-by":"publisher","first-page":"853","DOI":"10.1109\/TKDE.2018.2848902","volume":"31","author":"S Jian","year":"2019","unstructured":"Jian S, Pang G, Cao L, Lu K, Gao H (2019) Cure: flexible categorical data representation by hierarchical coupling learning. IEEE Trans Knowl Data Eng 31(5):853\u2013866","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"2317_CR29","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108694","volume":"128","author":"L Bai","year":"2022","unstructured":"Bai L, Liang J (2022) A categorical data clustering framework on graph representation. Pattern Recogn 128:108694","journal-title":"Pattern Recogn"},{"key":"2317_CR30","doi-asserted-by":"crossref","unstructured":"Barbar\u00e1 D, Li Y, Couto J (2002) Coolcat: an entropy-based algorithm for categorical clustering. In: Proceedings of the Eleventh International Conference on Information and Knowledge Management, pp. 582\u2013589","DOI":"10.1145\/584792.584888"},{"key":"2317_CR31","doi-asserted-by":"crossref","unstructured":"Li T, Ma S, Ogihara M (2004) Entropy-based criterion in categorical clustering. In: Proceedings of the Twenty-first International Conference on Machine Learning, p. 68","DOI":"10.1145\/1015330.1015404"},{"issue":"2","key":"2317_CR32","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/BF00114265","volume":"2","author":"DH Fisher","year":"1987","unstructured":"Fisher DH (1987) Knowledge acquisition via incremental conceptual clustering. Mach Learn 2(2):139\u2013172","journal-title":"Mach Learn"},{"issue":"2","key":"2317_CR33","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1023\/A:1010924920739","volume":"45","author":"B Mirkin","year":"2001","unstructured":"Mirkin B (2001) Reinterpreting the category utility function. Mach Learn 45(2):219\u2013228","journal-title":"Mach Learn"},{"key":"2317_CR34","unstructured":"Epter S, Krishnamoorthy M, Zaki M (1999) Clusterability detection and initial seed selection in large datasets. In: The International Conference on Knowledge Discovery in Databases, vol. 7"},{"issue":"483","key":"2317_CR35","doi-asserted-by":"publisher","first-page":"1281","DOI":"10.1198\/016214508000000454","volume":"103","author":"Y Liu","year":"2008","unstructured":"Liu Y, Hayes DN, Nobel A, Marron JS (2008) Statistical significance of clustering for high-dimension, low-sample size data. J Am Stat Assoc 103(483):1281\u20131293","journal-title":"J Am Stat Assoc"},{"key":"2317_CR36","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/BF01890074","volume":"4","author":"RC Dubes","year":"1987","unstructured":"Dubes RC, Zeng G (1987) A test for spatial homogeneity in cluster analysis. J Classif 4:33\u201356","journal-title":"J Classif"},{"issue":"3","key":"2317_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v031.i03","volume":"31","author":"J Leeuw","year":"2009","unstructured":"Leeuw J, Mair P (2009) Multidimensional scaling using majorization: Smacof in r. J Stat Softw 31(3):1\u201330","journal-title":"J Stat Softw"},{"key":"2317_CR38","unstructured":"Maaten L, Hinton G (2008) Visualizing data using T-SNE. J Mach Learn Res 9(11)"},{"issue":"1","key":"2317_CR39","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1038\/nbt.4314","volume":"37","author":"E Becht","year":"2019","unstructured":"Becht E, McInnes L, Healy J, Dutertre C-A, Kwok IW, Ng LG, Ginhoux F, Newell EW (2019) Dimensionality reduction for visualizing single-cell data using UMAP. Nat Biotechnol 37(1):38\u201344","journal-title":"Nat Biotechnol"},{"issue":"3","key":"2317_CR40","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1016\/S0022-5371(77)80054-6","volume":"16","author":"B Hayes-Roth","year":"1977","unstructured":"Hayes-Roth B, Hayes-Roth F (1977) Concept learning and the recognition and classification of exemplars. J Verbal Learn Verbal Behav 16(3):321\u2013338","journal-title":"J Verbal Learn Verbal Behav"},{"key":"2317_CR41","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v101.i01","volume":"101","author":"O Cinar","year":"2022","unstructured":"Cinar O, Viechtbauer W (2022) The poolr package for combining independent and dependent p values. J Stat Softw 101:1\u201342","journal-title":"J Stat Softw"},{"key":"2317_CR42","unstructured":"Dua D, Graff C (2019) UCI Machine Learning Repository"},{"issue":"5","key":"2317_CR43","doi-asserted-by":"publisher","first-page":"2161","DOI":"10.1007\/s00180-022-01209-4","volume":"37","author":"Z Sulc","year":"2022","unstructured":"Sulc Z, Cibulkova J, Rezankova H (2022) Nomclust 2.0: an R package for hierarchical clustering of objects characterized by nominal variables. Comput Statistics 37(5):2161\u20132184","journal-title":"Comput Statistics"},{"key":"2317_CR44","unstructured":"Ferrari A (2019) A note on sum and difference of correlated chi-squared variables. arXiv preprint arXiv:1906.09982"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-024-02317-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10115-024-02317-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-024-02317-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,12]],"date-time":"2025-04-12T03:40:49Z","timestamp":1744429249000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10115-024-02317-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,6]]},"references-count":44,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["2317"],"URL":"https:\/\/doi.org\/10.1007\/s10115-024-02317-x","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"type":"print","value":"0219-1377"},{"type":"electronic","value":"0219-3116"}],"subject":[],"published":{"date-parts":[[2025,1,6]]},"assertion":[{"value":"2 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 November 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}