{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T19:56:25Z","timestamp":1760385385379},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2014,10,2]],"date-time":"2014-10-02T00:00:00Z","timestamp":1412208000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2015,11]]},"DOI":"10.1007\/s10618-014-0387-5","type":"journal-article","created":{"date-parts":[[2014,10,1]],"date-time":"2014-10-01T11:07:49Z","timestamp":1412161669000},"page":"1560-1597","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Cluster validity functions for categorical data: a solution-space perspective"],"prefix":"10.1007","volume":"29","author":[{"given":"Liang","family":"Bai","sequence":"first","affiliation":[]},{"given":"Jiye","family":"Liang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,10,2]]},"reference":[{"issue":"1","key":"387_CR1","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1109\/69.979972","volume":"14","author":"CC Aggarwal","year":"2002","unstructured":"Aggarwal CC, Magdalena C, Yu PS (2002) Finding localized associations in market basket data. IEEE Trans Knowl Data Eng 14(1):51\u201362","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"387_CR2","doi-asserted-by":"crossref","unstructured":"Andritsos P, Tsaparas P, Miller RJ, Sevcik KC (2004) Limbo: scalable clustering of categorical data. In: Proceedings of the ninth international conference on extending database technology","DOI":"10.1007\/978-3-540-24741-8_9"},{"issue":"12","key":"387_CR3","doi-asserted-by":"crossref","first-page":"2843","DOI":"10.1016\/j.patcog.2011.04.024","volume":"44","author":"L Bai","year":"2011","unstructured":"Bai L, Liang JY, Dang CY, Cao FY (2011) A novel attribute weighting algorithm for clustering high-dimensional categorical data. Pattern Recognit 44(12):2843\u20132861","journal-title":"Pattern Recognit"},{"issue":"6","key":"387_CR4","doi-asserted-by":"crossref","first-page":"1509","DOI":"10.1109\/TPAMI.2012.228","volume":"35","author":"L Bai","year":"2013","unstructured":"Bai L, Liang JY, Dang CY (2013) The impact of cluster representatives on the convergence of the $$k$$ k -modes type clustering. IEEE Trans Pattern Anal Mach Intell 35(6):1509\u20131522","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"387_CR5","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4615-0953-0","volume-title":"Applications of data mining in computer security","author":"D Barbara","year":"2002","unstructured":"Barbara D, Jajodia S (2002) Applications of data mining in computer security. Kluwer, Dordrecht"},{"key":"387_CR6","doi-asserted-by":"crossref","unstructured":"Barbara D, Li Y, Couto J (2002) Coolcat: an entropy-based algorithm for categorical clustering. In: Proceedings of the eleventh international conference on information and knowledge management, pp 582\u2013589","DOI":"10.1145\/584792.584888"},{"key":"387_CR7","doi-asserted-by":"crossref","DOI":"10.1002\/0471223921","volume-title":"Bioinformatics: a practical guide to the analysis of genes and proteins","author":"A Baxevanis","year":"2001","unstructured":"Baxevanis A, Ouellette F (2001) Bioinformatics: a practical guide to the analysis of genes and proteins, 2nd edn. Wiley, New York","edition":"2"},{"key":"387_CR8","volume-title":"Data mining techniques for marketing. Sales and customer support","author":"MJA Berry","year":"1996","unstructured":"Berry MJA, Linoff G (1996) Data mining techniques for marketing. Sales and customer support. John Wiley and Sons, New York"},{"issue":"11","key":"387_CR9","doi-asserted-by":"crossref","first-page":"1458","DOI":"10.1109\/TKDE.2008.81","volume":"20","author":"HL Chen","year":"2008","unstructured":"Chen HL, Chuang KT, Chen MS (2008) On data labeling for clustering categorical data. IEEE Trans Knowl Data Eng 20(11):1458\u20131472","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"387_CR10","unstructured":"Chen K, Liu L (2005) The \u201dbest k\u201d for entropy-based categorical clustering. In: Proceedings of international conference on scientific and statistical database management (SSDBM), pp 253C\u2013262C"},{"issue":"5","key":"387_CR11","doi-asserted-by":"crossref","first-page":"1241","DOI":"10.1007\/s00778-009-0134-5","volume":"18","author":"K Chen","year":"2009","unstructured":"Chen K, Liu L (2009) He-tree: a framework for detecting changes in clustering structure for categorical data streams. VLDB J 18(5):1241\u20131260","journal-title":"VLDB J"},{"key":"387_CR12","volume-title":"Pattern classification and scene analysis","author":"RO Duda","year":"1973","unstructured":"Duda RO, Hart PE (1973) Pattern classification and scene analysis. Wiley, New York"},{"key":"387_CR13","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1080\/01969727308546046","volume":"3","author":"JC Dunn","year":"1973","unstructured":"Dunn JC (1973) A fuzzy relative of the isodata process and its use in detecting compact well-separated clusters. J Cybern 3:32\u201357","journal-title":"J Cybern"},{"issue":"2","key":"387_CR14","first-page":"139","volume":"2","author":"DH Fisher","year":"1987","unstructured":"Fisher DH (1987) Knowledge acquisition via incremental conceptual clustering. Mach Learn 2(2):139\u2013172","journal-title":"Mach Learn"},{"key":"387_CR15","unstructured":"Gluck MA, Corter JE (1985) Information uncertainty and the utility. In: Proceedings of the seventh annual conference of cognitive science society, pp 283\u2013287"},{"issue":"6","key":"387_CR16","doi-asserted-by":"crossref","first-page":"567","DOI":"10.1016\/0031-3203(91)90022-W","volume":"24","author":"KC Gowda","year":"1991","unstructured":"Gowda KC, Diday E (1991) Symbolic clustering using a new dissimilarity measure. Pattern Recognit 24(6):567\u2013578","journal-title":"Pattern Recognit"},{"key":"387_CR17","doi-asserted-by":"crossref","unstructured":"Halkidi M, Vazirgiannis M (2001) Clustering validity assessment: finding the optimal partitioning of a data set. In: Proceedings of IEEE international conference on data mining (ICDM), pp 187\u2013194","DOI":"10.1109\/ICDM.2001.989517"},{"key":"387_CR18","doi-asserted-by":"crossref","unstructured":"Halkidi M, Batistakis Y, Vazirgiannis M (2001) On clustering validation techniques. J Intell Inf Syst 17(2\u20133): 107\u2013145","DOI":"10.1023\/A:1012801612483"},{"key":"387_CR19","doi-asserted-by":"crossref","unstructured":"He Z, Deng S, Xu X (2005) Improving $$k$$ k -modes algorithm considering frequencies of attribute values in mode. In: Proceedings of computational intelligence and security, pp 157\u2013162","DOI":"10.1007\/11596448_23"},{"key":"387_CR20","unstructured":"Huang ZX (1997) A fast clustering algorithm to cluster very large categorical data sets in data mining. In: Proceedings of SIGMOD workshop research issues on data mining and knowledge discovery, pp 1\u20138"},{"issue":"4","key":"387_CR21","doi-asserted-by":"crossref","first-page":"446","DOI":"10.1109\/91.784206","volume":"7","author":"ZX Huang","year":"1999","unstructured":"Huang ZX, Ng MK (1999) A fuzzy $$k$$ k -modes algorithm for clustering categorical data. IEEE Trans Fuzzy Syst 7(4):446\u2013452","journal-title":"IEEE Trans Fuzzy Syst"},{"issue":"5","key":"387_CR22","first-page":"657","volume":"27","author":"ZX Huang","year":"2005","unstructured":"Huang ZX, Ng MK, Rong H, Li Z (2005) Automated variable weighting in $$k$$ k -means type clustering. IEEE Trans Fuzzy Syst 27(5):657\u2013668","journal-title":"IEEE Trans Fuzzy Syst"},{"key":"387_CR23","volume-title":"Algorithms for clustering data","author":"AK Jain","year":"1988","unstructured":"Jain AK, Dubes RC (1988) Algorithms for clustering data. Prentice Hall, Englewood Cliffs"},{"key":"387_CR24","doi-asserted-by":"crossref","unstructured":"Li T, Ma S, Ogihara M (2004) Entropy-based criterion in categorical clustering. In: Proceedings of international conference on machine learning (ICML), pp 536\u2013543","DOI":"10.1145\/1015330.1015404"},{"issue":"4","key":"387_CR25","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1080\/0308107021000013635","volume":"31","author":"JY Liang","year":"2002","unstructured":"Liang JY, Chin KS, Dang CY, Yam RCM (2002) A new method for measuring uncertainty and fuzziness in rough set theory. Int J Gen Syst 31(4):331\u2013342","journal-title":"Int J Gen Syst"},{"key":"387_CR26","doi-asserted-by":"crossref","unstructured":"Liu Y, Li Z, Xiong H, Gao X, Wu J (2010) Understanding of internal clustering validation measures. In: The 10th IEEE international conference on data mining (ICDM), pp 911\u2013916","DOI":"10.1109\/ICDM.2010.35"},{"issue":"3","key":"387_CR27","first-page":"982","volume":"43","author":"Y Liu","year":"2013","unstructured":"Liu Y, Li Z, Xiong H, Gao X, Wu J, Wu S (2013) Understanding and enhancement of internal clustering validation measure. IEEE Trans Syst Man Cybern B Cybern (TSMCB) 43(3):982\u2013994","journal-title":"IEEE Trans Syst Man Cybern B Cybern (TSMCB)"},{"issue":"9","key":"387_CR28","first-page":"1949","volume":"21","author":"P Luo","year":"2009","unstructured":"Luo P, Xiong H, Zhan GX, Wu JJ, Shi ZZ (2009) Information-theoretic distance measures for clustering validation: generalization and normalization. IEEE Trans Knowl Data Eng 21(9):1949\u20131962","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"387_CR29","unstructured":"MacQueen JB (1967) Some methods for classification and analysis of multivariate observations. In: Proceedings of the fifth Berkeley symposium on mathematical statistics and probability. University of California Press, Berkeley, pp 281\u2013297"},{"issue":"3","key":"387_CR30","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1023\/A:1010924920739","volume":"45","author":"B Mirkin","year":"2001","unstructured":"Mirkin B (2001) Reinterpreting the category utility function. Mach Learn 45(3):219\u2013228","journal-title":"Mach Learn"},{"issue":"3","key":"387_CR31","doi-asserted-by":"crossref","first-page":"503","DOI":"10.1109\/TPAMI.2007.53","volume":"29","author":"MK Ng","year":"2007","unstructured":"Ng MK, Li MJ, Huang ZX, He ZY (2007) On the impact of dissimilarity measure in $$k$$ k -modes clustering algorithm. IEEE Trans Pattern Anal Mach Intell 29(3):503\u2013507","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"387_CR32","first-page":"241","volume":"14","author":"O San","year":"2004","unstructured":"San O, Huynh V, Nakamori Y (2004) An alternative extension of the $$k$$ k -means algorithm for clustering categorical data. Pattern Recognit 14(2):241\u2013247","journal-title":"Pattern Recognit"},{"key":"387_CR33","unstructured":"Steinbach M, Karypis G, Kumar V (2000) A comparison of document clustering techniques. In: Proceedings of workshop text mining, 6th ACMSIGKDD international conference on knowledge discovery and data mining, pp 20\u201323"},{"key":"387_CR34","unstructured":"UCI (2012) UCI machine learning repository. http:\/\/www.ics.uci.edu\/mlearn\/MLRepository.html"},{"key":"387_CR35","volume-title":"Categorical data analysis for geographers and environmental scientists","author":"N Wrigley","year":"1985","unstructured":"Wrigley N (1985) Categorical data analysis for geographers and environmental scientists. Longman, London"},{"issue":"22","key":"387_CR36","doi-asserted-by":"crossref","first-page":"4353","DOI":"10.1016\/j.ins.2010.07.028","volume":"180","author":"J Wu","year":"2010","unstructured":"Wu J, Yuan H, Chen G (2010) Validation of overlapping clustering: a random clustering perspective. Inf Sci 180(22):4353\u20134369","journal-title":"Inf Sci"},{"issue":"2","key":"387_CR37","doi-asserted-by":"crossref","first-page":"318","DOI":"10.1109\/TSMCB.2008.2004559","volume":"39","author":"H Xiong","year":"2009","unstructured":"Xiong H, Wu J, Chen J (2009) K-means clustering versus validation measures: a data distribution perspective. IEEE Trans Syst Man Cybern B Cybern 39(2):318\u2013331","journal-title":"IEEE Trans Syst Man Cybern B Cybern"},{"issue":"1\u20132","key":"387_CR38","first-page":"67","volume":"1","author":"YM Yang","year":"2004","unstructured":"Yang YM (2004) An evaluation of statistical approaches to text categorization. J Inf Retr 1(1\u20132):67\u201388","journal-title":"J Inf Retr"},{"issue":"8","key":"387_CR39","doi-asserted-by":"crossref","first-page":"1197","DOI":"10.1109\/TPAMI.2005.160","volume":"27","author":"J Yu","year":"2005","unstructured":"Yu J (2005) General c-means clustering model. IEEE Trans Pattern Anal Mach Intell 27(8):1197\u20131211","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"3","key":"387_CR40","doi-asserted-by":"crossref","first-page":"311","DOI":"10.1023\/B:MACH.0000027785.44527.d6","volume":"55","author":"Y Zhao","year":"2004","unstructured":"Zhao Y, Karypis G (2004) Criterion functions for document clustering: experiments and analysis. Mach Learn 55(3):311\u2013331","journal-title":"Mach Learn"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-014-0387-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10618-014-0387-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-014-0387-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,30]],"date-time":"2019-05-30T19:29:45Z","timestamp":1559244585000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10618-014-0387-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,10,2]]},"references-count":40,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2015,11]]}},"alternative-id":["387"],"URL":"https:\/\/doi.org\/10.1007\/s10618-014-0387-5","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,10,2]]}}}