{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T15:46:46Z","timestamp":1773676006092,"version":"3.50.1"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2013,8,1]],"date-time":"2013-08-01T00:00:00Z","timestamp":1375315200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2015,1]]},"DOI":"10.1007\/s10618-013-0336-8","type":"journal-article","created":{"date-parts":[[2013,7,31]],"date-time":"2013-07-31T13:14:48Z","timestamp":1375276488000},"page":"3-38","source":"Crossref","is-referenced-by-count":17,"title":["Clustering categorical data in projected spaces"],"prefix":"10.1007","volume":"29","author":[{"given":"Mohamed","family":"Bouguessa","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2013,8,1]]},"reference":[{"issue":"2","key":"336_CR1","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1109\/69.991713","volume":"14","author":"CC Aggarwal","year":"2002","unstructured":"Aggarwal CC, Yu PS (2002) Redefining clustering for high dimensional applications. IEEE Trans Knowl Data Eng 14(2):210\u2013225","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"336_CR2","doi-asserted-by":"crossref","unstructured":"Aggarwal CC, Procopiuc C, Wolf JL, Yu PS, Park JS (1999) Fast algorithm for Projected clustering. In: Proceedings of the ACM SIGMOD\u201999 conference, pp 61\u201372","DOI":"10.1145\/304182.304188"},{"key":"336_CR3","doi-asserted-by":"crossref","unstructured":"Andritsos P, Tsaparas P, Miller RJ, Sevcik KC (2004) LIMBO: scalable clustering of categorical data. In: Proceedings of the 9th international conference on extending database technology (EDBT\u201904), pp 123\u2013146","DOI":"10.1007\/978-3-540-24741-8_9"},{"issue":"2","key":"336_CR4","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1109\/TKDE.2005.31","volume":"17","author":"F Angiulli","year":"2005","unstructured":"Angiulli F, Pizzuti C (2005) Outlier mining in large high-dimensional data sets. IEEE Trans Knowl Data Eng 17(2):203\u2013215","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"12","key":"336_CR5","doi-asserted-by":"crossref","first-page":"2843","DOI":"10.1016\/j.patcog.2011.04.024","volume":"44","author":"L Bai","year":"2011","unstructured":"Bai L, Liang J, Dang C, Cao F (2011) A novel attribute weighting algorithm for clustering high-dimensional categorical data. Pattern Recognit 44(12):2843\u20132861","journal-title":"Pattern Recognit"},{"key":"336_CR6","doi-asserted-by":"crossref","unstructured":"Barbara D, Li Y, Couto J (2002) COOLCAT: an entropy-based algorithm for categorical clustering. In: Proceedings of the 11th ACM international conference on information and knowledge management (CIKM\u201902), pp 582\u2013589","DOI":"10.1145\/584792.584888"},{"key":"336_CR7","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-0450-1","volume-title":"Pattern recognition with fuzzy objective function algorithms","author":"JC Bezdek","year":"1981","unstructured":"Bezdek JC (1981) Pattern recognition with fuzzy objective function algorithms. Plenum, New York"},{"key":"336_CR8","doi-asserted-by":"crossref","unstructured":"Bouguessa M (2011) An unsupervised approach for identifying spammers in social networks. In: Proceedings of the 23rd IEEE international conference on tools with artificial intelligence (ICTAI\u201911), pp 832\u2013840","DOI":"10.1109\/ICTAI.2011.130"},{"issue":"4","key":"336_CR9","doi-asserted-by":"crossref","first-page":"507","DOI":"10.1109\/TKDE.2008.162","volume":"21","author":"M Bouguessa","year":"2009","unstructured":"Bouguessa M, Wang S (2009) Mining projected clusters in high-dimensional spaces. IEEE Trans Knowl Data Eng 21(4):507\u2013522","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"13","key":"336_CR10","doi-asserted-by":"crossref","first-page":"1419","DOI":"10.1016\/j.patrec.2006.01.015","volume":"27","author":"M Bouguessa","year":"2006","unstructured":"Bouguessa M, Wang S, Sun H (2006) An objective approach to cluster validation. Pattern Recognit Lett 27(13):1419\u20131430","journal-title":"Pattern Recognit Lett"},{"issue":"2","key":"336_CR11","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1007\/s11222-006-8451-7","volume":"16","author":"N Bouguila","year":"2006","unstructured":"Bouguila N, Ziou D, Monga E (2006) Practical Bayesian estimation of a finite beta mixture through Gibbs sampling and its applications. Stat Comput 16(2):215\u2013225","journal-title":"Stat Comput"},{"issue":"12","key":"336_CR12","doi-asserted-by":"crossref","first-page":"1607","DOI":"10.1109\/TKDE.2007.190649","volume":"19","author":"E Cesario","year":"2007","unstructured":"Cesario E, Manco G, Ortale R (2007) Top-down parameter-free clustering of high-dimensional categorical data. IEEE Trans Knowl Data Eng 19(12):1607\u20131624","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"336_CR13","doi-asserted-by":"crossref","unstructured":"Das K, Schneider J (2007) Detecting anomalous records in categorical datasets. In: Proceedings of the 13th ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201907), pp 220\u2013229","DOI":"10.1145\/1281192.1281219"},{"issue":"1","key":"336_CR14","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster AP, Laird NM, Rubin DB (1977) Maximum likelihood from incomplete data via the EM algorithm. J R Stat Soc B 39(1):1\u201338","journal-title":"J R Stat Soc B"},{"issue":"1","key":"336_CR15","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1007\/s10618-006-0060-8","volume":"14","author":"C Domeniconi","year":"2007","unstructured":"Domeniconi C, Gunopulos D, Ma S, Yan B, Al-Razgan M, Papadopoulos D (2007) Locally adaptive metrics for clustering high dimensional data. Data Min Knowl Discov 14(1):63\u201397","journal-title":"Data Min Knowl Discov"},{"issue":"3","key":"336_CR16","doi-asserted-by":"crossref","first-page":"381","DOI":"10.1109\/34.990138","volume":"24","author":"MAT Figueiredo","year":"2002","unstructured":"Figueiredo MAT, Jain AK (2002) Unsupervised learning of finite mixture models. IEEE Trans Pattern Anal Mach Intell 24(3):381\u2013396","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"336_CR17","first-page":"139","volume":"2","author":"DH Fisher","year":"1987","unstructured":"Fisher DH (1987) Knowledge acquisition via incremental conceptual clustering. Mach Learn 2(2):139\u2013172","journal-title":"Mach Learn"},{"issue":"2","key":"336_CR18","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1145\/1046456.1046468","volume":"6","author":"G Gan","year":"2004","unstructured":"Gan G, Wu J (2004) Subspace clustering for high dimensional categorical data. ACM SIGKDD Explor Newsl 6(2):87\u201394","journal-title":"ACM SIGKDD Explor Newsl"},{"key":"336_CR19","doi-asserted-by":"crossref","unstructured":"Ganti V, Gehrke J, Ramakrishnan R (1999) CACTUS: clustering categorical data using summaries. In: Proceedings of the 5th ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201999), pp 73\u201383","DOI":"10.1145\/312129.312201"},{"issue":"5","key":"336_CR20","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1016\/S0306-4379(00)00022-3","volume":"25","author":"S Guha","year":"2000","unstructured":"Guha S, Rastogi R, Shim K (2000) ROCK: a robust clustering algorithm for categorical attributes. Inf Syst 25(5):345\u2013366","journal-title":"Inf Syst"},{"key":"336_CR21","doi-asserted-by":"crossref","unstructured":"He Z, Deng S, Xu X, Huang JZ (2006) A fast greedy algorithm for outlier mining. In: Proceedings of the 10th Pacific-Asia conference on advances in knowledge discovery and data mining (PAKDD\u201906), pp 567\u2013576","DOI":"10.1007\/11731139_67"},{"issue":"9","key":"336_CR22","doi-asserted-by":"crossref","first-page":"2118","DOI":"10.1093\/bioinformatics\/bti318","volume":"21","author":"Y Ji","year":"2005","unstructured":"Ji Y, Wu C, Liu P, Wang J, Coombes KR (2005) Applications of beta-mixture models in bioinformatics. Bioinformatics 21(9):2118\u20132122","journal-title":"Bioinformatics"},{"issue":"8","key":"336_CR23","doi-asserted-by":"crossref","first-page":"1026","DOI":"10.1109\/TKDE.2007.1048","volume":"19","author":"L Jing","year":"2007","unstructured":"Jing L, Ng MK, Huang JZ (2007) An entropy weighting k-means algorithm for subspace clustering of high-dimensional sparse data. IEEE Trans Knowl Data Eng 19(8):1026\u20131041","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"336_CR24","doi-asserted-by":"crossref","unstructured":"Keogh E, Lonardi S, Ratanamahatana CA (2004) Towards parameter-free data mining. In: Proceedings of the 10th ACM SIGKDD international conference on knowledge discovery and data mining (KDD\u201904), pp 206\u2013215","DOI":"10.1145\/1014052.1014077"},{"issue":"12","key":"336_CR25","doi-asserted-by":"crossref","first-page":"1405","DOI":"10.1016\/j.patrec.2006.01.011","volume":"27","author":"M Kim","year":"2006","unstructured":"Kim M, Ramakrishna RS (2006) Projected clustering for categorical datasets. Pattern Recognit Lett 27(12):1405\u20131417","journal-title":"Pattern Recognit Lett"},{"issue":"2","key":"336_CR26","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1007\/s10618-009-0148-z","volume":"20","author":"A Koufakou","year":"2010","unstructured":"Koufakou A, Georgiopoulos M (2010) A fast outlier detection strategy for distributed high-dimensional data sets with mixed attributes. Data Min Knowl Discov 20(2):259\u2013289","journal-title":"Data Min Knowl Discov"},{"key":"336_CR27","doi-asserted-by":"crossref","unstructured":"Koufakou A, Ortiz EG, Georgiopoulos M, Anagnostopoulos GC, Reynolds KM (2007) A scalable and efficient outlier detection strategy for categorical data. In: Proceedings of the 19th IEEE international conference on tools with artificial intelligence (ICTAI\u201907), pp 210\u2013217","DOI":"10.1109\/ICTAI.2007.125"},{"key":"336_CR28","doi-asserted-by":"crossref","unstructured":"Kriegel HP, Kr\u00f6ger P, Zimek A (2009) Clustering high-dimensional data: a survey on subspace clustering, pattern-based clustering, and correlation clustering. ACM Trans Knowl Discov Data 3(1), art no 1","DOI":"10.1145\/1497577.1497578"},{"key":"336_CR29","doi-asserted-by":"crossref","unstructured":"Ma Z, Leijon A (2009) Beta mixture models and the application to image classification. In: Proceedings of the 16th IEEE international conference on image processing (ICIP\u201909), pp 2045\u20132048","DOI":"10.1109\/ICIP.2009.5414043"},{"issue":"3","key":"336_CR30","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1007\/s10115-007-0090-6","volume":"14","author":"G Moise","year":"2008","unstructured":"Moise G, Sander J, Ester M (2008) Robust projected clustering. Knowl Inf Syst 14(3):273\u2013298","journal-title":"Knowl Inf Syst"},{"issue":"1","key":"336_CR31","first-page":"1270","volume":"2","author":"E M\u00fcller","year":"2009","unstructured":"M\u00fcller E, G\u00fcnnemann S, Assent I, Seidl T (2009) Evaluating clustering in subspace projections of high dimensional data. Proc Very Large Databases Endow 2(1):1270\u20131281","journal-title":"Proc Very Large Databases Endow"},{"issue":"2\u20133","key":"336_CR32","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1007\/s10618-005-0014-6","volume":"12","author":"ME Otey","year":"2006","unstructured":"Otey ME, Ghoting A, Parthasarathy S (2006) Fast distributed outlier detection in mixed-attribute data sets. Data Min Knowl Discov 12(2\u20133):203\u2013228","journal-title":"Data Min Knowl Discov"},{"issue":"19","key":"336_CR33","doi-asserted-by":"crossref","first-page":"2738","DOI":"10.1093\/bioinformatics\/btr464","volume":"27","author":"DS Rodriguez-Baena","year":"2011","unstructured":"Rodriguez-Baena DS, Perez-Pulido AJ, Aguilar-Ruiz JS (2011) A biclustering algorithm for extracting bit-patterns from binary datasets. Bioinformatics 27(19):2738\u20132745","journal-title":"Bioinformatics"},{"issue":"2","key":"336_CR34","doi-asserted-by":"crossref","first-page":"461","DOI":"10.1214\/aos\/1176344136","volume":"6","author":"G Schwarz","year":"1978","unstructured":"Schwarz G (1978) Estimating the dimension of a model. Ann Stat 6(2):461\u2013464","journal-title":"Ann Stat"},{"issue":"1","key":"336_CR35","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1023\/A:1008940618127","volume":"10","author":"P Smyth","year":"2000","unstructured":"Smyth P (2000) Model selection for probabilistic clustering using cross-validated likelihood. Stat Comput 10(1):63\u201372","journal-title":"Stat Comput"},{"key":"336_CR36","doi-asserted-by":"crossref","unstructured":"Wang K, Xu C, Liu B (1999) Clustering transactions using large items. In: Proceedings of the 8th ACM international conference on information and knowledge management (CIKM\u201999), pp 483\u2013490","DOI":"10.1145\/319950.320054"},{"issue":"1","key":"336_CR37","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1007\/s10618-011-0221-2","volume":"24","author":"T Xiong","year":"2012","unstructured":"Xiong T, Wang S, Mayers A, Monga E (2012) DHCC: divisive hierarchical clustering of categorical data. Data Min Knowl Discov 24(1):103\u2013135","journal-title":"Data Min Knowl Discov"},{"issue":"11","key":"336_CR38","doi-asserted-by":"crossref","first-page":"1387","DOI":"10.1109\/TKDE.2004.74","volume":"16","author":"KY Yip","year":"2004","unstructured":"Yip KY, Cheung DW, Ng MK (2004) HARP: A practical projected clustering algorithm. IEEE Trans Knowl Data Eng 16(11):1387\u20131397","journal-title":"IEEE Trans Knowl Data Eng"},{"issue":"3","key":"336_CR39","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1109\/TCBB.2007.1022","volume":"4","author":"AM Yip","year":"2007","unstructured":"Yip AM, Ng MK, Wu EH, Chan TF (2007) Strategies for identifying statistically significant dense regions in microarray data. IEEE\/ACM Trans Comput Biol Bioinform 4(3):415\u2013429","journal-title":"IEEE\/ACM Trans Comput Biol Bioinform"},{"issue":"4","key":"336_CR40","doi-asserted-by":"crossref","first-page":"531","DOI":"10.1137\/1037125","volume":"37","author":"TJ Ypma","year":"1995","unstructured":"Ypma TJ (1995) Historical development of the Newton\u2013Raphson method. SIAM Rev 37(4):531\u2013551","journal-title":"SIAM Rev"},{"issue":"1","key":"336_CR41","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1016\/j.datak.2006.01.005","volume":"60","author":"MJ Zaki","year":"2007","unstructured":"Zaki MJ, Peters M, Assent I, Seidl T (2007) CLICKS: an effective algorithm for mining subspace clusters in categorical datasets. Data Knowl Eng 60(1):51\u201370","journal-title":"Data Knowl Eng"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-013-0336-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10618-013-0336-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-013-0336-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,15]],"date-time":"2024-05-15T19:27:53Z","timestamp":1715801273000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10618-013-0336-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,8,1]]},"references-count":41,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,1]]}},"alternative-id":["336"],"URL":"https:\/\/doi.org\/10.1007\/s10618-013-0336-8","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,8,1]]}}}