{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T05:19:16Z","timestamp":1743139156367,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031434112"},{"type":"electronic","value":"9783031434129"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43412-9_39","type":"book-chapter","created":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T20:28:38Z","timestamp":1694896118000},"page":"662-677","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["k-SubMix: Common Subspace Clustering on\u00a0Mixed-Type Data"],"prefix":"10.1007","author":[{"given":"Mauritius","family":"Klein","sequence":"first","affiliation":[]},{"given":"Collin","family":"Leiber","sequence":"additional","affiliation":[]},{"given":"Christian","family":"B\u00f6hm","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,17]]},"reference":[{"issue":"4","key":"39_CR1","first-page":"651","volume":"2","author":"H Abdi","year":"2007","unstructured":"Abdi, H., Valentin, D.: Multiple correspondence analysis. Encyclopedia Measur. Stat. 2(4), 651\u2013657 (2007)","journal-title":"Encyclopedia Measur. Stat."},{"issue":"2","key":"39_CR2","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1016\/j.datak.2007.03.016","volume":"63","author":"A Ahmad","year":"2007","unstructured":"Ahmad, A., Dey, L.: A k-mean clustering algorithm for mixed numeric and categorical data. Data Knowl. Eng. 63(2), 503\u2013527 (2007)","journal-title":"Data Knowl. Eng."},{"issue":"7","key":"39_CR3","doi-asserted-by":"publisher","first-page":"1062","DOI":"10.1016\/j.patrec.2011.02.017","volume":"32","author":"A Ahmad","year":"2011","unstructured":"Ahmad, A., Dey, L.: A k-means type clustering algorithm for subspace clustering of mixed numeric and categorical datasets. Pattern Recogn. Lett. 32(7), 1062\u20131069 (2011)","journal-title":"Pattern Recogn. Lett."},{"key":"39_CR4","doi-asserted-by":"publisher","first-page":"31883","DOI":"10.1109\/ACCESS.2019.2903568","volume":"7","author":"A Ahmad","year":"2019","unstructured":"Ahmad, A., Khan, S.S.: Survey of state-of-the-art mixed data clustering algorithms. IEEE Access 7, 31883\u201331902 (2019)","journal-title":"IEEE Access"},{"issue":"6","key":"39_CR5","doi-asserted-by":"publisher","first-page":"2743","DOI":"10.1109\/18.720554","volume":"44","author":"A Barron","year":"1998","unstructured":"Barron, A., Rissanen, J., Yu, B.: The minimum description length principle in coding and modeling. IEEE Trans. Inf. Theory 44(6), 2743\u20132760 (1998)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"39_CR6","doi-asserted-by":"crossref","unstructured":"Bauer, L.G., Leiber, C., B\u00f6hm, C., Plant, C.: Extension of the dip-test repertoire-efficient and differentiable p-value calculation for clustering. In: Proceedings of the 2023 SIAM International Conference on Data Mining (SDM), pp. 109\u2013117. SIAM (2023)","DOI":"10.1137\/1.9781611977653.ch13"},{"key":"39_CR7","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1007\/978-3-642-13657-3_7","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"C B\u00f6hm","year":"2010","unstructured":"B\u00f6hm, C., Goebl, S., Oswald, A., Plant, C., Plavinski, M., Wackersreuther, B.: Integrative parameter-free clustering of data with mixed type attributes. In: Zaki, M.J., Yu, J.X., Ravindran, B., Pudi, V. (eds.) PAKDD 2010. LNCS (LNAI), vol. 6118, pp. 38\u201347. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-13657-3_7"},{"key":"39_CR8","doi-asserted-by":"crossref","unstructured":"B\u00f6hm, C., Kailing, K., Kr\u00f6ger, P., Zimek, A.: Computing clusters of correlation connected objects. In: ACM SIGMOD, pp. 455\u2013466 (2004)","DOI":"10.1145\/1007568.1007620"},{"issue":"1","key":"39_CR9","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1007\/s00357-001-0004-3","volume":"18","author":"A Chaturvedi","year":"2001","unstructured":"Chaturvedi, A., Green, P.E., Caroll, J.D.: K-modes clustering. J. Classif. 18(1), 35\u201355 (2001)","journal-title":"J. Classif."},{"issue":"8","key":"39_CR10","doi-asserted-by":"publisher","first-page":"2228","DOI":"10.1016\/j.patcog.2013.01.027","volume":"46","author":"YM Cheung","year":"2013","unstructured":"Cheung, Y.M., Jia, H.: Categorical-and-numerical-attribute data clustering based on a unified similarity metric without knowing cluster number. Pattern Recogn. 46(8), 2228\u20132238 (2013)","journal-title":"Pattern Recogn."},{"issue":"1","key":"39_CR11","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster, A.P., Laird, N.M., Rubin, D.B.: Maximum likelihood from incomplete data via the EM algorithm. J. Roy. Stat. Soc.: Ser. B (Methodol.) 39(1), 1\u201322 (1977)","journal-title":"J. Roy. Stat. Soc.: Ser. B (Methodol.)"},{"key":"39_CR12","doi-asserted-by":"crossref","unstructured":"Ding, C., Li, T.: Adaptive dimension reduction using discriminant analysis and k-means clustering. In: ICML, pp. 521\u2013528 (2007)","DOI":"10.1145\/1273496.1273562"},{"key":"39_CR13","doi-asserted-by":"crossref","unstructured":"Diop, A., El Malki, N., Chevalier, M., Peninou, A., Teste, O.: Impact of similarity measures on clustering mixed data. In: 34th International Conference on Scientific and Statistical Database Management, pp. 1\u201312 (2022)","DOI":"10.1145\/3538712.3538742"},{"issue":"4","key":"39_CR14","doi-asserted-by":"publisher","first-page":"376","DOI":"10.1111\/j.1469-1809.1938.tb02189.x","volume":"8","author":"RA Fisher","year":"1938","unstructured":"Fisher, R.A.: The statistical utilization of multiple measurements. Ann. Eugen. 8(4), 376\u2013386 (1938)","journal-title":"Ann. Eugen."},{"key":"39_CR15","doi-asserted-by":"crossref","unstructured":"Goebl, S., He, X., Plant, C., B\u00f6hm, C.: Finding the optimal subspace for clustering. In: ICDM, pp. 130\u2013139. IEEE (2014)","DOI":"10.1109\/ICDM.2014.34"},{"key":"39_CR16","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1214\/aos\/1176346577","volume":"13","author":"JA Hartigan","year":"1985","unstructured":"Hartigan, J.A., Hartigan, P.M.: The dip test of unimodality. Ann. Stat. 13, 70\u201384 (1985)","journal-title":"Ann. Stat."},{"issue":"1","key":"39_CR17","first-page":"100","volume":"28","author":"JA Hartigan","year":"1979","unstructured":"Hartigan, J.A., Wong, M.A.: Algorithm as 136: a k-means clustering algorithm. J. Roy. Stat. Soc.: Ser. C (Methodol.) 28(1), 100\u2013108 (1979)","journal-title":"J. Roy. Stat. Soc.: Ser. C (Methodol.)"},{"issue":"5","key":"39_CR18","doi-asserted-by":"publisher","first-page":"657","DOI":"10.1109\/TPAMI.2005.95","volume":"27","author":"JZ Huang","year":"2005","unstructured":"Huang, J.Z., Ng, M.K., Rong, H., Li, Z.: Automated variable weighting in k-means type clustering. IEEE Trans. Pattern Anal. Mach. Intell. 27(5), 657\u2013668 (2005)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"39_CR19","unstructured":"Huang, Z.: Clustering large data sets with mixed numeric and categorical values. In: PAKDD, pp. 21\u201334. Citeseer (1997)"},{"issue":"3","key":"39_CR20","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1023\/A:1009769707641","volume":"2","author":"Z Huang","year":"1998","unstructured":"Huang, Z.: Extensions to the k-means algorithm for clustering large data sets with categorical values. Data Min. Knowl. Disc. 2(3), 283\u2013304 (1998)","journal-title":"Data Min. Knowl. Disc."},{"issue":"8","key":"39_CR21","first-page":"3308","volume":"29","author":"H Jia","year":"2017","unstructured":"Jia, H., Cheung, Y.M.: Subspace clustering of categorical and numerical data with an unknown number of clusters. IEEE TNNLS 29(8), 3308\u20133325 (2017)","journal-title":"IEEE TNNLS"},{"issue":"8","key":"39_CR22","doi-asserted-by":"publisher","first-page":"1026","DOI":"10.1109\/TKDE.2007.1048","volume":"19","author":"L Jing","year":"2007","unstructured":"Jing, L., Ng, M.K., Huang, J.Z.: An entropy weighting k-means algorithm for subspace clustering of high-dimensional sparse data. IEEE Trans. Knowl. Data Eng. 19(8), 1026\u20131041 (2007)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"39_CR23","doi-asserted-by":"crossref","unstructured":"Leiber, C., Mautz, D., Plant, C., B\u00f6hm, C.: Automatic parameter selection for non-redundant clustering. In: SIAM SDM, pp. 226\u2013234. SIAM (2022)","DOI":"10.1137\/1.9781611977172.26"},{"key":"39_CR24","doi-asserted-by":"crossref","unstructured":"Mautz, D., Ye, W., Plant, C., B\u00f6hm, C.: Towards an optimal subspace for k-means. In: Proceedings of the 23rd ACM SIGKDD, pp. 365\u2013373 (2017)","DOI":"10.1145\/3097983.3097989"},{"key":"39_CR25","doi-asserted-by":"crossref","unstructured":"Mautz, D., Ye, W., Plant, C., B\u00f6hm, C.: Discovering non-redundant k-means clusterings in optimal subspaces. In: Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pp. 1973\u20131982 (2018)","DOI":"10.1145\/3219819.3219945"},{"issue":"3","key":"39_CR26","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1023\/A:1024016609528","volume":"52","author":"DS Modha","year":"2003","unstructured":"Modha, D.S., Spangler, W.S.: Feature weighting in k-means clustering. Mach. Learn. 52(3), 217\u2013237 (2003)","journal-title":"Mach. Learn."},{"key":"39_CR27","doi-asserted-by":"publisher","DOI":"10.1201\/b17700","volume-title":"Multiple Factor Analysis by Example Using R","author":"J Pag\u00e8s","year":"2014","unstructured":"Pag\u00e8s, J.: Multiple Factor Analysis by Example Using R. CRC Press, New York (2014)"},{"issue":"11","key":"39_CR28","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1080\/14786440109462720","volume":"2","author":"K Pearson","year":"1901","unstructured":"Pearson, K.: LIII. On lines and planes of closest fit to systems of points in space. Lond. Edinb. Dublin Philos. Mag. J. Sci. 2(11), 559\u2013572 (1901)","journal-title":"Lond. Edinb. Dublin Philos. Mag. J. Sci."},{"key":"39_CR29","doi-asserted-by":"crossref","unstructured":"Plant, C., B\u00f6hm, C.: INCONCO: interpretable clustering of numerical and categorical objects. In: Proceedings of the 17th ACM SIGKDD, pp. 1127\u20131135 (2011)","DOI":"10.1145\/2020408.2020584"},{"key":"39_CR30","first-page":"583","volume":"3","author":"A Strehl","year":"2002","unstructured":"Strehl, A., Ghosh, J.: Cluster ensembles-a knowledge reuse framework for combining multiple partitions. J. Mach. Learn. Res. 3, 583\u2013617 (2002)","journal-title":"J. Mach. Learn. Res."},{"issue":"10","key":"39_CR31","doi-asserted-by":"publisher","first-page":"2761","DOI":"10.1109\/TIP.2010.2049235","volume":"19","author":"Y Yang","year":"2010","unstructured":"Yang, Y., Xu, D., Nie, F., Yan, S., Zhuang, Y.: Image clustering using local discriminant models and global integration. IEEE Trans. Image Process. 19(10), 2761\u20132773 (2010)","journal-title":"IEEE Trans. Image Process."},{"key":"39_CR32","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1007\/978-3-540-71701-0_129","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"W-D Zhao","year":"2007","unstructured":"Zhao, W.-D., Dai, W.-H., Tang, C.-B.: K-centers algorithm for clustering mixed type data. In: Zhou, Z.-H., Li, H., Yang, Q. (eds.) PAKDD 2007. LNCS (LNAI), vol. 4426, pp. 1140\u20131147. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-71701-0_129"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43412-9_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T06:14:47Z","timestamp":1730096087000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43412-9_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434112","9783031434129"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43412-9_39","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Our proposed k-SubMix approach is a new clustering method in the area of unsupervised learning. We have no specific ethical concerns regarding the work of this paper, as all experiments were run on either synthetically generated data or on publicly available datasets that are commonly used in the field of mixed-type clustering. Real-world datasets containing personal data such as Heart, Derma and Adult are standard datasets for the evaluation of mixed-type clustering approaches and the resulting cluster did not reveal any new or ethically critical patterns. No data from humans has been collected for this paper. K-SubMix can be applied to any dataset and is not targeting a specific ethically critical domain.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}