{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:23:17Z","timestamp":1759332197475,"version":"3.40.3"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030731960"},{"type":"electronic","value":"9783030731977"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-73197-7_23","type":"book-chapter","created":{"date-parts":[[2021,4,6]],"date-time":"2021-04-06T19:03:01Z","timestamp":1617735781000},"page":"342-358","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Clustering Mixed-Type Data with Correlation-Preserving Embedding"],"prefix":"10.1007","author":[{"given":"Luan","family":"Tran","sequence":"first","affiliation":[]},{"given":"Liyue","family":"Fan","sequence":"additional","affiliation":[]},{"given":"Cyrus","family":"Shahabi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,4,6]]},"reference":[{"key":"23_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/978-3-540-24741-8_9","volume-title":"Advances in Database Technology - EDBT 2004","author":"P Andritsos","year":"2004","unstructured":"Andritsos, P., Tsaparas, P., Miller, R.J., Sevcik, K.C.: LIMBO: scalable clustering of categorical data. In: Bertino, E., et al. (eds.) EDBT 2004. LNCS, vol. 2992, pp. 123\u2013146. Springer, Heidelberg (2004). https:\/\/doi.org\/10.1007\/978-3-540-24741-8_9"},{"issue":"2","key":"23_CR2","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1145\/304181.304187","volume":"28","author":"M Ankerst","year":"1999","unstructured":"Ankerst, M., Breunig, M.M., Kriegel, H.P., Sander, J.: Optics: ordering points to identify the clustering structure. ACM SIGMOD Rec. 28(2), 49\u201360 (1999)","journal-title":"ACM SIGMOD Rec."},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Aytekin, C., Ni, X., Cricri, F., Aksu, E.: Clustering and unsupervised anomaly detection with l 2 normalized deep auto-encoder representations. In: 2018 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20136. IEEE (2018)","DOI":"10.1109\/IJCNN.2018.8489068"},{"key":"23_CR4","doi-asserted-by":"crossref","unstructured":"Barbar\u00e1, D., Li, Y., Couto, J.: Coolcat: an entropy-based algorithm for categorical clustering. In: Proceedings of the Eleventh International Conference on Information and Knowledge Management, pp. 582\u2013589 (2002)","DOI":"10.1145\/584792.584888"},{"key":"23_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/978-3-319-98812-2_2","volume-title":"Database and Expert Systems Applications","author":"S Behzadi","year":"2018","unstructured":"Behzadi, S., Ibrahim, M.A., Plant, C.: Parameter free mixed-type density-based clustering. In: Hartmann, S., Ma, H., Hameurlain, A., Pernul, G., Wagner, R.R. (eds.) DEXA 2018. LNCS, vol. 11030, pp. 19\u201334. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-98812-2_2"},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"Behzadi, S., M\u00fcller, N.S., Plant, C., B\u00f6hm, C.: Clustering of mixed-type data considering concept hierarchies. In: Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 555\u2013573. Springer (2019)","DOI":"10.1007\/978-3-030-16148-4_43"},{"key":"23_CR7","series-title":"Springer Topics in Signal Processing","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-00296-0_5","volume-title":"In: Noise Reduction in Speech Processing","author":"J Benesty","year":"2009","unstructured":"Benesty, J., Chen, J., Huang, Y., Cohen, I.: Pearson correlation coefficient. In: Noise Reduction in Speech Processing. Springer Topics in Signal Processing, vol. 2, pp. 1\u20134. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-00296-0_5"},{"issue":"8","key":"23_CR8","doi-asserted-by":"publisher","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio, Y., Courville, A., Vincent, P.: Representation learning: a review and new perspectives. IEEE Trans. Pattern Anal. Mach. Intell. 35(8), 1798\u20131828 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"23_CR9","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1007\/978-3-642-13657-3_7","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"C B\u00f6hm","year":"2010","unstructured":"B\u00f6hm, C., Goebl, S., Oswald, A., Plant, C., Plavinski, M., Wackersreuther, B.: Integrative parameter-free clustering of data with mixed type attributes. In: Zaki, M.J., Yu, J.X., Ravindran, B., Pudi, V. (eds.) PAKDD 2010. LNCS (LNAI), vol. 6118, pp. 38\u201347. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-13657-3_7"},{"issue":"10","key":"23_CR10","doi-asserted-by":"publisher","first-page":"4593","DOI":"10.1109\/TNNLS.2017.2770167","volume":"29","author":"F Cao","year":"2017","unstructured":"Cao, F., et al.: An algorithm for clustering categorical data with set-valued features. IEEE Trans. Neural Networks Learning Syst. 29(10), 4593\u20134606 (2017)","journal-title":"IEEE Trans. Neural Networks Learning Syst."},{"issue":"1","key":"23_CR11","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1016\/S0893-6080(03)00169-2","volume":"17","author":"V Cherkassky","year":"2004","unstructured":"Cherkassky, V., Ma, Y.: Practical selection of SVM parameters and noise estimation for SVM regression. Neural Netw. 17(1), 113\u2013126 (2004)","journal-title":"Neural Netw."},{"issue":"5","key":"23_CR12","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1109\/34.1000236","volume":"24","author":"D Comaniciu","year":"2002","unstructured":"Comaniciu, D., Meer, P.: Mean shift: a robust approach toward feature space analysis. IEEE Trans. Pattern Anal. Mach. Intell. 24(5), 603\u2013619 (2002)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"23_CR13","unstructured":"Dua, D., Graff, C.: UCI machine learning repository (2017). http:\/\/archive.ics.uci.edu\/ml"},{"key":"23_CR14","first-page":"226","volume":"96","author":"M Ester","year":"1996","unstructured":"Ester, M., Kriegel, H.P., Sander, J., Xu, X., et al.: A density-based algorithm for discovering clusters in large spatial databases with noise. KDD 96, 226\u2013231 (1996)","journal-title":"KDD"},{"issue":"383","key":"23_CR15","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1080\/01621459.1983.10478008","volume":"78","author":"EB Fowlkes","year":"1983","unstructured":"Fowlkes, E.B., Mallows, C.L.: A method for comparing two hierarchical clusterings. J. Am. Stat. Assoc. 78(383), 553\u2013569 (1983)","journal-title":"J. Am. Stat. Assoc."},{"issue":"5","key":"23_CR16","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1016\/S0306-4379(00)00022-3","volume":"25","author":"S Guha","year":"2000","unstructured":"Guha, S., Rastogi, R., Shim, K.: Rock: a robust clustering algorithm for categorical attributes. Inf. Syst. 25(5), 345\u2013366 (2000)","journal-title":"Inf. Syst."},{"issue":"2","key":"23_CR17","doi-asserted-by":"publisher","first-page":"220","DOI":"10.1016\/j.tele.2013.08.006","volume":"31","author":"F Hamka","year":"2014","unstructured":"Hamka, F., Bouwman, H., De Reuver, M., Kroesen, M.: Mobile customer segmentation based on smartphone measurement. Telematics Inform. 31(2), 220\u2013227 (2014)","journal-title":"Telematics Inform."},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Hartigan, J.A., Wong, M.A.: Algorithm as 136: a k-means clustering algorithm. J. Royal Stat. Soci. Series c (applied statistics) 28(1), 100\u2013108 (1979)","DOI":"10.2307\/2346830"},{"key":"23_CR19","doi-asserted-by":"crossref","unstructured":"Hershey, J.R., Olsen, P.A.: Approximating the kullback leibler divergence between Gaussian mixture models. In: 2007 IEEE International Conference on Acoustics, Speech and Signal Processing-ICASSP\u201907, vol. 4, pp. IV-317. IEEE (2007)","DOI":"10.1109\/ICASSP.2007.366913"},{"issue":"2","key":"23_CR20","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/0893-6080(91)90009-T","volume":"4","author":"K Hornik","year":"1991","unstructured":"Hornik, K.: Approximation capabilities of multilayer feedforward networks. Neural Netw. 4(2), 251\u2013257 (1991)","journal-title":"Neural Netw."},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Hosmer Jr., D.W., Lemeshow, S., Sturdivant, R.X.: Applied logistic regression, vol. 398. Wiley (2013)","DOI":"10.1002\/9781118548387"},{"issue":"3","key":"23_CR22","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1023\/A:1009769707641","volume":"2","author":"Z Huang","year":"1998","unstructured":"Huang, Z.: Extensions to the k-means algorithm for clustering large data sets with categorical values. Data Min. Knowl. Disc. 2(3), 283\u2013304 (1998)","journal-title":"Data Min. Knowl. Disc."},{"key":"23_CR23","doi-asserted-by":"crossref","unstructured":"Indyk, P., Motwani, R.: Approximate nearest neighbors: towards removing the curse of dimensionality. In: Proceedings of The Thirtieth Annual ACM Symposium on Theory of Computing, pp. 604\u2013613. ACM (1998)","DOI":"10.1145\/276698.276876"},{"key":"23_CR24","doi-asserted-by":"crossref","unstructured":"Jian, S., Hu, L., Cao, L., Lu, K.: Metric-based auto-instructor for learning mixed data representation. In: Thirty-Second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11597"},{"key":"23_CR25","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"23_CR26","first-page":"202","volume":"96","author":"R Kohavi","year":"1996","unstructured":"Kohavi, R.: Scaling up the accuracy of Naive-bayes classifiers: a decision-tree hybrid. KDD 96, 202\u2013207 (1996)","journal-title":"KDD"},{"key":"23_CR27","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems, pp. 1097\u20131105 (2012)"},{"key":"23_CR28","unstructured":"van der Maaten, L., Hinton, G.: Visualizing data using t-sne. J. Mach. Learn. Res. 9, 2579\u20132605 (2008)"},{"key":"23_CR29","doi-asserted-by":"crossref","unstructured":"Marchi, E., Vesperini, F., Eyben, F., Squartini, S., Schuller, B.: A novel approach for automatic acoustic novelty detection using a denoising autoencoder with bidirectional LSTM neural networks. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1996\u20132000. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178320"},{"key":"23_CR30","unstructured":"Ni, X., Quadrianto, N., Wang, Y., Chen, C.: Composing tree graphical models with persistent homology features for clustering mixed-type data. In: International Conference on Machine Learning, pp. 2622\u20132631 (2017)"},{"key":"23_CR31","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1016\/j.compeleceng.2018.04.023","volume":"68","author":"SB Salem","year":"2018","unstructured":"Salem, S.B., Naouali, S., Chtourou, Z.: A fast and effective partitional clustering algorithm for large categorical datasets using a k-means based approach. Comput. Electr. Eng. 68, 463\u2013483 (2018)","journal-title":"Comput. Electr. Eng."},{"issue":"6","key":"23_CR32","doi-asserted-by":"publisher","first-page":"568","DOI":"10.1109\/72.97934","volume":"2","author":"DF Specht","year":"1991","unstructured":"Specht, D.F., et al.: A general regression neural network. IEEE Trans. Neural Networks 2(6), 568\u2013576 (1991)","journal-title":"IEEE Trans. Neural Networks"},{"key":"23_CR33","doi-asserted-by":"crossref","unstructured":"Tate, R.F.: Correlation between a discrete and a continuous variable. point-biserial correlation. Ann. Math. Stat. 25(3), 603\u2013607 (1954)","DOI":"10.1214\/aoms\/1177728730"},{"key":"23_CR34","doi-asserted-by":"crossref","unstructured":"Tavallaee, M., Bagheri, E., Lu, W., Ghorbani, A.A.: A detailed analysis of the KDD cup 99 data set. In: 2009 IEEE Symposium on Computational Intelligence for Security and Defense Applications, pp. 1\u20136. IEEE (2009)","DOI":"10.1109\/CISDA.2009.5356528"},{"key":"23_CR35","doi-asserted-by":"crossref","unstructured":"Vincent, P., Larochelle, H., Bengio, Y., Manzagol, P.A.: Extracting and composing robust features with denoising autoencoders. In: Proceedings of the 25th International Conference on Machine Learning, pp. 1096\u20131103 (2008)","DOI":"10.1145\/1390156.1390294"},{"key":"23_CR36","first-page":"2837","volume":"11","author":"NX Vinh","year":"2010","unstructured":"Vinh, N.X., Epps, J., Bailey, J.: Information theoretic measures for clusterings comparison: variants, properties, normalization and correction for chance. J. Mach. Learn. Res. 11, 2837\u20132854 (2010)","journal-title":"J. Mach. Learn. Res."},{"issue":"4","key":"23_CR37","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/s11222-007-9033-z","volume":"17","author":"U Von Luxburg","year":"2007","unstructured":"Von Luxburg, U.: A tutorial on spectral clustering. Stat. Comput. 17(4), 395\u2013416 (2007)","journal-title":"Stat. Comput."},{"issue":"11","key":"23_CR38","doi-asserted-by":"publisher","first-page":"1033","DOI":"10.1038\/nmeth.3583","volume":"12","author":"C Wiwie","year":"2015","unstructured":"Wiwie, C., Baumbach, J., R\u00f6ttger, R.: Comparing the performance of biomedical clustering methods. Nat. Methods 12(11), 1033 (2015)","journal-title":"Nat. Methods"},{"issue":"1\u20133","key":"23_CR39","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/0169-7439(87)80084-9","volume":"2","author":"S Wold","year":"1987","unstructured":"Wold, S., Esbensen, K., Geladi, P.: Principal component analysis. Chemom. Intell. Lab. Syst. 2(1\u20133), 37\u201352 (1987)","journal-title":"Chemom. Intell. Lab. Syst."},{"issue":"s2","key":"23_CR40","doi-asserted-by":"publisher","first-page":"S519","DOI":"10.3233\/THC-150989","volume":"23","author":"Y Wu","year":"2015","unstructured":"Wu, Y., Duan, H., Du, S.: Multiple fuzzy c-means clustering algorithm in medical diagnosis. Technol. Health Care 23(s2), S519\u2013S527 (2015)","journal-title":"Technol. Health Care"},{"key":"23_CR41","unstructured":"Xie, J., Girshick, R., Farhadi, A.: Unsupervised deep embedding for clustering analysis. In: International Conference on Machine Learning, pp. 478\u2013487 (2016)"}],"container-title":["Lecture Notes in Computer Science","Database Systems for Advanced Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-73197-7_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,23]],"date-time":"2022-12-23T14:56:56Z","timestamp":1671807416000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-73197-7_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030731960","9783030731977"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-73197-7_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"6 April 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DASFAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database Systems for Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taipei","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiwan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 April 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 April 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasfaa2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/dm.iis.sinica.edu.tw\/DASFAA2021\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"490","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"98","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"20% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic this event was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}