{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T21:42:09Z","timestamp":1771882929794,"version":"3.50.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2019,4,1]],"date-time":"2019-04-01T00:00:00Z","timestamp":1554076800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100007243","name":"Vysok\u00e1 \u0160kola Ekonomick\u00e1 v Praze","doi-asserted-by":"publisher","award":["IGA F4\/41\/2016"],"award-info":[{"award-number":["IGA F4\/41\/2016"]}],"id":[{"id":"10.13039\/501100007243","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Classif"],"published-print":{"date-parts":[[2019,4]]},"DOI":"10.1007\/s00357-019-09317-5","type":"journal-article","created":{"date-parts":[[2019,4,2]],"date-time":"2019-04-02T06:40:23Z","timestamp":1554187223000},"page":"58-72","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":38,"title":["Comparison of Similarity Measures for Categorical Data in Hierarchical Clustering"],"prefix":"10.1007","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7624-8104","authenticated-orcid":false,"given":"Zden\u011bk","family":"\u0160ulc","sequence":"first","affiliation":[]},{"given":"Hana","family":"\u0158ezankov\u00e1","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,4,2]]},"reference":[{"key":"9317_CR1","volume-title":"Cluster analysis for applications. Probability and mathematical statistics","author":"MR Anderberg","year":"1973","unstructured":"Anderberg, M. R. (1973). Cluster analysis for applications. Probability and mathematical statistics. New York: Academic Press."},{"key":"9317_CR2","doi-asserted-by":"crossref","unstructured":"Boriah, S., Chandola, V., Kumar, V. (2008). Similarity measures for categorical data: a comparative evaluation. In Proceedings of the eighth SIAM International Conference on Data Mining (pp. 243\u2013254).","DOI":"10.1137\/1.9781611972788.22"},{"key":"9317_CR3","unstructured":"Chandola, V., Boriah, S., Kumar, V. (2009). A framework for exploring categorical data. In Proceedings of the ninth SIAM International Conference on Data Mining (pp. 187\u2013198): SIAM."},{"issue":"1","key":"9317_CR4","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1007\/s00357-001-0004-3","volume":"18","author":"A Chatuverdi","year":"2001","unstructured":"Chatuverdi, A., Foods, K., Green, P. E., Carroll, J. D. (2001). K-modes clustering. Journal of Classification, 18(1), 35\u201355.","journal-title":"Journal of Classification"},{"key":"9317_CR5","unstructured":"Chen, L., & Guo, G. (2014). Centroid-based classification of categorical data. In Li, F., Li, G., Hwang, S.-w., Yao, B., Zhang, Z. (Eds.) Web-age information management (pp. 472\u2013475). Cham: Springer International Publishing."},{"key":"9317_CR6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-00234-2","volume-title":"Encyclopedia of distances","author":"MM Deza","year":"2009","unstructured":"Deza, M. M., & Deza, E. (2009). Encyclopedia of distances. Berlin: Springer."},{"key":"9317_CR7","first-page":"77","volume-title":"A geometric framework for unsupervised anomaly detection","author":"E Eskin","year":"2002","unstructured":"Eskin, E., Arnold, A., Prerau, M., Portnoy, L., Stolfo, S. (2002). A geometric framework for unsupervised anomaly detection, (pp. 77\u2013101). Boston: Springer US."},{"key":"9317_CR8","volume-title":"Cluster analysis. Wiley series in probability and statistics","author":"B Everitt","year":"2011","unstructured":"Everitt, B., Landau, S., Leese, M., Stahl, D. (2011). Cluster analysis. Wiley series in probability and statistics. New York: Wiley."},{"issue":"4","key":"9317_CR9","doi-asserted-by":"publisher","first-page":"882","DOI":"10.2307\/2528080","volume":"22","author":"DW Goodall","year":"1966","unstructured":"Goodall, D. W. (1966). A new similarity index based on probability. Biometrics, 22(4), 882\u2013907.","journal-title":"Biometrics"},{"issue":"4","key":"9317_CR10","doi-asserted-by":"publisher","first-page":"857","DOI":"10.2307\/2528823","volume":"27","author":"JC Gower","year":"1971","unstructured":"Gower, J. C. (1971). A general coefficient of similarity and some of its properties. Biometrics, 27(4), 857\u2013871.","journal-title":"Biometrics"},{"key":"9317_CR11","doi-asserted-by":"crossref","unstructured":"Hennig, C., Meila, M., Murtagh, F., Rocci, R. (2015). Handbook of cluster analysis. Chapman & Hall\/CRC Handbooks of modern statistical methods. Taylor & Francis.","DOI":"10.1201\/b19706"},{"issue":"3","key":"9317_CR12","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1023\/A:1009769707641","volume":"2","author":"Z Huang","year":"1998","unstructured":"Huang, Z. (1998). Extensions to the k-means algorithm for clustering large data sets with categorical values. Data Mining and Knowledge Discovery, 2(3), 283\u2013304.","journal-title":"Data Mining and Knowledge Discovery"},{"issue":"2","key":"9317_CR13","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1111\/j.1469-8137.1912.tb05611.x","volume":"11","author":"P Jaccard","year":"1912","unstructured":"Jaccard, P. (1912). The distribution of the flora in the alpine zone. New Phytologist, 11(2), 37\u201350.","journal-title":"New Phytologist"},{"key":"9317_CR14","unstructured":"Lin, D. (1998). An information-theoretic definition of similarity. In Proceedings of the 15th International Conference on Machine Learning (pp. 296\u2013304): Morgan Kaufmann."},{"issue":"2","key":"9317_CR15","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1007\/s00357-012-9107-2","volume":"29","author":"I Morlini","year":"2012","unstructured":"Morlini, I., & Zani, S. (2012). A new class of weighted similarity indices using polytomous variables. Journal of Classification, 29(2), 199\u2013226.","journal-title":"Journal of Classification"},{"key":"9317_CR16","unstructured":"Qiu, W., & Joe, H. (2015). clusterGeneration: random cluster generation (with specified degree of separation). R package version 1.3.4."},{"issue":"2","key":"9317_CR17","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1007\/s00357-006-0018-y","volume":"23","author":"W Qiu","year":"2016","unstructured":"Qiu, W., & Joe, H. (2016). Generation of random clusters with specified degree of separation. Journal of Classification, 23(2), 315\u2013334.","journal-title":"Journal of Classification"},{"issue":"2","key":"9317_CR18","first-page":"216","volume":"89","author":"H \u0158ezankov\u00e1","year":"2009","unstructured":"\u0158ezankov\u00e1, H. (2009). Cluster analysis and categorical data. Statistika, 89(2), 216\u2013232.","journal-title":"Statistika"},{"key":"9317_CR19","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1007\/978-3-642-18029-3_18","volume":"3","author":"H \u0158ezankov\u00e1","year":"2011","unstructured":"\u0158ezankov\u00e1, H., L\u00f6ster, T., H\u00fasek, D. (2011). Evaluation of categorical data clustering. Advances in Intelligent Web Mastering, 3, 173\u2013182.","journal-title":"Advances in Intelligent Web Mastering"},{"issue":"2","key":"9317_CR20","first-page":"241","volume":"14","author":"OM San","year":"2004","unstructured":"San, O. M., Huynh, V. N., Nakamori, Y. (2004). An alternative extension of the k-means algorithm for clustering categorical data. International Journal of Applied Mathematics and Computer Science, 14(2), 241\u2013247.","journal-title":"International Journal of Applied Mathematics and Computer Science"},{"issue":"1","key":"9317_CR21","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1145\/584091.584093","volume":"5","author":"CE Shannon","year":"2001","unstructured":"Shannon, C. E. (2001). A mathematical theory of communication. ACM SIGMOBILE Mobile Computing and Communications Review, 5(1), 3\u201355.","journal-title":"ACM SIGMOBILE Mobile Computing and Communications Review"},{"key":"9317_CR22","first-page":"1409","volume":"28","author":"RR Sokal","year":"1958","unstructured":"Sokal, R. R., & Michener, C. D. (1958). A statistical method for evaluating systematic relationships. University of Kansas Scientific Bulletin, 28, 1409\u20131438.","journal-title":"University of Kansas Scientific Bulletin"},{"key":"9317_CR23","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1108\/eb026526","volume":"28","author":"K Sp\u00e4rck Jones","year":"1972","unstructured":"Sp\u00e4rck Jones, K. (1972). A statistical interpretation of term specificity and its application in retrieval. Journal of Documentation, 28, 11\u201321.","journal-title":"Journal of Documentation"},{"issue":"1","key":"9317_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0168288","volume":"12","author":"T Strauss","year":"2017","unstructured":"Strauss, T., & von Maltitz, M. J. (2017). Generalising Ward\u2019s method for use with Manhattan distances. PLoS ONE, 12(1), 1\u201321.","journal-title":"PLoS ONE"},{"key":"9317_CR25","unstructured":"\u0160ulc, Z., & \u0158ezankov\u00e1, H. (2015). nomclust: an R package for hierarchical clustering of objects characterized by nominal variables. In Proceedings of the 9th International Days of Statistics and Economics (pp. 1581\u20131590). Slan\u00fd: Melandrium."},{"issue":"11","key":"9317_CR26","doi-asserted-by":"publisher","first-page":"2884","DOI":"10.1021\/ci300261r","volume":"52","author":"R Todeschini","year":"2012","unstructured":"Todeschini, R., Consonni, J., Xiang, H., Holliday, V., Buscema, M., Willett, P. (2012). Similarity coefficients for binary chemoinformatics data: overview and extended comparison using simulated and real data sets. Journal of Chemical Information and Modeling, 52(11), 2884\u20132901.","journal-title":"Journal of Chemical Information and Modeling"},{"key":"9317_CR27","unstructured":"Warrens, M. J. (2008). Similarity coefficients for binary data. Ph.D. thesis, University of Leiden."},{"issue":"2","key":"9317_CR28","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/s00357-016-9200-z","volume":"33","author":"MJ Warrens","year":"2016","unstructured":"Warrens, M. J. (2016). Inequalities between similarities for numerical data. Journal of Classification, 33(2), 141\u2013148.","journal-title":"Journal of Classification"},{"issue":"5","key":"9317_CR29","first-page":"1145","volume":"32","author":"J Yi","year":"2016","unstructured":"Yi, J., Yang, G., Wan, J. (2016). Category discrimination based feature selection algorithm in chinese text classification. Journal of Information Science and Engineering, 32(5), 1145\u20131159.","journal-title":"Journal of Information Science and Engineering"},{"issue":"1","key":"9317_CR30","doi-asserted-by":"publisher","first-page":"8","DOI":"10.20982\/tqmp.11.1.p008","volume":"11","author":"O Yim","year":"2015","unstructured":"Yim, O., & Ramdeen, K. T. (2015). Hierarchical cluster analysis: comparison of three linkage measures and application to psychological data. The Quantitative Methods for Psychology, 11(1), 8\u201321.","journal-title":"The Quantitative Methods for Psychology"}],"container-title":["Journal of Classification"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00357-019-09317-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00357-019-09317-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00357-019-09317-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,3,31]],"date-time":"2020-03-31T23:37:40Z","timestamp":1585697860000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00357-019-09317-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4]]},"references-count":30,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,4]]}},"alternative-id":["9317"],"URL":"https:\/\/doi.org\/10.1007\/s00357-019-09317-5","relation":{},"ISSN":["0176-4268","1432-1343"],"issn-type":[{"value":"0176-4268","type":"print"},{"value":"1432-1343","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,4]]},"assertion":[{"value":"2 April 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}