{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T06:58:59Z","timestamp":1777273139193,"version":"3.51.4"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031975752","type":"print"},{"value":"9783031975769","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T00:00:00Z","timestamp":1751068800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T00:00:00Z","timestamp":1751068800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-031-97576-9_14","type":"book-chapter","created":{"date-parts":[[2025,7,5]],"date-time":"2025-07-05T05:57:28Z","timestamp":1751695048000},"page":"212-225","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Apache Spark Implementation of the Constrained K-Means Clustering Algorithm"],"prefix":"10.1007","author":[{"given":"Nguyen Quang","family":"Huy","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vu Thu","family":"Diep","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6033-6484","authenticated-orcid":false,"given":"Phan Duy","family":"Hung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,28]]},"reference":[{"issue":"3","key":"14_CR1","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1145\/331499.331504","volume":"31","author":"AK Jain","year":"1999","unstructured":"Jain, A.K., Murty, M.N., Flynn, P.J.: Data clustering. ACM Comput. Surv. 31(3), 264\u2013323 (1999). https:\/\/doi.org\/10.1145\/331499.331504","journal-title":"ACM Comput. Surv."},{"key":"14_CR2","doi-asserted-by":"publisher","unstructured":"Madhulatha, T.S.: An overview on clustering methods. arXiv (2012). https:\/\/doi.org\/10.48550\/arxiv.1205.1117","DOI":"10.48550\/arxiv.1205.1117"},{"key":"14_CR3","doi-asserted-by":"publisher","DOI":"10.1201\/9781584889977","volume-title":"Constrained clustering: Advances in Algorithms, Theory, and Applications","author":"S Basu","year":"2008","unstructured":"Basu, S., Davidson, I., Wagstaff, K.: Constrained clustering: Advances in Algorithms, Theory, and Applications. CRC Press (2008)"},{"key":"14_CR4","unstructured":"Wagstaff, K., Cardie, C., Rogers, S., Schr\u00f6dl, S.: Constrained K-means Clustering with Background Knowledge. In: International Conference on Machine Learning, pp. 577\u2013584 (2001). https:\/\/web.cse.msu.edu\/~cse802\/notes\/ConstrainedKmeans.pdf"},{"key":"14_CR5","doi-asserted-by":"publisher","unstructured":"Bilenko, M., Basu, S., Mooney, R.J.: Integrating constraints and metric learning in semi-supervised clustering. In: Proceedings of the Twenty-First International Conference on Machine Learning (ICML \u201904), p. 11. Association for Computing Machinery (2004). https:\/\/doi.org\/10.1145\/1015330.1015360","DOI":"10.1145\/1015330.1015360"},{"key":"14_CR6","doi-asserted-by":"publisher","unstructured":"Baumann, P., Hochbaum, D.S.: PCCC: the pairwise-confidence-constraints clustering algorithm. arXiv (2022). https:\/\/doi.org\/10.48550\/arxiv.2212.14437","DOI":"10.48550\/arxiv.2212.14437"},{"key":"14_CR7","unstructured":"Zaharia, M., et al.: Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In: Networked Systems Design and Implementation, 2 (2012). http:\/\/cobweb.cs.uga.edu\/~squinn\/mmd_s15\/papers\/nsdi12-final138.pdf"},{"issue":"2","key":"14_CR8","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1109\/TIT.1982.1056489","volume":"28","author":"S Lloyd","year":"1982","unstructured":"Lloyd, S.: Least squares quantization in PCM. IEEE Trans. Inf. Theory 28(2), 129\u2013137 (1982)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"14_CR9","doi-asserted-by":"publisher","unstructured":"Arthur, D., Vassilvitskii, S.: K-means++: the advantages of careful seeding. Symp. Discrete Algorithms 1027\u20131035,(2007). https:\/\/doi.org\/10.5555\/1283383.1283494","DOI":"10.5555\/1283383.1283494"},{"key":"14_CR10","doi-asserted-by":"publisher","unstructured":"Kozyriev, A., Norkin, V.: robust clustering on high-dimensional data with stochastic quantization.\u00a0arXiv (2024).\u00a0https:\/\/doi.org\/10.48550\/arxiv.2409.02066","DOI":"10.48550\/arxiv.2409.02066"},{"key":"14_CR11","doi-asserted-by":"publisher","unstructured":"Meng, X., et al.: MLLIB: Machine learning in apache spark.\u00a0arXiv (2015).\u00a0https:\/\/doi.org\/10.48550\/arxiv.1505.06807","DOI":"10.48550\/arxiv.1505.06807"},{"key":"14_CR12","doi-asserted-by":"publisher","unstructured":"Bahmani, B., Moseley, B., Vattani, A., Kumar, R., Vassilvitskii, S.: Scalable K-Means++.\u00a0arXiv (2012).\u00a0https:\/\/doi.org\/10.48550\/arxiv.1203.6402","DOI":"10.48550\/arxiv.1203.6402"},{"key":"14_CR13","doi-asserted-by":"publisher","unstructured":"Bl\u00f6mer, J., Lammersen, C., Schmidt, M., Sohler, C.: Theoretical analysis of the $k$-means algorithm a survey.\u00a0arXiv (2016).\u00a0https:\/\/doi.org\/10.48550\/arxiv.1602.08254","DOI":"10.48550\/arxiv.1602.08254"},{"key":"14_CR14","doi-asserted-by":"publisher","unstructured":"J\u0229drzejowicz, J., J\u0229drzejowicz, P., Wierzbowska, I.: Apache spark implementation of the distance-based kernel-based fuzzy c-means clustering classifier. In:\u00a0Smart Innovation, Systems and Technologies, pp. 317\u2013324 (2016).\u00a0https:\/\/doi.org\/10.1007\/978-3-319-39630-9_26","DOI":"10.1007\/978-3-319-39630-9_26"},{"key":"14_CR15","doi-asserted-by":"publisher","unstructured":"Han, D., Agrawal, A., Liao, W., Choudhary, A.: Parallel DBSCAN algorithm using a data partitioning strategy with spark implementation.\u00a0In: Proceedings of the IEEE International Conference on Big Data (Big Data), pp. 305\u2013312 (2018).\u00a0https:\/\/doi.org\/10.1109\/bigdata.2018.8622258","DOI":"10.1109\/bigdata.2018.8622258"},{"key":"14_CR16","doi-asserted-by":"publisher","unstructured":"Lavanya, K., Banu, J.S., Jain, P.: Clustering of Zika virus epidemic using gaussian mixture model in spark environment.\u00a0Biomed. Res.\u00a030(1) (2019).\u00a0https:\/\/doi.org\/10.35841\/biomedicalresearch.30-18-1132","DOI":"10.35841\/biomedicalresearch.30-18-1132"},{"key":"14_CR17","unstructured":"Apache Software Foundation. Hadoop (2010). https:\/\/hadoop.apache.org"},{"issue":"1","key":"14_CR18","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: MapReduce. Commun. ACM 51(1), 107\u2013113 (2008). https:\/\/doi.org\/10.1145\/1327452.1327492","journal-title":"Commun. ACM"},{"key":"14_CR19","doi-asserted-by":"publisher","first-page":"104979","DOI":"10.1016\/j.cor.2020.104979","volume":"121","author":"G Gonz\u00e1lez-Almagro","year":"2020","unstructured":"Gonz\u00e1lez-Almagro, G., Luengo, J., Cano, J., Garc\u00eda, S.: DILS: constrained clustering through dual iterative local search. Comput. Oper. Res. 121, 104979 (2020). https:\/\/doi.org\/10.1016\/j.cor.2020.104979","journal-title":"Comput. Oper. Res."},{"key":"14_CR20","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/BF01908075","volume":"2","author":"L Hubert","year":"1985","unstructured":"Hubert, L., Arabie, P.: Comparing partitions. J. Classif. 2, 193\u2013218 (1985)","journal-title":"J. Classif."},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Dave, A., Jindal, A., Li, L.E., Xin, R., Gonzalez, J., Zaharia, M.: Graphframes: an integrated api for mixing graph and relational queries. In: Proceedings of the fourth international workshop on graph data management experiences and systems, pp. 1\u20138 (2016)","DOI":"10.1145\/2960414.2960416"},{"key":"14_CR22","unstructured":"The Constrained K-Means algorithm code is available on GitHub. https:\/\/github.com\/huynqcharles\/spark-constrained-kmeans"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Hung, P.D.: Detection of central sleep apnea based on a single-lead ECG. In: Proceedings of the 5th International Conference on Bioinformatics Research and Applications (ICBRA \u201818), pp. 78\u201383. Association for Computing Machinery, New York, NY, USA (2018)","DOI":"10.1145\/3309129.3309132"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Hung, P.D.: Central sleep apnea detection using an accelerometer. In: Proceedings of the 1st International Conference on Control and Computer Vision (ICCCV \u201818), pp. 106\u2013111. Association for Computing Machinery, New York, NY, USA (2018)","DOI":"10.1145\/3232651.3232660"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Hai, P.N., Hieu, H.T., Hung, P.D.: An empirical examination on forecasting VN30 short-term uptrend stocks using LSTM along with the ichimoku cloud trading strategy. In: Sharma, H., Shrivastava, V., Kumari Bharti, K., Wang, L. (eds.) Communication and Intelligent Systems. Lecture Notes in Networks and Systems, vol. 461. Springer, Singapore (2022)","DOI":"10.1007\/978-981-19-2130-8_19"},{"key":"14_CR26","doi-asserted-by":"crossref","unstructured":"Dat, D.Q., Hung, P.D.: Improvement for time series clustering with the deep learning approach. In: Luo, Y. (eds.) Cooperative Design, Visualization, and Engineering. CDVE 2021. Lecture Notes in Computer Science(), vol. 12983. Springer, Cham (2021)","DOI":"10.1007\/978-3-030-88207-5_8"}],"container-title":["Lecture Notes in Computer Science","Computational Science and Its Applications \u2013 ICCSA 2025 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-97576-9_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T06:27:31Z","timestamp":1777271251000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-97576-9_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,28]]},"ISBN":["9783031975752","9783031975769"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-97576-9_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,28]]},"assertion":[{"value":"28 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICCSA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Science and Its Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Istanbul","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"T\u00fcrkiye","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 June 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iccsa2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iccsa.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}