{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T16:28:48Z","timestamp":1775838528997,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1007\/s10618-020-00678-9","type":"journal-article","created":{"date-parts":[[2020,3,10]],"date-time":"2020-03-10T15:13:34Z","timestamp":1583853214000},"page":"776-811","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":59,"title":["An efficient K-means clustering algorithm for tall data"],"prefix":"10.1007","volume":"34","author":[{"given":"Marco","family":"Cap\u00f3","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aritz","family":"P\u00e9rez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jose A.","family":"Lozano","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,7,15]]},"reference":[{"issue":"2","key":"678_CR1","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1007\/s10994-009-5103-0","volume":"75","author":"D Aloise","year":"2009","unstructured":"Aloise D, Deshpande A, Hansen P, Popat P (2009) NP-hardness of Euclidean sum-of-squares clustering. Mach Learn 75(2):245\u2013248","journal-title":"Mach Learn"},{"key":"678_CR2","unstructured":"Arthur D, Vassilvitskii S (2007) k-means++: The Advantages of Careful Seeding. In: Proceedings of the 18th annual ACM-SIAM symposium on discrete algorithms, pp 1027\u20131035"},{"key":"678_CR3","unstructured":"\u00c4yr\u00e4m\u00f6 S, K\u00e4rkk\u00e4inen T (2006) Introduction to Partitioning-Based Clustering Methods with a Robust Example. Reports of the Department of Mathematical Information Technology Series C, Software engineering and computational intelligence 1\/2006"},{"key":"678_CR4","unstructured":"Bachem O, Lucic M, Hassani H, Krause A (2016) Fast and Provably Good Seedings for K-means. In: Advances in neural information processing systems, pp 55\u201363"},{"key":"678_CR5","doi-asserted-by":"crossref","unstructured":"Bachem O, Lucic M, Krause A (2018) Scalable K-means Clustering via Lightweight Coresets. In: Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery and data mining, pp 1119\u20131127","DOI":"10.1145\/3219819.3219973"},{"issue":"7","key":"678_CR6","doi-asserted-by":"publisher","first-page":"622","DOI":"10.14778\/2180912.2180915","volume":"5","author":"B Bahmani","year":"2012","unstructured":"Bahmani B, Moseley B, Vattani A, Kumar R, Vassilvitskii S (2012) Scalable K-means++. Proc VLDB Endow 5(7):622\u2013633","journal-title":"Proc VLDB Endow"},{"key":"678_CR7","unstructured":"Balcan MFF, Ehrlich S, Liang Y (2013) Distributed K-means and K-median clustering on general topologies. In: Advances in neural information processing systems, pp 1995\u20132003"},{"key":"678_CR8","first-page":"71","volume":"25","author":"P Berkhin","year":"2006","unstructured":"Berkhin P et al (2006) A survey of clustering data mining techniques. Group Multidimens Data 25:71","journal-title":"Group Multidimens Data"},{"key":"678_CR9","unstructured":"Bottou L, Bengio Y (1995) Convergence Properties of the K-means Algorithms. In: Advances in neural information processing systems, pp 585\u2013592"},{"key":"678_CR10","unstructured":"Boutsidis C, Drineas P, Mahoney MW (2009) Unsupervised Feature Selection for the K-means Clustering Problem. In: Advances in neural information processing systems, pp 153\u2013161"},{"key":"678_CR11","unstructured":"Boutsidis C, Zouzias A, Drineas P (2010) Random Projections for K-means clustering. In: Advances in neural information processing systems, pp 298\u2013306"},{"key":"678_CR12","unstructured":"Bradley PS, Fayyad UM (1998) Refining initial points for K-means clustering. In: Proceedings of the 15th international conference on machine learning, vol\u00a098, pp 91\u201399"},{"key":"678_CR13","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.knosys.2016.06.031","volume":"117","author":"M Cap\u00f3","year":"2017","unstructured":"Cap\u00f3 M, P\u00e9rez A, Lozano JA (2017) An efficient approximation to the K-means clustering for massive data. Knowl-Based Syst 117:56\u201369","journal-title":"Knowl-Based Syst"},{"key":"678_CR14","doi-asserted-by":"crossref","unstructured":"Cohen MB, Elder S, Musco C, Musco C, Persu M (2015) Dimensionality reduction for K-means clustering and low rank approximation. In: Proceedings of the 47th annual ACM symposium on theory of computing. ACM, pp 163\u2013172","DOI":"10.1145\/2746539.2746569"},{"key":"678_CR15","unstructured":"Davidson I, Satyanarayana A (2003) Speeding up K-means clustering by bootstrap averaging. In: IEEE data mining workshop on clustering large data sets"},{"key":"678_CR16","doi-asserted-by":"crossref","unstructured":"Ding C, He X (2004) K-means Clustering via Principal Component Analysis. In: Proceedings of the 21st international conference on Machine learning. ACM, p\u00a029","DOI":"10.1145\/1015330.1015408"},{"key":"678_CR17","unstructured":"Ding Y, Zhao Y, Shen X, Musuvathi M, Mytkowicz T (2015) Yinyang k-means: a drop-in replacement of the classic K-means with consistent speedup. In: International conference on machine learning, pp 579\u2013587"},{"key":"678_CR18","unstructured":"Drake J, Hamerly G (2012) Accelerated K-means with adaptive distance bounds. In: 5th NIPS workshop on optimization for machine learning, pp 42\u201353"},{"key":"678_CR19","unstructured":"Elkan C (2003) Using the triangle inequality to accelerate K-means. In: Proceedings of the 20th international conference on machine learning, pp 147\u2013153"},{"key":"678_CR20","doi-asserted-by":"crossref","unstructured":"Feldman D, Monemizadeh M, Sohler C (2007) A PTAS for K-means clustering based on weak coresets. In: Proceedings of the 23rd annual symposium on computational geometry, pp 11\u201318","DOI":"10.1145\/1247069.1247072"},{"key":"678_CR21","first-page":"768","volume":"21","author":"EW Forgy","year":"1965","unstructured":"Forgy EW (1965) Cluster analysis of multivariate data: efficiency vs interpretability of classifications. Biometrics 21:768\u2013769","journal-title":"Biometrics"},{"key":"678_CR22","doi-asserted-by":"crossref","unstructured":"Hamerly G (2010) Making K-means even faster. In: Proceedings of the SIAM international conference on data mining, pp 130\u2013140","DOI":"10.1137\/1.9781611972801.12"},{"key":"678_CR23","doi-asserted-by":"crossref","unstructured":"Har-Peled S, Mazumdar S (2004) On Coresets for K-means and K-median Clustering. In: Proceedings of the 36th ACM symposium on theory of computing, pp 291\u2013300","DOI":"10.1145\/1007352.1007400"},{"issue":"8","key":"678_CR24","doi-asserted-by":"publisher","first-page":"651","DOI":"10.1016\/j.patrec.2009.09.011","volume":"31","author":"AK Jain","year":"2010","unstructured":"Jain AK (2010) Data clustering: 50 years beyond K-means. Pattern Recogn Lett 31(8):651\u2013666","journal-title":"Pattern Recogn Lett"},{"key":"678_CR25","volume-title":"Algorithms for Clustering Data","author":"AK Jain","year":"1988","unstructured":"Jain AK, Dubes RC (1988) Algorithms for Clustering Data. Prentice-Hall, Inc, Upper Saddle River"},{"issue":"3","key":"678_CR26","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1145\/331499.331504","volume":"31","author":"AK Jain","year":"1999","unstructured":"Jain AK, Murty MN, Flynn PJ (1999) Data clustering: a review. ACM Comput Surv 31(3):264\u2013323","journal-title":"ACM Comput Surv"},{"key":"678_CR27","unstructured":"Jordan M (2013) Committee on the analysis of massive data, committee on applied and theoretical statistics, board on mathematical sciences and their applications, division on engineering and physical sciences, council, nr. frontiers in massive data analysis. Front Mass Data Anal"},{"issue":"7","key":"678_CR28","doi-asserted-by":"publisher","first-page":"881","DOI":"10.1109\/TPAMI.2002.1017616","volume":"24","author":"T Kanungo","year":"2002","unstructured":"Kanungo T, Mount DM, Netanyahu NS, Piatko CD, Silverman R, Wu AY (2002a) An efficient K-means clustering algorithm: analysis and implementation. IEEE Trans Pattern Anal Mach Intell 24(7):881\u2013892","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"678_CR29","doi-asserted-by":"crossref","unstructured":"Kanungo T, Mount DM, Netanyahu NS, Piatko CD, Silverman R, Wu AY (2002b) A local search approximation algorithm for K-means clustering. In: Proceedings of the 18th annual symposium on computational geometry, pp 10\u201318","DOI":"10.1145\/513400.513402"},{"key":"678_CR30","volume-title":"Clustering by means of medoids","author":"L Kaufman","year":"1987","unstructured":"Kaufman L, Rousseeuw P (1987) Clustering by means of medoids. North-Holland, Amsterdam"},{"key":"678_CR31","doi-asserted-by":"crossref","unstructured":"Kumar A, Sabharwal Y, Sen S (2004) A simple linear time (1 + $$\\varepsilon $$)-approximation algorithm for K-means clustering in any dimensions. In: Proceedings of the 45th annual IEEE symposium on foundations of computer science, pp 454\u2013462","DOI":"10.1109\/FOCS.2004.7"},{"issue":"2","key":"678_CR32","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1109\/TIT.1982.1056489","volume":"28","author":"S Lloyd","year":"1982","unstructured":"Lloyd S (1982) Least squares quantization in PCM. IEEE Trans Inf Theory 28(2):129\u2013137","journal-title":"IEEE Trans Inf Theory"},{"key":"678_CR33","unstructured":"Lucic M, Bachem O, Krause A (2016) Strong coresets for hard and soft Bregman clustering with applications to exponential family mixtures. In: Artificial intelligence and statistics, pp 1\u20139"},{"key":"678_CR34","doi-asserted-by":"crossref","unstructured":"Mahajan M, Nimbhorkar P, Varadarajan K (2009) The Planar k-means problem is NP-hard. In: International workshop on algorithms and computation, pp 274\u2013285","DOI":"10.1007\/978-3-642-00202-1_24"},{"key":"678_CR35","doi-asserted-by":"crossref","unstructured":"Manning CD, Raghavan P, Sch\u00fctze H (2008) Evaluation in information retrieval. In: Introduction to information retrieval pp 151\u2013175","DOI":"10.1017\/CBO9780511809071"},{"issue":"1","key":"678_CR36","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1007\/s004540010019","volume":"24","author":"J Matou\u0161ek","year":"2000","unstructured":"Matou\u0161ek J (2000) On approximate geometric K-clustering. Discret Comput Geom 24(1):61\u201384","journal-title":"Discret Comput Geom"},{"key":"678_CR37","unstructured":"Newling J, Fleuret F (2016) Nested mini-batch K-means. In: Advances in neural information processing systems, pp 1352\u20131360"},{"issue":"10","key":"678_CR38","doi-asserted-by":"publisher","first-page":"1027","DOI":"10.1016\/S0167-8655(99)00069-0","volume":"20","author":"JM Pe\u00f1a","year":"1999","unstructured":"Pe\u00f1a JM, Lozano JA, Larra\u00f1aga P (1999) An empirical comparison of four initialization methods for the K-means algorithm. Pattern Recogn Lett 20(10):1027\u20131040","journal-title":"Pattern Recogn Lett"},{"issue":"8","key":"678_CR39","doi-asserted-by":"publisher","first-page":"965","DOI":"10.1016\/j.patrec.2007.01.001","volume":"28","author":"SJ Redmond","year":"2007","unstructured":"Redmond SJ, Heneghan C (2007) A method for initialising the K-means clustering algorithm using KD-trees. Pattern Recogn Lett 28(8):965\u2013973","journal-title":"Pattern Recogn Lett"},{"key":"678_CR40","doi-asserted-by":"crossref","unstructured":"Sculley D (2010) Web-scale K-means clustering. In: Proceedings of the 19th international conference on world wide web, pp 1177\u20131178","DOI":"10.1145\/1772690.1772862"},{"key":"678_CR41","doi-asserted-by":"crossref","unstructured":"Shen X, Liu W, Tsang I, Shen F, Sun QS (2017) Compressed K-means for large-scale clustering. In: 31st AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v31i1.10852"},{"issue":"1","key":"678_CR42","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s00357-007-0003-0","volume":"24","author":"D Steinley","year":"2007","unstructured":"Steinley D, Brusco MJ (2007) Initializing K-means batch clustering: a critical evaluation of several techniques. J Classif 24(1):99\u2013121","journal-title":"J Classif"},{"issue":"4","key":"678_CR43","doi-asserted-by":"publisher","first-page":"596","DOI":"10.1007\/s00454-011-9340-1","volume":"45","author":"A Vattani","year":"2011","unstructured":"Vattani A (2011) K-means requires exponentially many iterations even in the plane. Discret Comput Geom 45(4):596\u2013616","journal-title":"Discret Comput Geom"},{"issue":"1","key":"678_CR44","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10115-007-0114-2","volume":"14","author":"X Wu","year":"2008","unstructured":"Wu X, Kumar V, Quinlan JR, Ghosh J, Yang Q, Motoda H, McLachlan GJ, Ng A, Liu B, Philip SY et al (2008) Top 10 algorithms in data mining. Knowl Inf Syst 14(1):1\u201337","journal-title":"Knowl Inf Syst"},{"key":"678_CR45","doi-asserted-by":"crossref","unstructured":"Zhao W, Ma H, He Q (2009) Parallel K-means clustering based on MapReduce. In: IEEE international conference on cloud computing, pp 674\u2013679","DOI":"10.1007\/978-3-642-10665-1_71"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-020-00678-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10618-020-00678-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-020-00678-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,18]],"date-time":"2022-10-18T06:02:20Z","timestamp":1666072940000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10618-020-00678-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5]]},"references-count":45,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2020,5]]}},"alternative-id":["678"],"URL":"https:\/\/doi.org\/10.1007\/s10618-020-00678-9","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,5]]},"assertion":[{"value":"20 January 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 February 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 July 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}