{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T04:22:48Z","timestamp":1777695768230,"version":"3.51.4"},"reference-count":24,"publisher":"SAGE Publications","issue":"4","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IDA"],"published-print":{"date-parts":[[2019,9,26]]},"DOI":"10.3233\/ida-173795","type":"journal-article","created":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T14:01:56Z","timestamp":1569938516000},"page":"825-838","source":"Crossref","is-referenced-by-count":4,"title":["Scalable k-means for large-scale clustering"],"prefix":"10.1177","volume":"23","author":[{"given":"Yuewei","family":"Ming","sequence":"first","affiliation":[{"name":"College of Computer, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"En","family":"Zhu","sequence":"additional","affiliation":[{"name":"College of Computer, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mao","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Computer, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiang","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Computer, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinwang","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Computer, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianping","family":"Yin","sequence":"additional","affiliation":[{"name":"Dongguan University of Technology, Dongguan, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"179","reference":[{"key":"10.3233\/IDA-173795_ref1","first-page":"1","article-title":"Geometric approximation via coresets","volume":"52","author":"Agarwal","year":"2005","journal-title":"Combinatorial and Computational Geometry"},{"issue":"7","key":"10.3233\/IDA-173795_ref2","doi-asserted-by":"crossref","first-page":"622","DOI":"10.14778\/2180912.2180915","article-title":"Scalable k-means++","volume":"5","author":"Bahmani","year":"2012","journal-title":"Proc. VLDB Endow"},{"key":"10.3233\/IDA-173795_ref3","doi-asserted-by":"crossref","unstructured":"J. Bhimani, M. Leeser and N. Mi, Accelerating k-means clustering with parallel implementations and gpu computing, in: 2015 IEEE High Performance Extreme Computing Conference (HPEC), 2015, pp. 1\u20136.","DOI":"10.1109\/HPEC.2015.7322467"},{"key":"10.3233\/IDA-173795_ref4","unstructured":"T. Bottesch, T. B\u00fchler and M. K\u00e4chele, Speeding up k-means by approximating euclidean distances via block vectors, in: International Conference on Machine Learning, 2016, pp. 2578\u20132586."},{"key":"10.3233\/IDA-173795_ref5","unstructured":"L. Bottou and Y. Bengio, Convergence properties of the k-means algorithms, in: Advances in Neural Information Processing Systems, 1995, pp. 585\u2013592."},{"key":"10.3233\/IDA-173795_ref7","doi-asserted-by":"crossref","unstructured":"S. Chawla and A. Gionis, k-means: A unified approach to clustering and outlier detection, in: Proceedings of the 2013 SIAM International Conference on Data Mining, SIAM, 2013, pp. 189\u2013197.","DOI":"10.1137\/1.9781611972832.21"},{"key":"10.3233\/IDA-173795_ref8","doi-asserted-by":"crossref","unstructured":"W. Dai, A. Kumar, J. Wei, Q. Ho, G. Gibson and E.P. Xing, High-performance distributed ml at scale through parameter server consistency models, in: Twenty-Ninth AAAI Conference on Artificial Intelligence, 2015.","DOI":"10.1609\/aaai.v29i1.9195"},{"key":"10.3233\/IDA-173795_ref9","unstructured":"Y. Ding, Y. Zhao, X. Shen, M. Musuvathi and T. Mytkowicz, Yinyang k-means: A drop-in replacement of the classic k-means with consistent speedup, in: Proceedings of the 32nd International Conference on Machine Learning, 2015, pp.\u00a0579\u2013587."},{"key":"10.3233\/IDA-173795_ref10","unstructured":"C. Elkan, Using the triangle inequality to accelerate k-means, in: Proceedings of the 20th International Conference on Machine Learning, 2003, pp. 147\u2013153."},{"key":"10.3233\/IDA-173795_ref12","doi-asserted-by":"crossref","unstructured":"G. Hamerly, Making k-means even faster, in: Proceedings of the 2010 SIAM International Conference on Data Mining, SIAM, 2010, pp. 130\u2013140.","DOI":"10.1137\/1.9781611972801.12"},{"key":"10.3233\/IDA-173795_ref13","unstructured":"Q. Ho, J. Cipar, H. Cui, S. Lee, J.K. Kim, P.B. Gibbons, G.A. Gibson, G. Ganger and E.P. Xing, More effective distributed ml via a stale synchronous parallel parameter server, in: Advances in Neural Information Processing Systems, 2013, pp.\u00a01223\u20131231."},{"issue":"7","key":"10.3233\/IDA-173795_ref14","doi-asserted-by":"crossref","first-page":"881","DOI":"10.1109\/TPAMI.2002.1017616","article-title":"An efficient k-means clustering algorithm: Analysis and implementation","volume":"24","author":"Kanungo","year":"2002","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.3233\/IDA-173795_ref16","doi-asserted-by":"crossref","unstructured":"Q. Li, P. Wang, W. Wang, H. Hu, Z. Li and J. Li, An efficient k-means clustering algorithm on mapreduce, in: Database Systems for Advanced Applications, Cham, Springer International Publishing, 2014, pp. 357\u2013371.","DOI":"10.1007\/978-3-319-05810-8_24"},{"key":"10.3233\/IDA-173795_ref17","unstructured":"J. Newling and F. Fleuret, Fast k-means with accurate bounds, in: International Conference on Machine Learning, 2016, pp. 936\u2013944."},{"key":"10.3233\/IDA-173795_ref18","unstructured":"J. Newling and F. Fleuret, Nested mini-batch k-means, in: Advances in Neural Information Processing Systems, 2016, pp. 1352\u20131360."},{"key":"10.3233\/IDA-173795_ref19","doi-asserted-by":"crossref","unstructured":"D. Pelleg and A. Moore, Accelerating exact k-means algorithms with geometric reasoning, in: Proceedings of the Fifth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, ACM, 1999, pp. 277\u2013281.","DOI":"10.1145\/312129.312248"},{"key":"10.3233\/IDA-173795_ref20","doi-asserted-by":"crossref","unstructured":"M. Perd\u2019och, O. Chum and J. Matas, Efficient representation of local geometry for large scale object retrieval, in: IEEE Conference on Computer Vision and Pattern Recognition, IEEE, 2009, pp. 9\u201316.","DOI":"10.1109\/CVPR.2009.5206529"},{"key":"10.3233\/IDA-173795_ref21","doi-asserted-by":"crossref","unstructured":"J. Philbin, O. Chum, M. Isard, J. Sivic and A. Zisserman, Object retrieval with large vocabularies and fast spatial matching, in: IEEE Conference on Computer Vision and Pattern Recognition, IEEE, 2007, pp. 1\u20138.","DOI":"10.1109\/CVPR.2007.383172"},{"key":"10.3233\/IDA-173795_ref22","doi-asserted-by":"crossref","unstructured":"D. Sculley, Web-scale k-means clustering, in: Proceedings of the 19th International Conference on World Wide Web, ACM, 2010, pp. 1177\u20131178.","DOI":"10.1145\/1772690.1772862"},{"issue":"1","key":"10.3233\/IDA-173795_ref24","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s40537-017-0087-2","article-title":"Clustering large datasets using k-means modified inter and intra clustering (km-i2c) in hadoop","volume":"4","author":"Sreedhar","year":"2017","journal-title":"Journal of Big Data"},{"key":"10.3233\/IDA-173795_ref25","unstructured":"M. Steinbach, G. Karypis, V. Kumar et al., A comparison of document clustering techniques, in: KDD Workshop on Text Mining, Boston, Vol. 400, 2000, pp. 525\u2013526."},{"issue":"11","key":"10.3233\/IDA-173795_ref26","doi-asserted-by":"crossref","first-page":"2346","DOI":"10.1109\/TPAMI.2015.2409868","article-title":"Visual place recognition with repetitive structures","volume":"37","author":"Torii","year":"2015","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"2","key":"10.3233\/IDA-173795_ref27","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1109\/TBDATA.2015.2472014","article-title":"Petuum: A new platform for distributed machine learning on big data","volume":"1","author":"Xing","year":"2015","journal-title":"IEEE Transactions on Big Data"},{"key":"10.3233\/IDA-173795_ref28","doi-asserted-by":"crossref","unstructured":"J. Zhang, G. Wu, X. Hu, S. Li and S. Hao, A parallel k-means clustering algorithm with mpi, in: 2011 Fourth International Symposium on Parallel Architectures, Algorithms and Programming, 2011, pp. 60\u201364.","DOI":"10.1109\/PAAP.2011.17"}],"container-title":["Intelligent Data Analysis"],"original-title":[],"link":[{"URL":"https:\/\/content.iospress.com\/download?id=10.3233\/IDA-173795","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T09:18:23Z","timestamp":1777454303000},"score":1,"resource":{"primary":{"URL":"https:\/\/journals.sagepub.com\/doi\/full\/10.3233\/IDA-173795"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,9,26]]},"references-count":24,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.3233\/ida-173795","relation":{},"ISSN":["1088-467X","1571-4128"],"issn-type":[{"value":"1088-467X","type":"print"},{"value":"1571-4128","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,9,26]]}}}