{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T14:32:53Z","timestamp":1776436373586,"version":"3.51.2"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1007\/s10994-024-06540-z","type":"journal-article","created":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T15:02:01Z","timestamp":1713798121000},"page":"5891-5906","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Coresets for kernel clustering"],"prefix":"10.1007","volume":"113","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7972-827X","authenticated-orcid":false,"given":"Shaofeng H. -C.","family":"Jiang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Robert","family":"Krauthgamer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianing","family":"Lou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yubo","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,4,22]]},"reference":[{"key":"6540_CR1","unstructured":"Arthur, D., & Vassilvitskii, S. (2007). K-means++: the advantages of careful seeding. SODA (pp. 1027-1035). SIAM."},{"key":"6540_CR2","unstructured":"Balcan, M., Ehrlich, S., & Liang, Y. (2013). Distributed k-means and k-median clustering on general communication topologies. NIPS (pp. 1995-2003)."},{"issue":"4","key":"6540_CR3","doi-asserted-by":"publisher","first-page":"92","DOI":"10.3390\/a13040092","volume":"13","author":"A Barger","year":"2020","unstructured":"Barger, A., & Feldman, D. (2020). Deterministic coresets for k-means of big sparse data. Algorithms, 13(4), 92.","journal-title":"Algorithms"},{"key":"6540_CR4","doi-asserted-by":"crossref","unstructured":"Becchetti, L., Bury, M., Cohen-Addad, V., Grandoni, F., & Schwiegelshohn, C. (2019). Oblivious dimension reduction for k-means: Beyond subspaces and the Johnson-Lindenstrauss lemma. STOC (pp. 1039-1050). ACM.","DOI":"10.1145\/3313276.3316318"},{"key":"6540_CR5","doi-asserted-by":"crossref","unstructured":"Braverman, V., Jiang, S. H., Krauthgamer, R., & Wu, X. (2021). Coresets for clustering in excluded-minor graphs and beyond. SODA (pp. 2679-2696). SIAM.","DOI":"10.1137\/1.9781611976465.159"},{"key":"6540_CR6","unstructured":"Chan, T. -H. H., Guerquin, A., & Sozio, M. (2018). Twitter data set. Retrieved from https:\/\/github.com\/fe6Bc5R4JvLkFkSeExHM\/k-center"},{"key":"6540_CR7","unstructured":"Chen, D., & Phillips, J. M. (2017). Relative error embeddings of the gaussian kernel distance. ALT (Vol. 76, pp. 560-576). PMLR."},{"key":"6540_CR8","doi-asserted-by":"crossref","unstructured":"Chitta, R., Jin, R., Havens, T. C., & Jain, A. K. (2011). Approximate kernel kmeans: Solution to large scale kernel clustering. KDD (pp. 895-903). ACM.","DOI":"10.1145\/2020408.2020558"},{"key":"6540_CR9","doi-asserted-by":"crossref","unstructured":"Chitta, R., Jin, R., & Jain, A. K. (2012). Efficient kernel clustering using random Fourier features. ICDM (pp. 161-170). IEEE Computer Society.","DOI":"10.1109\/ICDM.2012.61"},{"key":"6540_CR10","doi-asserted-by":"crossref","unstructured":"Cohen-Addad, V., Saulpic, D., & Schwiegelshohn, C. (2021). A new coreset framework for clustering. STOC (pp. 169-182). ACM.","DOI":"10.1145\/3406325.3451022"},{"issue":"1\u20132","key":"6540_CR11","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1002\/rsa.20157","volume":"30","author":"A Czumaj","year":"2007","unstructured":"Czumaj, A., & Sohler, C. (2007). Sublinear-time approximation algorithms for clustering via random sampling. Random Structures and Algorithms, 30(1\u20132), 226\u2013256. https:\/\/doi.org\/10.1002\/rsa.20157","journal-title":"Random Structures and Algorithms"},{"key":"6540_CR12","doi-asserted-by":"crossref","unstructured":"Dhillon, I. S., Guan, Y., & Kulis, B. (2004). Kernel k-means: Spectral clustering and normalized cuts. KDD (pp. 551-556). ACM.","DOI":"10.1145\/1014052.1014118"},{"key":"6540_CR13","doi-asserted-by":"crossref","unstructured":"Ding, C.H.Q., He, X., & Simon, H.D. (2005). Nonnegative lagrangian relaxation of K-means and spectral clustering. ECML (Vol. 3720, pp. 530-538). Springer.","DOI":"10.1007\/11564096_51"},{"key":"6540_CR14","unstructured":"Dua, D., & Graff, C. (2017). UCI machine learning repository, adult dataset. Retrieved from https:\/\/archive.ics.uci.edu\/ml\/datasets\/adult"},{"key":"6540_CR15","doi-asserted-by":"crossref","unstructured":"Feldman, D., & Langberg, M. (2011). A unified framework for approximating and clustering data. STOC (pp. 569-578). ACM.","DOI":"10.1145\/1993636.1993712"},{"key":"6540_CR16","doi-asserted-by":"crossref","unstructured":"Feldman, D., Monemizadeh, M., & Sohler, C. (2007). A PTAS for k-means clustering based on weak coresets. SoCG (pp. 11-18). ACM.","DOI":"10.1145\/1247069.1247072"},{"issue":"3","key":"6540_CR17","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1137\/18M1209854","volume":"49","author":"D Feldman","year":"2020","unstructured":"Feldman, D., Schmidt, M., & Sohler, C. (2020). Turning big data into tiny data: Constant-size coresets for k-means, PCA, and projective clustering. SIAM Journal on Computing, 49(3), 601\u2013657.","journal-title":"SIAM Journal on Computing"},{"key":"6540_CR18","unstructured":"Feng, Z., Kacham, P., & Woodruff, D. P. (2021). Dimensionality reduction for the sum-of-distances metric. ICML (Vol. 139, pp. 3220-3229). PMLR."},{"key":"6540_CR19","unstructured":"Ghojogh, B., Ghodsi, A., Karray, F., & Crowley, M. (2021). Reproducing kernel Hilbert space, Mercer\u2019s theorem, eigenfunctions, Nystr\u00f6m method, and use of kernels in machine learning: Tutorial and survey. CoRR, abs\/2106.08443 ."},{"issue":"3","key":"6540_CR20","doi-asserted-by":"publisher","first-page":"780","DOI":"10.1109\/TNN.2002.1000150","volume":"13","author":"MA Girolami","year":"2002","unstructured":"Girolami, M. A. (2002). Mercer kernel-based clustering in feature space. IEEE Transactions on Neural Networks, 13(3), 780\u2013784.","journal-title":"IEEE Transactions on Neural Networks"},{"key":"6540_CR21","doi-asserted-by":"crossref","unstructured":"Har-Peled, S., & Mazumdar, S. (2004). On coresets for k-means and k-median clustering. STOC (pp. 291-300). ACM.","DOI":"10.1145\/1007352.1007400"},{"key":"6540_CR22","unstructured":"Henzinger, M., & Kale, S. (2020). Fully-dynamic coresets. ESA (Vol. 173, pp.57:1-57:21). Schloss Dagstuhl-Leibniz-Zentrum f\u00fcr Informatik."},{"key":"6540_CR23","doi-asserted-by":"crossref","unstructured":"Huang, L., & Vishnoi, N. K. (2020). Coresets for clustering in Euclidean spaces: Importance sampling is nearly optimal. STOC (pp. 1416-1429). ACM.","DOI":"10.1145\/3357713.3384296"},{"key":"6540_CR24","doi-asserted-by":"crossref","unstructured":"Joshi, S. C., Kommaraju, R. V., Phillips, J. M., & Venkatasubramanian, S. (2011). Comparing distributions and shapes using the kernel distance. SoCG (pp. 47-56). ACM.","DOI":"10.1145\/1998196.1998204"},{"issue":"4","key":"6540_CR25","doi-asserted-by":"publisher","first-page":"607","DOI":"10.1016\/j.patcog.2004.09.006","volume":"38","author":"D Kim","year":"2005","unstructured":"Kim, D., Lee, K. Y., Lee, D., & Lee, K. H. (2005). Evaluation of the performance of clustering algorithms in kernel-induced feature space. Pattern Recognition, 38(4), 607\u2013611.","journal-title":"Pattern Recognition"},{"key":"6540_CR26","doi-asserted-by":"crossref","unstructured":"Kumar, A., Sabharwal, Y., & Sen, S. (2004). A simple linear time (1+\u2019.)- approximation algorithm for k-means clustering in any dimensions. FOCS (pp. 454-462). IEEE Computer Society.","DOI":"10.1109\/FOCS.2004.7"},{"key":"6540_CR27","doi-asserted-by":"crossref","unstructured":"Langberg, M., & Schulman, L. J. (2010). Universal epsilon-approximators for integrals. SODA (pp. 598-607). SIAM.","DOI":"10.1137\/1.9781611973075.50"},{"key":"6540_CR28","unstructured":"Meek, C., Thiesson, B., & Heckerman, D. (1990). UCI machine learning repository, census1990 dataset. Retrieved from http:\/\/archive.ics.uci.edu\/ml\/datasets\/US+Census+Data+(1990)"},{"key":"6540_CR29","unstructured":"Moro, S., Cortez, P., & Rita, P. (2014). UCI machine learning repository, bank dataset. Retrieved from https:\/\/archive.ics.uci.edu\/ml\/datasets\/Bank+Marketing"},{"key":"6540_CR30","unstructured":"Musco, C., & Musco, C. (2017). Recursive sampling for the Nystrom method. NIPS (pp. 3833-3845)."},{"key":"6540_CR31","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., & Duchesnay, E. (2011). Scikit-learn: Machine learning in Python. Journal of Machine Learning Research, 12, 2825\u20132830.","journal-title":"Journal of Machine Learning Research"},{"key":"6540_CR32","unstructured":"Rahimi, A., & Recht, B. (2007). Random features for large-scale kernel machines. NIPS (pp. 1177-1184). Curran Associates, Inc."},{"key":"6540_CR33","doi-asserted-by":"crossref","unstructured":"Ren, Y., & Du, Y. (2020). Uniform and non-uniform sampling methods for sub-linear time k-means clustering. ICPR (pp. 7775-7781). IEEE.","DOI":"10.1109\/ICPR48806.2021.9413190"},{"key":"6540_CR34","unstructured":"Schmidt, M. (2014). Coresets and streaming algorithms for the k-means problem and related clustering objectives (Unpublished doctoral dissertation)."},{"issue":"5","key":"6540_CR35","doi-asserted-by":"publisher","first-page":"1299","DOI":"10.1162\/089976698300017467","volume":"10","author":"B Sch\u00f6lkopf","year":"1998","unstructured":"Sch\u00f6lkopf, B., Smola, A. J., & M\u00fcller, K. (1998). Nonlinear component analysis as a kernel eigenvalue problem. Neural Computation, 10(5), 1299\u20131319.","journal-title":"Neural Computation"},{"key":"6540_CR36","doi-asserted-by":"crossref","unstructured":"Sohler, C., & Woodruff, D. P. (2018). Strong coresets for k-median and subspace approximation: Goodbye dimension. FOCS (pp. 802-813). IEEE Computer Society.","DOI":"10.1109\/FOCS.2018.00081"},{"issue":"12","key":"6540_CR37","first-page":"1","volume":"20","author":"S Wang","year":"2019","unstructured":"Wang, S., Gittens, A., & Mahoney, M. W. (2019). Scalable kernel k-means clustering with Nystrom approximation: Relative-error bounds. Journal of Machine Learning Research, 20(12), 1\u201349.","journal-title":"Journal of Machine Learning Research"},{"key":"6540_CR38","doi-asserted-by":"crossref","unstructured":"Zhang, R., & Rudnicky, A. I. (2002). A large scale clustering scheme for kernel k-means. ICPR (4) (pp. 289-292). IEEE Computer Society.","DOI":"10.1109\/ICPR.2002.1047453"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06540-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-024-06540-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06540-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:08:14Z","timestamp":1764266894000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-024-06540-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,22]]},"references-count":38,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2024,8]]}},"alternative-id":["6540"],"URL":"https:\/\/doi.org\/10.1007\/s10994-024-06540-z","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4,22]]},"assertion":[{"value":"16 July 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 April 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors do not have any Conflict of interest, except for standard cases such as close and recent collaborators, doctoral advisors etc.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}