{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:40:05Z","timestamp":1755877205577,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T00:00:00Z","timestamp":1740700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["DBI-2019674"],"award-info":[{"award-number":["DBI-2019674"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"European High-Performance Computing Joint Under- taking","award":["01175702"],"award-info":[{"award-number":["01175702"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,28]]},"DOI":"10.1145\/3710848.3710887","type":"proceedings-article","created":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T06:20:57Z","timestamp":1740723657000},"page":"426-440","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Popcorn: Accelerating Kernel K-means on GPUs through Sparse Linear Algebra"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1375-5720","authenticated-orcid":false,"given":"Julian","family":"Bellavita","sequence":"first","affiliation":[{"name":"Cornell University, Ithaca, NY, USA and University of Trento"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9998-4748","authenticated-orcid":false,"given":"Thomas","family":"Pasquali","sequence":"additional","affiliation":[{"name":"University of Trento, Trento, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7812-3161","authenticated-orcid":false,"given":"Laura","family":"Del Rio Martin","sequence":"additional","affiliation":[{"name":"University of Trento, Trento, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5676-9228","authenticated-orcid":false,"given":"Flavio","family":"Vella","sequence":"additional","affiliation":[{"name":"University of Trento, Trento, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8925-3239","authenticated-orcid":false,"given":"Giulia","family":"Guidi","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA and University of Trento"}]}],"member":"320","published-online":{"date-parts":[[2025,2,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.60084\/ljes.v2i1.181"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.15678\/EBER.2021.090411"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the BICOB","author":"Melman Paul","year":"2018","unstructured":"Paul Melman and Usman W Roshan. K-means-based feature learning for protein sequence classification. Proceedings of the BICOB, Las Vegas, NV, USA, pages 19--21, 2018."},{"key":"e_1_3_2_1_4_1","volume-title":"Randomly pivoted cholesky: Practical approximation of a kernel matrix with few entry evaluations. arXiv preprint arXiv:2207.06503","author":"Chen Yifan","year":"2022","unstructured":"Yifan Chen, Ethan N Epperly, Joel A Tropp, and Robert J Webber. Randomly pivoted cholesky: Practical approximation of a kernel matrix with few entry evaluations. arXiv preprint arXiv:2207.06503, 2022."},{"key":"e_1_3_2_1_5_1","first-page":"992","volume-title":"International Conference on Machine Learning","author":"Blalock Davis","year":"2021","unstructured":"Davis Blalock and John Guttag. Multiplying matrices without multiplying. In International Conference on Machine Learning, pages 992--1004. PMLR, 2021."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1014052.1014118"},{"key":"e_1_3_2_1_7_1","volume-title":"Advances in Neural Information Processing Systems","author":"G\u00f6nen Mehmet","year":"2014","unstructured":"Mehmet G\u00f6nen and Adam A Margolin. Localized data fusion for kernel k-means clustering with application to cancer biology. In Z. Ghahramani, M. Welling, C. Cortes, N. Lawrence, and K.Q. Weinberger, editors, Advances in Neural Information Processing Systems, volume 27. Curran Associates, Inc., 2014."},{"key":"e_1_3_2_1_8_1","first-page":"012003","volume-title":"Journal of Physics: Conference Series","volume":"947","author":"Alamsyah MARTHA","unstructured":"MARTHA Alamsyah, ZUMROTUN Nafisah, E Prayitno, AM Afida, and EM Imah. The classification of diabetes mellitus using kernel k-means. In Journal of Physics: Conference Series, volume 947, page 012003. IOP Publishing, 2018."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.20965\/jaciii.2020.p0073"},{"key":"e_1_3_2_1_10_1","unstructured":"Sajadin Sembiring. An Application of Predicting Student Performance Using Kernel K-Means and Smooth Support Vector Machine. PhD thesis UMP 2012."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDSIS61070.2024.10594566"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2016.2550666"},{"key":"e_1_3_2_1_13_1","first-page":"1","volume-title":"2018 6th International Conference on Control Engineering & Information Technology (CEIT)","author":"Ebubekir","year":"2018","unstructured":"Ebubekir BUBER and Banu DIRI. Performance analysis and cpu vs gpu comparison for deep learning. In 2018 6th International Conference on Control Engineering & Information Technology (CEIT), pages 1--6, 2018."},{"key":"e_1_3_2_1_14_1","first-page":"47","volume-title":"SC '04: Proceedings of the 2004 ACM\/IEEE Conference on Supercomputing","author":"Fan Zhe","year":"2004","unstructured":"Zhe Fan, Feng Qiu, A. Kaufman, and S. Yoakum-Stover. Gpu cluster for high performance computing. In SC '04: Proceedings of the 2004 ACM\/IEEE Conference on Supercomputing, pages 47--47, 2004."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2014.6927483"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404397.3404426"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-018-2405-7"},{"volume-title":"Raft contains fundamental widely-used algorithms and primitives for data science, graph and machine learning","year":"2022","key":"e_1_3_2_1_18_1","unstructured":"Rapidsai. Rapidsai\/raft: Raft contains fundamental widely-used algorithms and primitives for data science, graph and machine learning., 2022."},{"key":"e_1_3_2_1_19_1","volume-title":"src-d\/kmcuda: 6.0.0-1","author":"Markovtsev Vadim","year":"2017","unstructured":"Vadim Markovtsev and M\u00e1ximo Cuadros. src-d\/kmcuda: 6.0.0-1, 2017."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1365490.1365500"},{"key":"e_1_3_2_1_21_1","first-page":"209","volume-title":"Automatic Tuning of CUDA Execution Parameters for Stencil Processing","author":"Sato Katsuto","year":"2010","unstructured":"Katsuto Sato, Hiroyuki Takizawa, Kazuhiko Komatsu, and Hiroaki Kobayashi. Automatic Tuning of CUDA Execution Parameters for Stencil Processing, pages 209--228. Springer New York, New York, NY, 2010."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCSim.2011.5999886"},{"key":"e_1_3_2_1_23_1","volume-title":"cuSPARSE","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. cuSPARSE, 2024. https:\/\/docs.nvidia.com\/cuda\/cusparse."},{"key":"e_1_3_2_1_24_1","volume-title":"cuBLAS","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. cuBLAS, 2024. https:\/\/docs.nvidia.com\/cuda\/cublas."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1982.1056489"},{"key":"e_1_3_2_1_26_1","first-page":"1027","volume-title":"Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA '07","author":"Arthur David","year":"2007","unstructured":"David Arthur and Sergei Vassilvitskii. k-means++: the advantages of careful seeding. In Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA '07, page 1027--1035, USA, 2007. Society for Industrial and Applied Mathematics."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809682"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-385963-1.00026-5"},{"key":"e_1_3_2_1_29_1","volume-title":"Libsvm: A library for support vector machines. ACM transactions on intelligent systems and technology (TIST), 2(3):1--27","author":"Chang Chih-Chung","year":"2011","unstructured":"Chih-Chung Chang and Chih-Jen Lin. Libsvm: A library for support vector machines. ACM transactions on intelligent systems and technology (TIST), 2(3):1--27, 2011."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/GlobalSIP.2013.6737057"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2002.1047453"},{"key":"e_1_3_2_1_32_1","volume-title":"Retrieved","author":"Chen Mo","year":"2024","unstructured":"Mo Chen. Pattern recognition and machine learning toolbox. https:\/\/github.com\/PRML\/PRMLT, 2024. Retrieved July 30, 2024."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_34_1","volume-title":"NVIDIA Nsight Compute","author":"NVIDIA Corporation","year":"2024","unstructured":"NVIDIA Corporation. NVIDIA Nsight Compute, 2024. https:\/\/docs.nvidia.com\/nsight-compute\/index.html."},{"key":"e_1_3_2_1_35_1","first-page":"147","volume-title":"Proceedings of the 20th international conference on Machine Learning (ICML-03)","author":"Elkan Charles","year":"2003","unstructured":"Charles Elkan. Using the triangle inequality to accelerate k-means. In Proceedings of the 20th international conference on Machine Learning (ICML-03), pages 147--153, 2003."},{"key":"e_1_3_2_1_36_1","first-page":"41","volume-title":"Accelerating lloyd's algorithm for k-means clustering. Partitional clustering algorithms","author":"Hamerly Greg","year":"2015","unstructured":"Greg Hamerly and Jonathan Drake. Accelerating lloyd's algorithm for k-means clustering. Partitional clustering algorithms, pages 41--78, 2015."},{"key":"e_1_3_2_1_37_1","first-page":"212","volume-title":"Pdpta","volume":"13","author":"Farivar Reza","year":"2008","unstructured":"Reza Farivar, Daniel Rebolledo, Ellick Chan, and Roy H Campbell. A parallel implementation of k-means clustering on gpus. In Pdpta, volume 13, pages 212--312, 2008."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3211922.3211925"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2012.05.004"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10618-022-00869-6"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2020408.2020558"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109873"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2015.06.008"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00012"},{"key":"e_1_3_2_1_45_1","volume-title":"Algorithm 1000: Suitesparse: Graphblas: Graph algorithms in the language of sparse linear algebra. ACM Transactions on Mathematical Software (TOMS), 45(4):1--25","author":"Davis Timothy A","year":"2019","unstructured":"Timothy A Davis. Algorithm 1000: Suitesparse: Graphblas: Graph algorithms in the language of sparse linear algebra. ACM Transactions on Mathematical Software (TOMS), 45(4):1--25, 2019."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342011403516"},{"key":"e_1_3_2_1_47_1","volume-title":"Sparse tensor algebra as a parallel programming model. arXiv preprint arXiv:1512.00066","author":"Solomonik Edgar","year":"2015","unstructured":"Edgar Solomonik and Torsten Hoefler. Sparse tensor algebra as a parallel programming model. arXiv preprint arXiv:1512.00066, 2015."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00060"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545050"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611976830.12"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433794"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW63119.2024.00205"},{"key":"e_1_3_2_1_53_1","volume-title":"Hipmcl: a high-performance parallel implementation of the markov clustering algorithm for large-scale networks. Nucleic acids research, 46(6):e33-e33","author":"Azad Ariful","year":"2018","unstructured":"Ariful Azad, Georgios A Pavlopoulos, Christos A Ouzounis, Nikos C Kyrpides, and Aydin Bulu\u00e7. Hipmcl: a high-performance parallel implementation of the markov clustering algorithm for large-scale networks. Nucleic acids research, 46(6):e33-e33, 2018."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.05.377"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC56025.2022.00029"},{"key":"e_1_3_2_1_56_1","volume-title":"Toward capturing genetic epistasis from multivariate genome-wide association studies using mixed-precision kernel ridge regression. arXiv preprint arXiv:2409.01712","author":"Ltaief Hatem","year":"2024","unstructured":"Hatem Ltaief, Rabab Alomairy, Qinglei Cao, Jie Ren, Lotfi Slim, Thorsten Kurth, Benedikt Dorschner, Salim Bougouffa, Rached Abdelkhalak, and David E Keyes. Toward capturing genetic epistasis from multivariate genome-wide association studies using mixed-precision kernel ridge regression. arXiv preprint arXiv:2409.01712, 2024."},{"key":"e_1_3_2_1_57_1","volume-title":"Human activity recognition based on parallel approximation kernel k-means algorithm. Computer Systems Science & Engineering, 35(6)","author":"Jamel Ahmed AM","year":"2020","unstructured":"Ahmed AM Jamel and Bahriye Akay. Human activity recognition based on parallel approximation kernel k-means algorithm. Computer Systems Science & Engineering, 35(6), 2020."}],"event":{"name":"PPoPP '25: The 30th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"],"location":"Las Vegas NV USA","acronym":"PPoPP '25"},"container-title":["Proceedings of the 30th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3710848.3710887","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3710848.3710887","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:14:16Z","timestamp":1755875656000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3710848.3710887"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,28]]},"references-count":57,"alternative-id":["10.1145\/3710848.3710887","10.1145\/3710848"],"URL":"https:\/\/doi.org\/10.1145\/3710848.3710887","relation":{},"subject":[],"published":{"date-parts":[[2025,2,28]]},"assertion":[{"value":"2025-02-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}