{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T14:24:18Z","timestamp":1775571858402,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,8,1]],"date-time":"2021-08-01T00:00:00Z","timestamp":1627776000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"NIH (National Institutes of Health)","doi-asserted-by":"publisher","award":["R01GM118568"],"award-info":[{"award-number":["R01GM118568"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014989","name":"Chan Zuckerberg Initiative","doi-asserted-by":"publisher","award":["CZF2019-00244"],"award-info":[{"award-number":["CZF2019-00244"]}],"id":[{"id":"10.13039\/100014989","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,8]]},"DOI":"10.1145\/3459930.3469523","type":"proceedings-article","created":{"date-parts":[[2021,7,30]],"date-time":"2021-07-30T18:30:10Z","timestamp":1627669810000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Fast and memory-efficient scRNA-seq\n            <i>k<\/i>\n            -means clustering with various distances"],"prefix":"10.1145","author":[{"given":"Daniel N.","family":"Baker","sequence":"first","affiliation":[{"name":"Johns Hopkins University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nathan","family":"Dyjack","sequence":"additional","affiliation":[{"name":"Johns Hopkins University, Bloomberg"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vladimir","family":"Braverman","sequence":"additional","affiliation":[{"name":"Johns Hopkins University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stephanie C.","family":"Hicks","sequence":"additional","affiliation":[{"name":"Johns Hopkins University, Bloomberg"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ben","family":"Langmead","sequence":"additional","affiliation":[{"name":"Johns Hopkins University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,8]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Unsupervised Feature Learning with K-means and An Ensemble of Deep Convolutional Neural Networks for Medical Image Classification. CoRR, arXiv:1906.03359","author":"Ahn Euijoon","year":"2019","unstructured":"Euijoon Ahn, Ashnil Kumar, Dagan Feng, Michael J. Fulham, and Jinman Kim. 2019. Unsupervised Feature Learning with K-means and An Ensemble of Deep Convolutional Neural Networks for Medical Image Classification. CoRR, arXiv:1906.03359 (2019). arXiv:1906.03359"},{"key":"e_1_3_2_1_2_1","volume-title":"The Advantages of Careful Seeding. SODA","author":"Arthur David","year":"2007","unstructured":"David Arthur and Sergei Vassilvitskii. 2007. K-Means++: The Advantages of Careful Seeding. SODA (2007), 1027--1035."},{"key":"e_1_3_2_1_3_1","unstructured":"Daniel Baker. 2008. libsimdsampling. http:\/\/github.com\/dnbaker\/libsimdsampling. [Online; accessed 7 Feb 2021]."},{"key":"e_1_3_2_1_4_1","first-page":"1995","article-title":"Distributed k-means and k-median Clustering on General Topologies","volume":"26","author":"Balcan Maria-Florina F","year":"2013","unstructured":"Maria-Florina F Balcan, Steven Ehrlich, and Yingyu Liang. 2013. Distributed k-means and k-median Clustering on General Topologies. Advances in Neural Information Processing Systems 26 (2013), 1995--2003.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.5555\/1046920.1194902"},{"key":"e_1_3_2_1_6_1","unstructured":"Leon Bottou and Yoshua Bengio. 1995. Convergence properties of the k-means algorithms. In Advances in neural information processing systems. 585--592."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.2645"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2556195.2556260"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"J. Cao D. R. O'Day H. A. Pliner P. D. Kingsley M. Deng R. M. Daza M. A. Zager K. A. Aldinger R. Blecher-Gonen F. Zhang M. Spielmann J. Palis D. Doherty F. J. Steemers I. A. Glass C. Trapnell and J. Shendure. 2020. A human cell atlas of fetal gene expression. Science 370 6518 (11 2020).","DOI":"10.1126\/science.aba7721"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"J. Cao M. Spielmann X. Qiu X. Huang D. M. Ibrahim A. J. Hill F. Zhang S. Mundlos L. Christiansen F. J. Steemers C. Trapnell and J. Shendure. 2019. The single-cell transcriptional landscape of mammalian organogenesis. Nature 566 7745 (02 2019) 496--502.","DOI":"10.1038\/s41586-019-0969-x"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/2900423.2900472"},{"key":"e_1_3_2_1_12_1","unstructured":"Wenzel Jakob Daniel Lemire. 2013. SIMDPCG. https:\/\/github.com\/lemire\/simdpcg."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1101\/2019.12.17.879304"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"B. DeMeo and B. Berger. 2020. Hopper: a mathematically optimal algorithm for sketching biological data. Bioinformatics 36 (07 2020) i236--i241.","DOI":"10.1093\/bioinformatics\/btaa408"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5555\/3041838.3041857"},{"key":"e_1_3_2_1_16_1","volume-title":"A Unified Framework for Approximating and Clustering Data. CoRR abs\/1106.1379","author":"Feldman Dan","year":"2011","unstructured":"Dan Feldman and Michael Langberg. 2011. A Unified Framework for Approximating and Clustering Data. CoRR abs\/1106.1379 (2011). arXiv:1106.1379 http:\/\/arxiv.org\/abs\/1106.1379"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1008625"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"B. Hie H. Cho B. DeMeo B. Bryson and B. Berger. 2019. Geometric Sketching Compactly Summarizes the Single-Cell Transcriptomic Landscape. Cell Syst 8 6 (06 2019) 483--493.","DOI":"10.1016\/j.cels.2019.05.003"},{"key":"e_1_3_2_1_19_1","volume-title":"arXiv:1910.11069 [cs.DS]","author":"H\u00fcbschle-Schneider Lorenz","year":"2020","unstructured":"Lorenz H\u00fcbschle-Schneider and Peter Sanders. 2020. Communication-Efficient (Weighted) Reservoir Sampling from Fully Distributed Data Streams. CoRR (2020). arXiv:1910.11069 [cs.DS]"},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"3671","author":"Lattanzi Silvio","year":"2019","unstructured":"Silvio Lattanzi and Christian Sohler. 2019. A Better k-means++ Algorithm via Local Search. In Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 97), Kamalika Chaudhuri and Ruslan Salakhutdinov (Eds.). PMLR, 3662--3671. http:\/\/proceedings.mlr.press\/v97\/lattanzi19a.html"},{"key":"e_1_3_2_1_21_1","unstructured":"Daniel Lemire. 2016--2018. SIMDPCG. https:\/\/lemire.me\/blog\/2018\/06\/07\/vectorizing-random-number-generators-for-greater-speed-pcg-and-xorshift128-avx-512-edition\/."},{"key":"e_1_3_2_1_22_1","volume-title":"Database Systems for Advanced Applications","author":"Li Qiuhong","unstructured":"Qiuhong Li, Peng Wang, Wei Wang, Hao Hu, Zhongsheng Li, and Junxian Li. 2014. An Efficient K-means Clustering Algorithm on MapReduce. In Database Systems for Advanced Applications, Sourav S. Bhowmick, Curtis E. Dyreson, Christian S. Jensen, Mong Li Lee, Agus Muliantara, and Bernhard Thalheim (Eds.). Springer International Publishing, Cham, 357--371."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1982.1056489"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1982.1056489"},{"key":"e_1_3_2_1_25_1","volume-title":"Strong Coresets for Hard and Soft Bregman Clustering with Applications to Exponential Family Mixtures. CoRR","author":"Lucic Mario","year":"2016","unstructured":"Mario Lucic, Olivier Bachem, and Andreas Krause. 2016. Strong Coresets for Hard and Soft Bregman Clustering with Applications to Exponential Family Mixtures. CoRR (2016). arXiv:1508.05243 [stat.ML]"},{"key":"e_1_3_2_1_26_1","volume-title":"Razenshteyn","author":"Makarychev Konstantin","year":"2018","unstructured":"Konstantin Makarychev, Yury Makarychev, and Ilya P. Razenshteyn. 2018. Performance of Johnson-Lindenstrauss Transform for k-Means and k-Medians Clustering. CoRR abs\/1811.03195 (2018). arXiv:1811.03195 http:\/\/arxiv.org\/abs\/1811.03195"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Deanna Needell Nathan Srebro and Rachel Ward. 2015. Stochastic Gradient Descent Weighted Sampling and the Randomized Kaczmarz algorithm. arXiv:1310.5715 [math.NA]","DOI":"10.1007\/s10107-015-0864-7"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.21105\/joss.01230"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"O. Rozenblatt-Rosen M. J. T. Stubbington A. Regev and S. A. Teichmann. 2017. The Human Cell Atlas: from vision to reality. Nature 550 7677 (10 2017) 451--453.","DOI":"10.1038\/550451a"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772862"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/tpds.2019.2960333"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"F. W. Townes S. C. Hicks M. J. Aryee and R. A. Irizarry. 2019. Feature selection and dimension reduction for single-cell RNA-Seq based on a multinomial model. Genome Biol 20 1 (12 2019) 295.","DOI":"10.1186\/s13059-019-1861-6"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1101\/gr.254557.119"},{"key":"e_1_3_2_1_35_1","volume-title":"McIntosh","author":"Wei Yuanyuan","year":"2019","unstructured":"Yuanyuan Wei, Julian Jang-Jaccard, Fariza Sabrina, and Timothy R. McIntosh. 2019. MSD-Kmeans: A Novel Algorithm for Efficient Detection of Global and Local Outliers. CoRR abs\/1910.06588 (2019). arXiv:1910.06588 http:\/\/arxiv.org\/abs\/1910.06588"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1214\/11-aoas493"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3388440.3412409"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1101\/2020.05.01.066738"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"G. X. Zheng J. M. Terry P. Belgrader P. Ryvkin Z. W. Bent R. Wilson S. B. Ziraldo T. D. Wheeler G. P. McDermott J. Zhu M. T. Gregory J. Shuga L. Montesclaros J. G. Underwood D. A. Masquelier S. Y. Nishimura M. Schnall-Levin P. W. Wyatt C. M. Hindson R. Bharadwaj A. Wong K. D. Ness L. W. Beppu H. J. Deeg C. McFarland K. R. Loeb W. J. Valente N. G. Ericson E. A. Stevens J. P. Radich T. S. Mikkelsen B. J. Hindson and J. H. Bielas. 2017. Massively parallel digital transcriptional profiling of single cells. Nat Commun 8 (01 2017) 14049.","DOI":"10.1038\/ncomms14049"}],"event":{"name":"BCB '21: 12th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics","location":"Gainesville Florida","acronym":"BCB '21","sponsor":["SIGBIOM ACM Special Interest Group on Biomedical Computing"]},"container-title":["Proceedings of the 12th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3459930.3469523","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3459930.3469523","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:17:43Z","timestamp":1750191463000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3459930.3469523"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8]]},"references-count":39,"alternative-id":["10.1145\/3459930.3469523","10.1145\/3459930"],"URL":"https:\/\/doi.org\/10.1145\/3459930.3469523","relation":{},"subject":[],"published":{"date-parts":[[2021,8]]},"assertion":[{"value":"2021-08-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}