{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T15:12:44Z","timestamp":1777129964294,"version":"3.51.4"},"reference-count":11,"publisher":"Oxford University Press (OUP)","issue":"1","license":[{"start":{"date-parts":[[2021,7,21]],"date-time":"2021-07-21T00:00:00Z","timestamp":1626825600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"name":"AIR@innoHK programme of the Innovation and Technology Commission of Hong Kong"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,22]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Scalable clustering algorithms are needed to analyze millions of cells in single cell RNA-seq (scRNA-seq) data.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>Here, we present an open source python package called FlowGrid that can integrate into the Scanpy workflow to perform clustering on very large scRNA-seq datasets. FlowGrid implements a fast density-based clustering algorithm originally designed for flow cytometry data analysis. We introduce a new automated parameter tuning procedure, and show that FlowGrid can achieve comparable clustering accuracy as state-of-the-art clustering algorithms but at a substantially reduced run time for very large single cell RNA-seq datasets. For example, FlowGrid can complete a one-hour clustering task for one million cells in about five\u2009min.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>https:\/\/github.com\/holab-hku\/FlowGrid.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btab521","type":"journal-article","created":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T19:06:50Z","timestamp":1626635210000},"page":"282-283","source":"Crossref","is-referenced-by-count":6,"title":["FlowGrid enables fast clustering of very large single-cell RNA-seq data"],"prefix":"10.1093","volume":"38","author":[{"given":"Xiunan","family":"Fang","sequence":"first","affiliation":[{"name":"School of Biomedical Sciences, Li Ka Shing Faculty of Medicine, The University of Hong Kong , Hong Kong SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2331-7011","authenticated-orcid":false,"given":"Joshua W K","family":"Ho","sequence":"additional","affiliation":[{"name":"School of Biomedical Sciences, Li Ka Shing Faculty of Medicine, The University of Hong Kong , Hong Kong SAR, China"},{"name":"Laboratory of Data Discovery for Health Limited (D24H), Hong Kong Science Park, New Territories, Hong Kong SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2021,7,21]]},"reference":[{"key":"2023020108390067200_btab521-B1","doi-asserted-by":"crossref","first-page":"411","DOI":"10.1038\/nbt.4096","article-title":"Integrating single-cell transcriptomic data across different conditions, technologies, and species","volume":"36","author":"Butler","year":"2018","journal-title":"Nat. Biotechnol"},{"key":"2023020108390067200_btab521-B2","doi-asserted-by":"crossref","first-page":"120","DOI":"10.1038\/s41593-017-0029-5","article-title":"Single-cell analysis of experience-dependent transcriptomic states in the mouse visual cortex","volume":"21","author":"Hrvatin","year":"2018","journal-title":"Nat. Neurosci"},{"key":"2023020108390067200_btab521-B3","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1038\/s41576-018-0088-9","article-title":"Challenges in unsupervised clustering of single-cell RNA-seq data","volume":"20","author":"Kiselev","year":"2019","journal-title":"Nat. Rev. Genet"},{"key":"2023020108390067200_btab521-B4","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1016\/j.cell.2015.05.047","article-title":"Data-driven phenotypic dissection of AML reveals progenitor-like cells that correlate with prognosis","volume":"162","author":"Levine","year":"2015","journal-title":"Cell"},{"key":"2023020108390067200_btab521-B5","doi-asserted-by":"crossref","first-page":"2778","DOI":"10.1093\/bioinformatics\/btaa042","article-title":"PARC: ultrafast and accurate clustering of phenotypic data of millions of single cells","volume":"36","author":"Stassen","year":"2020","journal-title":"Bioinformatics"},{"key":"2023020108390067200_btab521-B6","doi-asserted-by":"crossref","DOI":"10.1093\/database\/baaa073","article-title":"A curated database reveals trends in single-cell transcriptomics","volume":"2020","author":"Svensson","year":"2020","journal-title":"Database"},{"key":"2023020108390067200_btab521-B7","author":"Traag","year":"2019"},{"key":"2023020108390067200_btab521-B8","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1186\/s13059-017-1382-0","article-title":"SCANPY: large-scale single-cell gene expression data analysis","volume":"19","author":"Wolf","year":"2018","journal-title":"Genome Biol"},{"key":"2023020108390067200_btab521-B9","doi-asserted-by":"crossref","first-page":"281","DOI":"10.1016\/j.cels.2018.11.005","article-title":"Scrublet: computational identification of cell doublets in single-cell transcriptomic data","volume":"8","author":"Wolock","year":"2019","journal-title":"Cell Syst"},{"key":"2023020108390067200_btab521-B10","doi-asserted-by":"crossref","first-page":"35","DOI":"10.1186\/s12918-019-0690-2","article-title":"Ultrafast clustering of single-cell flow cytometry data using FlowGrid","volume":"13","author":"Ye","year":"2019","journal-title":"BMC Syst. Biol"},{"key":"2023020108390067200_btab521-B11","doi-asserted-by":"crossref","first-page":"999","DOI":"10.1016\/j.cell.2018.06.021","article-title":"Molecular architecture of the mouse nervous system","volume":"174","author":"Zeisel","year":"2018","journal-title":"Cell"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btab521\/39555278\/btab521.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/38\/1\/282\/49006286\/btab521.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/38\/1\/282\/49006286\/btab521.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T19:54:25Z","timestamp":1675281265000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/38\/1\/282\/6325016"}},"subtitle":[],"editor":[{"given":"Olga","family":"Vitek","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2021,7,21]]},"references-count":11,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021,12,22]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btab521","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2022,1,1]]},"published":{"date-parts":[[2021,7,21]]}}}