{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T17:45:06Z","timestamp":1763142306161,"version":"3.41.2"},"reference-count":38,"publisher":"Oxford University Press (OUP)","issue":"4","license":[{"start":{"date-parts":[[2024,7,8]],"date-time":"2024-07-08T00:00:00Z","timestamp":1720396800000},"content-version":"vor","delay-in-days":46,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"National Research Foundation of Korea: Basic Science Research Programs","award":["2020R1A2C210226811","RS-2023-00248114"],"award-info":[{"award-number":["2020R1A2C210226811","RS-2023-00248114"]}]},{"name":"Genomics Program","award":["2020M3C9A5086069"],"award-info":[{"award-number":["2020M3C9A5086069"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,23]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>Unsupervised feature selection is a critical step for efficient and accurate analysis of single-cell RNA-seq data. Previous benchmarks used two different criteria to compare feature selection methods: (i) proportion of ground-truth marker genes included in the selected features and (ii) accuracy of cell clustering using ground-truth cell types. Here, we systematically compare the performance of 11 feature selection methods for both criteria. We first demonstrate the discordance between these criteria and suggest using the latter. We then compare the distribution of selected genes in their means between feature selection methods. We show that lowly expressed genes exhibit seriously high coefficients of variation and are mostly excluded by high-performance methods. In particular, high-deviation- and high-expression-based methods outperform the widely used in Seurat package in clustering cells and data visualization. We further show they also enable a clear separation of the same cell type from different tissues as well as accurate estimation of cell trajectories.<\/jats:p>","DOI":"10.1093\/bib\/bbae317","type":"journal-article","created":{"date-parts":[[2024,7,8]],"date-time":"2024-07-08T12:31:09Z","timestamp":1720441869000},"source":"Crossref","is-referenced-by-count":6,"title":["Characterizing efficient feature selection for single-cell expression analysis"],"prefix":"10.1093","volume":"25","author":[{"given":"Juok","family":"Cho","sequence":"first","affiliation":[{"name":"Department of Biomedical Engineering, Ulsan National Institute of Science and Technology (UNIST) , 50, UNIST-gil, Ulsan 44919 , Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5438-3903","authenticated-orcid":false,"given":"Bukyung","family":"Baik","sequence":"additional","affiliation":[{"name":"Department of Biological Sciences, Ulsan National Institute of Science and Technology (UNIST) , 50, UNIST-gil, Ulsan 44919 , Republic of Korea"}]},{"given":"Hai C T","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Department of Biological Sciences, Ulsan National Institute of Science and Technology (UNIST) , 50, UNIST-gil, Ulsan 44919 , Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9452-5849","authenticated-orcid":false,"given":"Daeui","family":"Park","sequence":"additional","affiliation":[{"name":"Department of Predictive Toxicology, Korea Institute of Toxicology , 141, Gajeong-ro, Daejeon 34114 , Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0239-2899","authenticated-orcid":false,"given":"Dougu","family":"Nam","sequence":"additional","affiliation":[{"name":"Department of Biological Sciences, Ulsan National Institute of Science and Technology (UNIST) , 50, UNIST-gil, Ulsan 44919 , Republic of Korea"},{"name":"Department of Mathematical Sciences, Ulsan National Institute of Science and Technology (UNIST) , 50, UNIST-gil, Ulsan 44919 , Republic of Korea"}]}],"member":"286","published-online":{"date-parts":[[2024,7,8]]},"reference":[{"key":"2024070812263343100_ref1","doi-asserted-by":"crossref","first-page":"e694","DOI":"10.1002\/ctm2.694","article-title":"Single-cell RNA sequencing technologies and applications: a brief overview","volume":"12","author":"Jovic","year":"2022","journal-title":"Clin Transl Med"},{"key":"2024070812263343100_ref2","doi-asserted-by":"crossref","first-page":"395","DOI":"10.1038\/s41576-022-00449-w","article-title":"Single-cell atlases: shared and tissue-specific cell types across human organs","volume":"23","author":"Elmentaite","year":"2022","journal-title":"Nat Rev Genet"},{"key":"2024070812263343100_ref3","doi-asserted-by":"crossref","first-page":"496","DOI":"10.1038\/s41573-023-00688-4","article-title":"Applications of single-cell RNA sequencing in drug discovery and development","volume":"22","author":"Van de Sande","year":"2023","journal-title":"Nat Rev Drug Discov"},{"key":"2024070812263343100_ref4","doi-asserted-by":"crossref","first-page":"3573","DOI":"10.1016\/j.cell.2021.04.048","article-title":"Integrated analysis of multimodal single-cell data","volume":"184","author":"Hao","year":"2021","journal-title":"Cell"},{"key":"2024070812263343100_ref5","doi-asserted-by":"crossref","first-page":"bbab295","DOI":"10.1093\/bib\/bbab295","article-title":"Selecting gene features for unsupervised analysis of single-cell gene expression data","volume":"22","author":"Sheng","year":"2021","journal-title":"Brief Bioinform"},{"key":"2024070812263343100_ref6","doi-asserted-by":"crossref","first-page":"2865","DOI":"10.1093\/bioinformatics\/bty1044","article-title":"M3Drop: dropout-based feature selection for scRNASeq","volume":"35","author":"Andrews","year":"2019","journal-title":"Bioinformatics"},{"key":"2024070812263343100_ref7","doi-asserted-by":"crossref","first-page":"5849","DOI":"10.1038\/s41467-021-26085-2","article-title":"DUBStepR is a scalable correlation-based feature selection method for accurately clustering single-cell data","volume":"12","author":"Ranjan","year":"2021","journal-title":"Nat Commun"},{"key":"2024070812263343100_ref8","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1186\/s13059-019-1861-6","article-title":"Feature selection and dimension reduction for single-cell RNA-Seq based on a multinomial model","volume":"20","author":"Townes","year":"2019","journal-title":"Genome Biol"},{"key":"2024070812263343100_ref9","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1186\/s13059-020-02136-7","article-title":"pipeComp, a general framework for the evaluation of computational pipelines, reveals performant single cell RNA-seq preprocessing tools","volume":"21","author":"Germain","year":"2020","journal-title":"Genome Biol"},{"key":"2024070812263343100_ref10","doi-asserted-by":"crossref","first-page":"5692","DOI":"10.1038\/s41467-021-25960-2","article-title":"Confronting false discoveries in single-cell differential expression","volume":"12","author":"Squair","year":"2021","journal-title":"Nat Commun"},{"key":"2024070812263343100_ref11","doi-asserted-by":"crossref","first-page":"1570","DOI":"10.1038\/s41467-023-37126-3","article-title":"Benchmarking integration of single-cell differential expression","volume":"14","author":"Nguyen","year":"2023","journal-title":"Nat Commun"},{"key":"2024070812263343100_ref12","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1038\/s41576-020-00292-x","article-title":"Deciphering cell-cell interactions and communication from gene expression","volume":"22","author":"Armingol","year":"2021","journal-title":"Nat Rev Genet"},{"key":"2024070812263343100_ref13","doi-asserted-by":"crossref","first-page":"3224","DOI":"10.1038\/s41467-022-30755-0","article-title":"Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data","volume":"13","author":"Dimitrov","year":"2022","journal-title":"Nat Commun"},{"key":"2024070812263343100_ref14","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1186\/s13059-021-02584-9","article-title":"Comparison and evaluation of statistical error models for scRNA-seq","volume":"23","author":"Choudhary","year":"2022","journal-title":"Genome Biol"},{"key":"2024070812263343100_ref15","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1186\/s13059-020-02096-y","article-title":"Demystifying \u201cdrop-outs\u201d in single-cell UMI data","volume":"21","author":"Kim","year":"2020","journal-title":"Genome Biol"},{"key":"2024070812263343100_ref16","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1186\/s13059-017-1305-0","article-title":"Splatter: simulation of single-cell RNA sequencing data","volume":"18","author":"Zappia","year":"2017","journal-title":"Genome Biol"},{"key":"2024070812263343100_ref17","doi-asserted-by":"crossref","first-page":"194","DOI":"10.1186\/s13059-019-1795-z","article-title":"A comparison of automatic cell identification methods for single-cell RNA sequencing data","volume":"20","author":"Abdelaal","year":"2019","journal-title":"Genome Biol"},{"key":"2024070812263343100_ref18","doi-asserted-by":"crossref","first-page":"324","DOI":"10.1016\/j.immuni.2009.03.003","article-title":"Follicular helper T cells: lineage and location","volume":"30","author":"Fazilleau","year":"2009","journal-title":"Immunity"},{"key":"2024070812263343100_ref19","doi-asserted-by":"crossref","first-page":"1848","DOI":"10.1161\/CIRCULATIONAHA.119.041433","article-title":"Single-cell RNA sequencing unveils unique transcriptomic signatures of organ-specific endothelial cells","volume":"142","author":"Paik","year":"2020","journal-title":"Circulation"},{"key":"2024070812263343100_ref20","doi-asserted-by":"crossref","first-page":"367","DOI":"10.1038\/s41586-018-0590-4","article-title":"Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris","volume":"562","author":"Tabula Muris","year":"2018","journal-title":"Nature"},{"key":"2024070812263343100_ref21","doi-asserted-by":"crossref","first-page":"619","DOI":"10.1038\/s41586-020-2922-4","article-title":"A molecular cell atlas of the human lung from single-cell RNA sequencing","volume":"587","author":"Travaglini","year":"2020","journal-title":"Nature"},{"key":"2024070812263343100_ref22","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1038\/s41590-018-0276-y","article-title":"Reference-based analysis of lung single-cell sequencing reveals a transitional profibrotic macrophage","volume":"20","author":"Aran","year":"2019","journal-title":"Nat Immunol"},{"key":"2024070812263343100_ref23","doi-asserted-by":"crossref","first-page":"218","DOI":"10.1186\/s13059-020-02132-x","article-title":"A systematic evaluation of single-cell RNA-sequencing imputation methods","volume":"21","author":"Hou","year":"2020","journal-title":"Genome Biol"},{"key":"2024070812263343100_ref24","doi-asserted-by":"crossref","first-page":"539","DOI":"10.1038\/s41592-018-0033-z","article-title":"SAVER: gene expression recovery for single-cell RNA sequencing","volume":"15","author":"Huang","year":"2018","journal-title":"Nat Methods"},{"key":"2024070812263343100_ref25","doi-asserted-by":"crossref","first-page":"875","DOI":"10.1038\/s41592-019-0537-1","article-title":"Data denoising with transfer learning in single-cell transcriptomics","volume":"16","author":"Wang","year":"2019","journal-title":"Nat Methods"},{"key":"2024070812263343100_ref26","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1038\/s41587-021-01206-w","article-title":"A python library for probabilistic analysis of single-cell omics data","volume":"40","author":"Gayoso","year":"2022","journal-title":"Nat Biotechnol"},{"key":"2024070812263343100_ref27","doi-asserted-by":"crossref","first-page":"547","DOI":"10.1038\/s41587-019-0071-9","article-title":"A comparison of single-cell trajectory inference methods","volume":"37","author":"Saelens","year":"2019","journal-title":"Nat Biotechnol"},{"key":"2024070812263343100_ref28","doi-asserted-by":"crossref","first-page":"3942","DOI":"10.1038\/s41467-021-24152-2","article-title":"Spearheading future omics analyses using Dyngen, a multi-modal simulator of single cells","volume":"12","author":"Cannoodt","year":"2021","journal-title":"Nat Commun"},{"key":"2024070812263343100_ref29","doi-asserted-by":"crossref","first-page":"477","DOI":"10.1186\/s12864-018-4772-0","article-title":"Slingshot: cell lineage and pseudotime inference for single-cell transcriptomics","volume":"19","author":"Street","year":"2018","journal-title":"BMC Genomics"},{"key":"2024070812263343100_ref30","first-page":"060012","article-title":"Fast gene set enrichment analysis","author":"Korotkevich","year":"2021"},{"key":"2024070812263343100_ref31","doi-asserted-by":"crossref","first-page":"D687","DOI":"10.1093\/nar\/gkab1028","article-title":"The reactome pathway knowledgebase 2022","volume":"50","author":"Gillespie","year":"2022","journal-title":"Nucleic Acids Res"},{"key":"2024070812263343100_ref32","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1186\/s13059-019-1874-1","article-title":"Normalization and variance stabilization of single-cell RNA-seq data using regularized negative binomial regression","volume":"20","author":"Hafemeister","year":"2019","journal-title":"Genome Biol"},{"key":"2024070812263343100_ref33","doi-asserted-by":"crossref","first-page":"332","DOI":"10.1038\/s41586-019-1195-2","article-title":"Single-cell transcriptomic analysis of Alzheimer's disease","volume":"570","author":"Mathys","year":"2019","journal-title":"Nature"},{"key":"2024070812263343100_ref34","doi-asserted-by":"crossref","first-page":"2285","DOI":"10.1038\/s41467-020-16164-1","article-title":"Single-cell RNA sequencing demonstrates the molecular and cellular reprogramming of metastatic lung adenocarcinoma","volume":"11","author":"Kim","year":"2020","journal-title":"Nat Commun"},{"key":"2024070812263343100_ref35","doi-asserted-by":"crossref","first-page":"829","DOI":"10.1080\/01621459.1979.10481038","article-title":"Robust locally weighted regression and smoothing scatterplots","volume":"74","author":"Cleveland","year":"1979","journal-title":"J Am Stat Assoc"},{"key":"2024070812263343100_ref36","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1093\/bioinformatics\/btp616","article-title":"edgeR: a bioconductor package for differential expression analysis of digital gene expression data","volume":"26","author":"Robinson","year":"2010","journal-title":"Bioinformatics"},{"key":"2024070812263343100_ref37","doi-asserted-by":"crossref","first-page":"289","DOI":"10.32614\/RJ-2016-021","article-title":"Mclust 5: clustering, classification and density estimation using Gaussian finite mixture models","volume":"8","author":"Scrucca","year":"2016","journal-title":"R Journal"},{"volume-title":"aricode: Efficient Computations of Standard Clustering Comparison Measures","year":"2022","author":"Chiquet","key":"2024070812263343100_ref38"}],"container-title":["Briefings in Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/25\/4\/bbae317\/58468911\/bbae317.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/25\/4\/bbae317\/58468911\/bbae317.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,8]],"date-time":"2024-07-08T12:31:30Z","timestamp":1720441890000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bib\/article\/doi\/10.1093\/bib\/bbae317\/7709086"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,23]]},"references-count":38,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,5,23]]}},"URL":"https:\/\/doi.org\/10.1093\/bib\/bbae317","relation":{},"ISSN":["1467-5463","1477-4054"],"issn-type":[{"type":"print","value":"1467-5463"},{"type":"electronic","value":"1477-4054"}],"subject":[],"published-other":{"date-parts":[[2024,7]]},"published":{"date-parts":[[2024,5,23]]},"article-number":"bbae317"}}