{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T12:49:15Z","timestamp":1776257355796,"version":"3.50.1"},"reference-count":41,"publisher":"Oxford University Press (OUP)","issue":"Supplement_1","license":[{"start":{"date-parts":[[2020,7,13]],"date-time":"2020-07-13T00:00:00Z","timestamp":1594598400000},"content-version":"vor","delay-in-days":12,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01HL137709"],"award-info":[{"award-number":["R01HL137709"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61922047"],"award-info":[{"award-number":["61922047"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["81890993"],"award-info":[{"award-number":["81890993"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61721003"],"award-info":[{"award-number":["61721003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Cellular Indexing of Transcriptomes and Epitopes by sequencing (CITE-seq), couples the measurement of surface marker proteins with simultaneous sequencing of mRNA at single cell level, which brings accurate cell surface phenotyping to single-cell transcriptomics. Unfortunately, multiplets in CITE-seq datasets create artificial cell types (ACT) and complicate the automation of cell surface phenotyping.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We propose CITE-sort, an artificial-cell-type aware surface marker clustering method\u00a0for CITE-seq. CITE-sort is aware of and is robust to multiplet-induced ACT. We benchmarked CITE-sort with real and simulated CITE-seq datasets and compared CITE-sort against canonical clustering methods. We show that CITE-sort produces the best clustering performance across the board. CITE-sort not only accurately identifies real biological cell types (BCT) but also consistently and reliably separates multiplet-induced artificial-cell-type droplet clusters from real BCT droplet clusters. In addition, CITE-sort organizes its clustering process with a binary tree, which facilitates easy interpretation and verification of its clustering result and simplifies cell-type annotation with domain knowledge in CITE-seq.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>http:\/\/github.com\/QiuyuLian\/CITE-sort.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data is available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaa467","type":"journal-article","created":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T15:11:27Z","timestamp":1593616287000},"page":"i542-i550","source":"Crossref","is-referenced-by-count":14,"title":["Artificial-cell-type aware cell-type classification in CITE-seq"],"prefix":"10.1093","volume":"36","author":[{"given":"Qiuyu","family":"Lian","sequence":"first","affiliation":[{"name":"MOE Key Laboratory of Bioinformatics, BNRIST Bioinformatics Division, Department of Automation, Tsinghua University , Beijing 100084, China"},{"name":"Department of Pediatrics, School of Medicine, University of Pittsburgh, UPMC Children\u2019s Hospital of Pittsburgh , Pittsburgh, PA 15224, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongyi","family":"Xin","sequence":"additional","affiliation":[{"name":"Department of Pediatrics, School of Medicine, University of Pittsburgh, UPMC Children\u2019s Hospital of Pittsburgh , Pittsburgh, PA 15224, USA"},{"name":"University of Michigan-Shanghai Jiao Tong University Joint Institute, Shanghai Jiao Tong University , Shanghai 200240, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianzhu","family":"Ma","sequence":"additional","affiliation":[{"name":"Department of Biochemistry and Computer Science, Purdue University , West Lafayette, IA 47907, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liza","family":"Konnikova","sequence":"additional","affiliation":[{"name":"Department of Pediatrics, School of Medicine, University of Pittsburgh, UPMC Children\u2019s Hospital of Pittsburgh , Pittsburgh, PA 15224, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Pediatrics, School of Medicine, University of Pittsburgh, UPMC Children\u2019s Hospital of Pittsburgh , Pittsburgh, PA 15224, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jin","family":"Gu","sequence":"additional","affiliation":[{"name":"MOE Key Laboratory of Bioinformatics, BNRIST Bioinformatics Division, Department of Automation, Tsinghua University , Beijing 100084, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kong","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Medicine, School of Medicine, University of Pittsburgh , Pittsburgh, PA 15213, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2020,7,13]]},"reference":[{"key":"2024021913360186700_btaa467-B1","doi-asserted-by":"crossref","first-page":"R40","DOI":"10.1093\/hmg\/ddy100","article-title":"Cell type discovery using single-cell transcriptomics: implications for ontological representation","volume":"27","author":"Aevermann","year":"2018","journal-title":"Human Mol. Genet"},{"key":"2024021913360186700_btaa467-B2","first-page":"1583","article-title":"A public BCR present in a unique dual-receptor-expressing lymphocyte from type 1 diabetes patients encodes a potent T cell autoantigen","volume-title":"Cell","author":"Ahmed","year":"2019"},{"key":"2024021913360186700_btaa467-B3","doi-asserted-by":"crossref","first-page":"982","DOI":"10.3389\/fimmu.2019.00982","article-title":"Mass cytometry identifies distinct subsets of regulatory T cells and natural killer cells associated with high risk for type 1 diabetes","volume":"10","author":"Barcenilla","year":"2019","journal-title":"Front. Immunol"},{"key":"2024021913360186700_btaa467-B4","year":"2020"},{"key":"2024021913360186700_btaa467-B5","first-page":"17","article-title":"EBK-means: a clustering technique based on elbow method and k-means in WSN","volume":"105","author":"Bholowalia","year":"2014","journal-title":"Int. J. Comput. Appl"},{"key":"2024021913360186700_btaa467-B6","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1214\/06-BA104","article-title":"Variational inference for Dirichlet process mixtures","volume":"1","author":"Blei","year":"2006","journal-title":"Bayesian Anal"},{"key":"2024021913360186700_btaa467-B7","doi-asserted-by":"crossref","DOI":"10.1017\/9781108644181","volume-title":"Model-Based Clustering and Classification for Data Science: With Applications in R","author":"Bouveyron","year":"2019"},{"key":"2024021913360186700_btaa467-B8","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1186\/s40169-017-0150-9","article-title":"Single-cell RNA-sequencing of the brain","volume":"6","author":"Cuevas-Diaz Duran","year":"2017","journal-title":"Clin. Transl. Med"},{"key":"2024021913360186700_btaa467-B9","first-page":"1934","article-title":"Single-cell transcriptome analysis maps the developmental track of the human heart","volume-title":"Cell Rep","author":"Cui","year":"2019"},{"key":"2024021913360186700_btaa467-B10","doi-asserted-by":"crossref","first-page":"653","DOI":"10.1007\/s11390-010-9355-8","article-title":"Dirichlet process Gaussian mixture models: choice of the base distribution","volume":"25","author":"G\u00f6r\u00fcr","year":"2010","journal-title":"J. Comput. Sci. Technol"},{"key":"2024021913360186700_btaa467-B11","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s11634-010-0058-3","article-title":"Methods for merging Gaussian mixture components","volume":"4","author":"Hennig","year":"2010","journal-title":"Adv. Data Anal. Classif"},{"key":"2024021913360186700_btaa467-B12","doi-asserted-by":"crossref","first-page":"740","DOI":"10.1038\/nmeth.2967","article-title":"Bayesian approach to single-cell differential expression analysis","volume":"11","author":"Kharchenko","year":"2014","journal-title":"Nat. Methods"},{"key":"2024021913360186700_btaa467-B13","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1038\/s41576-018-0088-9","article-title":"Challenges in unsupervised clustering of single-cell RNA-seq data","volume":"20","author":"Kiselev","year":"2019","journal-title":"Nat. Rev. Genet"},{"key":"2024021913360186700_btaa467-B14","doi-asserted-by":"crossref","first-page":"dev181396","DOI":"10.1242\/dev.181396","article-title":"Single cell analyses of development in the modern era","volume":"146","author":"Klein","year":"2019","journal-title":"Development"},{"key":"2024021913360186700_btaa467-B15","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1007\/s13748-016-0094-0","article-title":"Learning from imbalanced data: open challenges and future directions","volume":"5","author":"Krawczyk","year":"2016","journal-title":"Progr. Artif. Intell"},{"key":"2024021913360186700_btaa467-B16","doi-asserted-by":"crossref","first-page":"595","DOI":"10.1038\/d41586-018-05214-w","article-title":"Single-cell approaches to immune profiling","volume":"557","author":"Landhuis","year":"2018","journal-title":"Nature"},{"key":"2024021913360186700_btaa467-B17","first-page":"1","article-title":"An accurate and robust imputation method scImpute for single-cell RNA-seq data","volume":"9","author":"Li","year":"2018","journal-title":"Nat. Commun"},{"key":"2024021913360186700_btaa467-B18","author":"Lu","year":"2019"},{"key":"2024021913360186700_btaa467-B19","doi-asserted-by":"crossref","first-page":"2881","DOI":"10.1162\/089976600300014764","article-title":"Asymptotic convergence rate of the EM algorithm for Gaussian mixtures","volume":"12","author":"Ma","year":"2000","journal-title":"Neural Comput"},{"key":"2024021913360186700_btaa467-B20","doi-asserted-by":"crossref","first-page":"1202","DOI":"10.1016\/j.cell.2015.05.002","article-title":"Highly parallel genome-wide expression profiling of individual cells using nanoliter droplets","volume":"161","author":"Macosko","year":"2015","journal-title":"Cell"},{"key":"2024021913360186700_btaa467-B21","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1038\/nri3158","article-title":"Standardizing immunophenotyping for the human immunology project","volume":"12","author":"Maecker","year":"2012","journal-title":"Nat. Rev. Immunol"},{"key":"2024021913360186700_btaa467-B22","first-page":"329","article-title":"DoubletFinder: doublet detection in single-cell RNA sequencing data using artificial nearest neighbors","volume-title":"Cell Syst","author":"McGinnis","year":"2019"},{"key":"2024021913360186700_btaa467-B23","first-page":"1655","author":"Naim","year":"2012"},{"key":"2024021913360186700_btaa467-B24","doi-asserted-by":"crossref","first-page":"46","DOI":"10.1186\/s40169-017-0177-y","article-title":"Using single-cell multiple omics approaches to resolve tumor heterogeneity","volume":"6","author":"Ortega","year":"2017","journal-title":"Clin. Transl. Med"},{"key":"2024021913360186700_btaa467-B25","first-page":"4635","article-title":"Adjusting for chance clustering comparison measures","volume":"17","author":"Romano","year":"2016","journal-title":"J. Mach. Learn. Res"},{"key":"2024021913360186700_btaa467-B26","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1165\/rcmb.2018-0416TR","article-title":"The Human Lung Cell Atlas: a high-resolution reference map of the human lung in health and disease","volume":"61","author":"Schiller","year":"2019","journal-title":"Am. J. Respir. Cell Mol. Biol"},{"key":"2024021913360186700_btaa467-B27","doi-asserted-by":"crossref","first-page":"100077","DOI":"10.1016\/j.bdq.2018.12.002","article-title":"Shedding light: the importance of reverse transcription efficiency standards in data interpretation","volume":"17","author":"Schwaber","year":"2019","journal-title":"Biomol. Detect. Quantif"},{"key":"2024021913360186700_btaa467-B28","doi-asserted-by":"crossref","first-page":"780","DOI":"10.1016\/j.cell.2016.04.019","article-title":"Mass cytometry: single cells, many features","volume":"165","author":"Spitzer","year":"2016","journal-title":"Cell"},{"key":"2024021913360186700_btaa467-B29","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1038\/nrg3833","article-title":"Computational and analytical challenges in single-cell transcriptomics","volume":"16","author":"Stegle","year":"2015","journal-title":"Nat. Rev. Genet"},{"key":"2024021913360186700_btaa467-B30","doi-asserted-by":"crossref","first-page":"865","DOI":"10.1038\/nmeth.4380","article-title":"Simultaneous epitope and transcriptome measurement in single cells","volume":"14","author":"Stoeckius","year":"2017","journal-title":"Nat. Methods"},{"key":"2024021913360186700_btaa467-B31","doi-asserted-by":"crossref","first-page":"224","DOI":"10.1186\/s13059-018-1603-1","article-title":"Cell hashing with barcoded antibodies enables multiplexing and doublet detection for single cell genomics","volume":"19","author":"Stoeckius","year":"2018","journal-title":"Genome Biol"},{"key":"2024021913360186700_btaa467-B32","doi-asserted-by":"crossref","first-page":"1649","DOI":"10.1038\/s41467-019-09639-3","article-title":"A Bayesian mixture model for clustering droplet-based single-cell transcriptomic data from population studies","volume":"10","author":"Sun","year":"2019","journal-title":"Nat. Commun"},{"key":"2024021913360186700_btaa467-B33","year":"2020"},{"key":"2024021913360186700_btaa467-B34","first-page":"3221","article-title":"Accelerating t-SNE using tree-based algorithms","volume":"15","author":"Van Der Maaten","year":"2014","journal-title":"J. Mach. Learn. Res"},{"key":"2024021913360186700_btaa467-B35","doi-asserted-by":"crossref","first-page":"380","DOI":"10.3389\/fimmu.2015.00380","article-title":"An introduction to automated flow cytometry gating tools and their implementation","volume":"6","author":"Verschoor","year":"2015","journal-title":"Front. Immunol"},{"key":"2024021913360186700_btaa467-B36","first-page":"1330","article-title":"A single-cell atlas of the tumor and immune ecosystem of human breast cancer","volume-title":"Cell","author":"Wagner","year":"2019"},{"key":"2024021913360186700_btaa467-B37","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1186\/s13059-017-1382-0","article-title":"SCANPY: large-scale single-cell gene expression data analysis","volume":"19","author":"Wolf","year":"2018","journal-title":"Genome Biol"},{"key":"2024021913360186700_btaa467-B38","first-page":"281","article-title":"Scrublet: computational identification of cell doublets in single-cell transcriptomic data","volume-title":"Cell Syst","author":"Wolock","year":"2019"},{"key":"2024021913360186700_btaa467-B39","doi-asserted-by":"publisher","first-page":"828483","DOI":"10.1101\/828483","author":"Xin","year":"2019"},{"key":"2024021913360186700_btaa467-B40","first-page":"89","author":"Xuan","year":"2013"},{"key":"2024021913360186700_btaa467-B41","doi-asserted-by":"crossref","first-page":"632","DOI":"10.1214\/19-EJS1660","article-title":"Statistical convergence of the EM algorithm on Gaussian mixture models","volume":"14","author":"Zhao","year":"2020","journal-title":"Electron. J. Statist"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/Supplement_1\/i542\/56702241\/bioinformatics_36_supplement1_i542.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/Supplement_1\/i542\/56702241\/bioinformatics_36_supplement1_i542.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,19]],"date-time":"2024-02-19T08:46:53Z","timestamp":1708332413000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/36\/Supplement_1\/i542\/5870491"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7,1]]},"references-count":41,"journal-issue":{"issue":"Supplement_1","published-print":{"date-parts":[[2020,7,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaa467","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2020.01.31.928010","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2020,7]]},"published":{"date-parts":[[2020,7,1]]}}}