{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T20:33:56Z","timestamp":1772138036499,"version":"3.50.1"},"reference-count":48,"publisher":"Oxford University Press (OUP)","issue":"10","license":[{"start":{"date-parts":[[2020,2,13]],"date-time":"2020-02-13T00:00:00Z","timestamp":1581552000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01GM124061"],"award-info":[{"award-number":["R01GM124061"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,5,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Batch effect is a frequent challenge in deep sequencing data analysis that can lead to misleading conclusions. Existing methods do not correct batch effects satisfactorily, especially with single-cell RNA sequencing (RNA-seq) data.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We present scBatch, a numerical algorithm for batch-effect correction on bulk and single-cell RNA-seq data with emphasis on improving both clustering and gene differential expression analysis. scBatch is not restricted by assumptions on the mechanism of batch-effect generation. As shown in simulations and real data analyses, scBatch outperforms benchmark batch-effect correction methods.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>The R package is available at github.com\/tengfei-emory\/scBatch. The code to generate results and figures in this article is available at github.com\/tengfei-emory\/scBatch-paper-scripts.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaa097","type":"journal-article","created":{"date-parts":[[2020,2,6]],"date-time":"2020-02-06T15:10:54Z","timestamp":1581001854000},"page":"3115-3123","source":"Crossref","is-referenced-by-count":32,"title":["scBatch: batch-effect correction of RNA-seq data through sample distance matrix adjustment"],"prefix":"10.1093","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7888-1715","authenticated-orcid":false,"given":"Teng","family":"Fei","sequence":"first","affiliation":[{"name":"Department of Biostatistics and Bioinformatics , Emory University, Atlanta, GA 30322, USA"}]},{"given":"Tianwei","family":"Yu","sequence":"additional","affiliation":[{"name":"Department of Biostatistics and Bioinformatics , Emory University, Atlanta, GA 30322, USA"}]}],"member":"286","published-online":{"date-parts":[[2020,2,13]]},"reference":[{"key":"2023013111522713800_btaa097-B1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.2140\/pjm.1966.16.1","article-title":"Minimization of functions having Lipschitz continuous first partial derivatives","volume":"16","author":"Armijo","year":"1966","journal-title":"Pacific J. Math"},{"key":"2023013111522713800_btaa097-B2","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1186\/s13059-016-0927-y","article-title":"Design and computational analysis of single-cell RNA-sequencing experiments","volume":"17","author":"Bacher","year":"2016","journal-title":"Genome Biol"},{"key":"2023013111522713800_btaa097-B3","doi-asserted-by":"crossref","first-page":"38","DOI":"10.1038\/nbt.4314","article-title":"Dimensionality reduction for visualizing single-cell data using UMAP","volume":"37","author":"Becht","year":"2019","journal-title":"Nat. Biotechnol"},{"key":"2023013111522713800_btaa097-B4","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1111\/j.2517-6161.1995.tb02031.x","article-title":"Controlling the false discovery rate: a practical and powerful approach to multiple testing","volume":"57","author":"Benjamini","year":"1995","journal-title":"J. R. Stat. Soc. Series B Methodol"},{"key":"2023013111522713800_btaa097-B5","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1038\/s41592-018-0254-1","article-title":"A test metric for assessing single-cell RNA-seq batch correction","volume":"16","author":"B\u00fcttner","year":"2019","journal-title":"Nat. Methods"},{"key":"2023013111522713800_btaa097-B6","doi-asserted-by":"crossref","first-page":"13587","DOI":"10.1038\/s41598-017-13665-w","article-title":"Controlling for confounding effects in single cell RNA sequencing studies using both control and target genes","volume":"7","author":"Chen","year":"2017","journal-title":"Sci. Rep"},{"key":"2023013111522713800_btaa097-B7","author":"Determan","year":"2019"},{"key":"2023013111522713800_btaa097-B8","doi-asserted-by":"crossref","first-page":"1054","DOI":"10.1016\/j.csda.2013.02.005","article-title":"RcppArmadillo: accelerating R with high-performance C++ linear algebra","volume":"71","author":"Eddelbuettel","year":"2014","journal-title":"Comput. Stat. Data Anal"},{"key":"2023013111522713800_btaa097-B9","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1093\/bioinformatics\/btl567","article-title":"Using GOstats to test gene lists for GO term association","volume":"23","author":"Falcon","year":"2007","journal-title":"Bioinformatics"},{"key":"2023013111522713800_btaa097-B10","doi-asserted-by":"crossref","first-page":"2634","DOI":"10.1093\/bioinformatics\/bty117","article-title":"Mitigating the adverse impact of batch effects in sample pattern detection","volume":"34","author":"Fei","year":"2018","journal-title":"Bioinformatics"},{"key":"2023013111522713800_btaa097-B11","doi-asserted-by":"crossref","first-page":"539","DOI":"10.1093\/biostatistics\/kxr034","article-title":"Using control genes to correct for unwanted variation in microarray data","volume":"13","author":"Gagnon-Bartsch","year":"2012","journal-title":"Biostatistics"},{"key":"2023013111522713800_btaa097-B12","doi-asserted-by":"crossref","first-page":"121","DOI":"10.12688\/f1000research.6536.1","article-title":"A reanalysis of mouse ENCODE comparative gene expression data","volume":"4","author":"Gilad","year":"2015","journal-title":"F1000Res"},{"key":"2023013111522713800_btaa097-B13","doi-asserted-by":"crossref","first-page":"569","DOI":"10.1038\/ng.3259","article-title":"Understanding multicellular function and disease with human tissue-specific networks","volume":"47","author":"Greene","year":"2015","journal-title":"Nat. Genet"},{"key":"2023013111522713800_btaa097-B14","doi-asserted-by":"crossref","first-page":"266","DOI":"10.1016\/j.stem.2016.05.010","article-title":"De novo prediction of stem cell identity using single-cell transcriptome data","volume":"19","author":"Gr\u00fcn","year":"2016","journal-title":"Cell Stem Cell"},{"key":"2023013111522713800_btaa097-B15","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1038\/nbt.4091","article-title":"Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors","volume":"36","author":"Haghverdi","year":"2018","journal-title":"Nat. Biotechnol"},{"key":"2023013111522713800_btaa097-B16","doi-asserted-by":"crossref","first-page":"666","DOI":"10.1016\/j.celrep.2012.08.003","article-title":"CEL-Seq: single-cell RNA-Seq by multiplexed linear amplification","volume":"2","author":"Hashimshony","year":"2012","journal-title":"Cell Rep"},{"key":"2023013111522713800_btaa097-B17","doi-asserted-by":"crossref","first-page":"562","DOI":"10.1093\/biostatistics\/kxx053","article-title":"Missing data and technical variability in single-cell RNA-sequencing experiments","volume":"19","author":"Hicks","year":"2018","journal-title":"Biostatistics"},{"key":"2023013111522713800_btaa097-B18","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1007\/BF01908075","article-title":"Comparing partitions","volume":"2","author":"Hubert","year":"1985","journal-title":"J. Classif"},{"key":"2023013111522713800_btaa097-B19","doi-asserted-by":"crossref","first-page":"1543","DOI":"10.1101\/gr.121095.111","article-title":"Synthetic spike-in standards for RNA-seq experiments","volume":"21","author":"Jiang","year":"2011","journal-title":"Genome Res"},{"key":"2023013111522713800_btaa097-B20","doi-asserted-by":"crossref","first-page":"118","DOI":"10.1093\/biostatistics\/kxj037","article-title":"Adjusting batch effects in microarray expression data using empirical Bayes methods","volume":"8","author":"Johnson","year":"2007","journal-title":"Biostatistics"},{"key":"2023013111522713800_btaa097-B21","doi-asserted-by":"crossref","first-page":"8687","DOI":"10.1038\/ncomms9687","article-title":"Characterizing noise structure in single-cell RNA-seq distinguishes genuine from technical stochastic allelic expression","volume":"6","author":"Kim","year":"2015","journal-title":"Nat. Commun"},{"key":"2023013111522713800_btaa097-B22","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1038\/nmeth.4236","article-title":"SC3: consensus clustering of single-cell RNA-seq data","volume":"14","author":"Kiselev","year":"2017","journal-title":"Nat. Methods"},{"key":"2023013111522713800_btaa097-B23","doi-asserted-by":"crossref","first-page":"208","DOI":"10.1101\/gr.212720.116","article-title":"Single-cell transcriptomes identify human islet cell signatures and reveal cell-type\u2013specific expression changes in type 2 diabetes","volume":"27","author":"Lawlor","year":"2017","journal-title":"Genome Res"},{"key":"2023013111522713800_btaa097-B24","doi-asserted-by":"crossref","first-page":"e161","DOI":"10.1093\/nar\/gku864","article-title":"svaseq: removing batch effects and other unwanted noise from sequencing data","volume":"42","author":"Leek","year":"2014","journal-title":"Nucleic Acids Res"},{"key":"2023013111522713800_btaa097-B25","doi-asserted-by":"crossref","first-page":"17224","DOI":"10.1073\/pnas.1413624111","article-title":"Comparison of the transcriptional landscapes between human and mouse tissues","volume":"111","author":"Lin","year":"2014","journal-title":"Proc. Natl. Acad. Sci. USA"},{"key":"2023013111522713800_btaa097-B26","doi-asserted-by":"crossref","first-page":"581","DOI":"10.1080\/01621459.2018.1497494","article-title":"Batch effects correction with unknown subtypes","volume":"114","author":"Luo","year":"2019","journal-title":"J. Am. Stat. Assoc"},{"key":"2023013111522713800_btaa097-B27","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"Maaten","year":"2008","journal-title":"J. Mach. Learn. Res"},{"key":"2023013111522713800_btaa097-B28","doi-asserted-by":"crossref","first-page":"1179","DOI":"10.1093\/bioinformatics\/btw777","article-title":"Scater: pre-processing, quality control, normalization and visualization of single-cell RNA-seq data in R","volume":"33","author":"McCarthy","year":"2017","journal-title":"Bioinformatics"},{"key":"2023013111522713800_btaa097-B29","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1016\/j.cels.2016.09.002","article-title":"A single-cell transcriptome atlas of the human pancreas","volume":"3","author":"Muraro","year":"2016","journal-title":"Cell Syst"},{"key":"2023013111522713800_btaa097-B30","doi-asserted-by":"crossref","first-page":"1096","DOI":"10.1038\/nmeth.2639","article-title":"Smart-seq2 for sensitive full-length transcriptome profiling in single cells","volume":"10","author":"Picelli","year":"2013","journal-title":"Nat. Methods"},{"key":"2023013111522713800_btaa097-B31","doi-asserted-by":"crossref","first-page":"896","DOI":"10.1038\/nbt.2931","article-title":"Normalization of RNA-seq data using factor analysis of control genes or samples","volume":"32","author":"Risso","year":"2014","journal-title":"Nat. Biotechnol"},{"key":"2023013111522713800_btaa097-B32","doi-asserted-by":"crossref","first-page":"e47","DOI":"10.1093\/nar\/gkv007","article-title":"limma powers differential expression analyses for RNA-sequencing and microarray studies","volume":"43","author":"Ritchie","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2023013111522713800_btaa097-B33","doi-asserted-by":"crossref","first-page":"S412","DOI":"10.1137\/15M1026419","article-title":"ViennaCL-linear algebra library for multi- and many-core architectures","volume":"38","author":"Rupp","year":"2016","journal-title":"SIAM J. Sci. Comput"},{"key":"2023013111522713800_btaa097-B34","doi-asserted-by":"crossref","first-page":"495","DOI":"10.1038\/nbt.3192","article-title":"Spatial reconstruction of single-cell gene expression data","volume":"33","author":"Satija","year":"2015","journal-title":"Nat. Biotechnol"},{"key":"2023013111522713800_btaa097-B35","doi-asserted-by":"crossref","first-page":"593","DOI":"10.1016\/j.cmet.2016.08.020","article-title":"Single-cell transcriptome profiling of human pancreatic islets in health and type 2 diabetes","volume":"24","author":"Segerstolpe","year":"2016","journal-title":"Cell Metab"},{"key":"2023013111522713800_btaa097-B36","doi-asserted-by":"crossref","first-page":"2539","DOI":"10.1093\/bioinformatics\/btx196","article-title":"Removal of batch effects using distribution-matching residual networks","volume":"33","author":"Shaham","year":"2017","journal-title":"Bioinformatics"},{"key":"2023013111522713800_btaa097-B37","doi-asserted-by":"crossref","first-page":"268","DOI":"10.1186\/s12859-019-2855-9","article-title":"Batch correction evaluation framework using a-priori gene-gene associations: applied to the GTEx dataset","volume":"20","author":"Somekh","year":"2019","journal-title":"BMC Bioinformatics"},{"key":"2023013111522713800_btaa097-B38","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1038\/nrg3833","article-title":"Computational and analytical challenges in single-cell transcriptomics","volume":"16","author":"Stegle","year":"2015","journal-title":"Nat. Rev. Genet"},{"key":"2023013111522713800_btaa097-B39","doi-asserted-by":"crossref","first-page":"39921","DOI":"10.1038\/srep39921","article-title":"Batch effects and the effective design of single-cell gene expression studies","volume":"7","author":"Tung","year":"2017","journal-title":"Sci. Rep"},{"key":"2023013111522713800_btaa097-B40","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1038\/nn.3881","article-title":"Unbiased classification of sensory neuron types by large-scale single-cell RNA sequencing","volume":"18","author":"Usoskin","year":"2015","journal-title":"Nat. Neurosci"},{"key":"2023013111522713800_btaa097-B41","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-24277-4","volume-title":"ggplot2: Elegant Graphics for Data Analysis","author":"Wickham","year":"2016"},{"key":"2023013111522713800_btaa097-B42","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1016\/0169-7439(87)80084-9","article-title":"Principal component analysis","volume":"2","author":"Wold","year":"1987","journal-title":"Chemometr. Intell. Lab. Syst"},{"key":"2023013111522713800_btaa097-B43","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s10107-015-0892-3","article-title":"Coordinate descent algorithms","volume":"151","author":"Wright","year":"2015","journal-title":"Math. Program"},{"key":"2023013111522713800_btaa097-B44","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1093\/bioinformatics\/btu640","article-title":"PROPER: comprehensive power evaluation for differential expression using RNA-seq","volume":"31","author":"Wu","year":"2015","journal-title":"Bioinformatics"},{"key":"2023013111522713800_btaa097-B45","doi-asserted-by":"crossref","first-page":"608","DOI":"10.1016\/j.cmet.2016.08.018","article-title":"RNA sequencing of single human islet cells reveals type 2 diabetes genes","volume":"24","author":"Xin","year":"2016","journal-title":"Cell Metab"},{"key":"2023013111522713800_btaa097-B46","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1016\/S0076-6879(06)11004-6","article-title":"Use of external controls in microarray experiments","volume":"411","author":"Yang","year":"2006","journal-title":"Methods Enzymol"},{"key":"2023013111522713800_btaa097-B47","article-title":"Combat-seq: batch effect adjustment for RNA-seq count data","author":"Zhang","year":"2020","journal-title":"bioRxiv"},{"key":"2023013111522713800_btaa097-B48","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1091\/mbc.E17-07-0474","article-title":"Glycogen synthase kinase \u03b2 inhibition enhances Notch1 recycling","volume":"29","author":"Zheng","year":"2018","journal-title":"Mol. Biol. Cell"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaa097\/32916004\/btaa097.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/10\/3115\/48990996\/bioinformatics_36_10_3115.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/36\/10\/3115\/48990996\/bioinformatics_36_10_3115.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,31]],"date-time":"2024-07-31T00:16:27Z","timestamp":1722384987000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/36\/10\/3115\/5735411"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2020,2,13]]},"references-count":48,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2020,5,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaa097","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/669739","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2020,5,15]]},"published":{"date-parts":[[2020,2,13]]}}}