{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T20:34:04Z","timestamp":1772138044266,"version":"3.50.1"},"reference-count":47,"publisher":"Oxford University Press (OUP)","issue":"16","license":[{"start":{"date-parts":[[2021,2,9]],"date-time":"2021-02-09T00:00:00Z","timestamp":1612828800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"DOI":"10.13039\/100000923","name":"Silicon Valley Community Foundation","doi-asserted-by":"publisher","award":["CZF2019-002443"],"award-info":[{"award-number":["CZF2019-002443"]}],"id":[{"id":"10.13039\/100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["2U24CA180996"],"award-info":[{"award-number":["2U24CA180996"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"name":"AIRC Foundation","award":["IG 2018-ID.21846"],"award-info":[{"award-number":["IG 2018-ID.21846"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,8,25]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Data transformations are an important step in the analysis of RNA-seq data. Nonetheless, the impact of transformation on the outcome of unsupervised clustering procedures is still unclear.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>Here, we present an Asymmetric Winsorization per-Sample Transformation (AWST), which is robust to data perturbations and removes the need for selecting the most informative genes prior to sample clustering. Our procedure leads to robust and biologically meaningful clusters both in bulk and in single-cell applications.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>The AWST method is available at https:\/\/github.com\/drisso\/awst. The code to reproduce the analyses is available at https:\/\/github.com\/drisso\/awst_analysis<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Supplementary information<\/jats:title>\n                    <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btab091","type":"journal-article","created":{"date-parts":[[2021,2,5]],"date-time":"2021-02-05T15:31:12Z","timestamp":1612539072000},"page":"2356-2364","source":"Crossref","is-referenced-by-count":8,"title":["Per-sample standardization and asymmetric winsorization lead to accurate clustering of RNA-seq expression profiles"],"prefix":"10.1093","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8508-5012","authenticated-orcid":false,"given":"Davide","family":"Risso","sequence":"first","affiliation":[{"name":"Department of Statistical Sciences, Universit\u00e0 degli Studi di Padova , Padova, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8298-9777","authenticated-orcid":false,"given":"Stefano Maria","family":"Pagnotta","sequence":"additional","affiliation":[{"name":"Department of Science and Technology, Universit\u00e0 degli Studi del Sannio , Benevento, Italy"}]}],"member":"286","published-online":{"date-parts":[[2021,2,9]]},"reference":[{"key":"2023051609145936900_btab091-B1","doi-asserted-by":"crossref","first-page":"R106","DOI":"10.1186\/gb-2010-11-10-r106","article-title":"Differential expression analysis for sequence count data","volume":"11","author":"Anders","year":"2010","journal-title":"Genome Biol"},{"key":"2023051609145936900_btab091-B2","first-page":"171","article-title":"A class of distributions which includes the normal ones","volume":"12","author":"Azzalini","year":"1985","journal-title":"Scand. J. Stat"},{"key":"2023051609145936900_btab091-B3","doi-asserted-by":"crossref","first-page":"P10008","DOI":"10.1088\/1742-5468\/2008\/10\/P10008","article-title":"Fast unfolding of communities in large networks","volume":"2008","author":"Blondel","year":"2008","journal-title":"J. Stat. Mech"},{"key":"2023051609145936900_btab091-B4","doi-asserted-by":"crossref","first-page":"94","DOI":"10.1186\/1471-2105-11-94","article-title":"Evaluation of statistical methods for normalization and differential expression in mrna-seq experiments","volume":"11","author":"Bullard","year":"2010","journal-title":"BMC Bioinform"},{"key":"2023051609145936900_btab091-B5","first-page":"1","article-title":"A dendrite method for cluster analysis","volume":"3","author":"Calinski","year":"1974","journal-title":"Commun. Stat"},{"key":"2023051609145936900_btab091-B6","doi-asserted-by":"crossref","first-page":"550","DOI":"10.1016\/j.cell.2015.12.028","article-title":"Molecular profiling reveals biologically discrete subsets and pathways of progression in diffuse glioma","volume":"164","author":"Ceccarelli","year":"2016","journal-title":"Cell"},{"key":"2023051609145936900_btab091-B7","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1093\/bib\/bbs046","article-title":"A comprehensive evaluation of normalization methods for illumina high-throughput RNA sequencing data analysis","volume":"14","author":"Dillies","year":"2013","journal-title":"Brief. Bioinform"},{"key":"2023051609145936900_btab091-B8","doi-asserted-by":"crossref","first-page":"research0036.1","DOI":"10.1186\/gb-2002-3-7-research0036","article-title":"A prediction-based resampling method for estimating the number of clusters in a dataset","volume":"3","author":"Dudoit","year":"2002","journal-title":"Genome Biol"},{"key":"2023051609145936900_btab091-B9","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1198\/016214502753479248","article-title":"Comparison of discrimination methods for the classification of tumors using gene expression data","volume":"97","author":"Dudoit","year":"2002","journal-title":"J. Am. Stat. Assoc"},{"key":"2023051609145936900_btab091-B10","doi-asserted-by":"crossref","first-page":"2499","DOI":"10.1056\/NEJMoa1407279","article-title":"Glioma groups based on 1p\/19q, IDH, and TERT promoter mutations in tumors","volume":"372","author":"Eckel-Passow","year":"2015","journal-title":"N. Engl. J. Med"},{"key":"2023051609145936900_btab091-B11","first-page":"105","article-title":"Log-transformation and its implications for data analysis","volume":"26","author":"Feng","year":"2014","journal-title":"Shanghai Arch. Psychiatry"},{"key":"2023051609145936900_btab091-B12","doi-asserted-by":"crossref","first-page":"545","DOI":"10.1093\/bib\/bbz158","article-title":"Toward a gold standard for benchmarking gene set enrichment analysis","volume":"22","author":"Geistlinger","year":"2021","journal-title":"Brief. Bioinform"},{"key":"2023051609145936900_btab091-B13","doi-asserted-by":"crossref","first-page":"3625","DOI":"10.1093\/bioinformatics\/btv425","article-title":"Statistical models for RNA-seq data derived from a two-condition 48-replicate experiment","volume":"31","author":"Gierli\u0144ski","year":"2015","journal-title":"Bioinformatics"},{"key":"2023051609145936900_btab091-B14","volume-title":"RaceID: Identification of Cell Types and Inference of Lineage Trees from Single-Cell RNA-Seq Data. R package version 0.2.1","author":"Gr\u00fcn","year":"2020"},{"key":"2023051609145936900_btab091-B15","doi-asserted-by":"crossref","first-page":"778","DOI":"10.1186\/1471-2164-14-778","article-title":"Finding the active genes in deep RNA-seq gene expression studies","volume":"14","author":"Hart","year":"2013","journal-title":"BMC Genomics"},{"key":"2023051609145936900_btab091-B16","doi-asserted-by":"crossref","first-page":"497","DOI":"10.1038\/msb.2011.28","article-title":"RNA sequencing reveals two major classes of gene expression levels in metazoan cells","volume":"7","author":"Hebenstreit","year":"2011","journal-title":"Mol. Syst. Biol"},{"key":"2023051609145936900_btab091-B17","doi-asserted-by":"crossref","first-page":"576","DOI":"10.1093\/bioinformatics\/18.4.576","article-title":"Making sense of microarray data distributions","volume":"18","author":"Hoyle","year":"2002","journal-title":"Bioinformatics"},{"key":"2023051609145936900_btab091-B18","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1007\/BF01908075","article-title":"Comparing partitions","volume":"2","author":"Hubert","year":"1985","journal-title":"J. Class"},{"key":"2023051609145936900_btab091-B19","doi-asserted-by":"crossref","first-page":"42","DOI":"10.1016\/j.ymeth.2017.07.023","article-title":"Clustering of RNA-seq samples: comparison study on cancer data","volume":"132","author":"Jaskowiak","year":"2018","journal-title":"Methods"},{"key":"2023051609145936900_btab091-B20","doi-asserted-by":"crossref","first-page":"11263","DOI":"10.1038\/ncomms11263","article-title":"Integrated multi-omics analysis of oligodendroglial tumours identifies three subgroups of 1p\/19q co-deleted gliomas","volume":"7","author":"Kamoun","year":"2016","journal-title":"Nat. Commun"},{"key":"2023051609145936900_btab091-B21","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316801","volume-title":"Finding Groups in Data: An Introduction to Cluster Analysis","author":"Kaufman","year":"1990"},{"key":"2023051609145936900_btab091-B22","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1038\/nmeth.4236","article-title":"Sc3: consensus clustering of single-cell RNA-seq data","volume":"14","author":"Kiselev","year":"2017","journal-title":"Nat. Methods"},{"key":"2023051609145936900_btab091-B23","doi-asserted-by":"crossref","first-page":"708","DOI":"10.1038\/ng.3818","article-title":"Reference component analysis of single-cell transcriptomes elucidates cellular heterogeneity in human colorectal tumors","volume":"49","author":"Li","year":"2017","journal-title":"Nat. Genet"},{"key":"2023051609145936900_btab091-B24","doi-asserted-by":"crossref","first-page":"550","DOI":"10.1186\/s13059-014-0550-8","article-title":"Moderated estimation of fold change and dispersion for RNA-seq data with deseq2","volume":"15","author":"Love","year":"2014","journal-title":"Genome Biol"},{"key":"2023051609145936900_btab091-B25","doi-asserted-by":"publisher","DOI":"10.1101\/404962","article-title":"Overcoming systematic errors caused by log-transformation of normalized single-cell RNA sequencing data","author":"Lun","year":"2018"},{"key":"2023051609145936900_btab091-B26","first-page":"2122","article-title":"A step-by-step workflow for low-level analysis of single-cell RNA-seq data with bioconductor","volume":"5","author":"Lun","year":"2016","journal-title":"F1000Research"},{"key":"2023051609145936900_btab091-B27","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1186\/s13059-019-1662-y","article-title":"Emptydrops: distinguishing cells from empty droplets in droplet-based single-cell RNA sequencing data","volume":"20","author":"Lun","year":"2019","journal-title":"Genome Biol"},{"key":"2023051609145936900_btab091-B28","doi-asserted-by":"crossref","first-page":"4288","DOI":"10.1093\/nar\/gks042","article-title":"Differential expression analysis of multifactor RNA-seq experiments with respect to biological variation","volume":"40","author":"McCarthy","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"2023051609145936900_btab091-B29","doi-asserted-by":"publisher","DOI":"10.1038\/nbt.4314","article-title":"UMAP: uniform manifold approximation and projection for dimension reduction","author":"McInnes","year":"2018"},{"key":"2023051609145936900_btab091-B30","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/A:1023949509487","article-title":"Consensus clustering: a resampling-based method for class discovery and visualization of gene expression microarray data","volume":"52","author":"Monti","year":"2003","journal-title":"Mach. Learn"},{"key":"2023051609145936900_btab091-B31","doi-asserted-by":"crossref","first-page":"510","DOI":"10.1016\/j.ccr.2010.03.017","article-title":"Identification of a CPG island methylator phenotype that defines a distinct subgroup of glioma","volume":"17","author":"Noushmehr","year":"2010","journal-title":"Cancer Cell"},{"key":"2023051609145936900_btab091-B32","doi-asserted-by":"crossref","first-page":"627","DOI":"10.1093\/biostatistics\/kxv018","article-title":"Shape analysis of high-throughput transcriptomics experiment data","volume":"16","author":"Okrah","year":"2015","journal-title":"Biostatistics"},{"key":"2023051609145936900_btab091-B33","doi-asserted-by":"crossref","first-page":"244","DOI":"10.1016\/j.ccell.2018.01.003","article-title":"The integrated genomic landscape of thymic epithelial tumors","volume":"33","author":"Radovich","year":"2018","journal-title":"Cancer Cell"},{"key":"2023051609145936900_btab091-B34","doi-asserted-by":"crossref","first-page":"e1006378","DOI":"10.1371\/journal.pcbi.1006378","article-title":"clusterexperiment and rsec: a bioconductor package and framework for clustering of single-cell and other large gene expression datasets","volume":"14","author":"Risso","year":"2018","journal-title":"PLoS Comput. Biol"},{"key":"2023051609145936900_btab091-B35","doi-asserted-by":"crossref","first-page":"2881","DOI":"10.1093\/bioinformatics\/btm453","article-title":"Moderated statistical tests for assessing differences in tag abundance","volume":"23","author":"Robinson","year":"2007","journal-title":"Bioinformatics"},{"key":"2023051609145936900_btab091-B36","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1016\/0377-0427(87)90125-7","article-title":"Silhouettes: a graphical aid to the interpretation and validation of cluster analysis","volume":"20","author":"Rousseeuw","year":"1987","journal-title":"J. Comput. Appl. Math"},{"key":"2023051609145936900_btab091-B37","doi-asserted-by":"crossref","first-page":"495","DOI":"10.1038\/nbt.3192","article-title":"Spatial reconstruction of single-cell gene expression data","volume":"33","author":"Satija","year":"2015","journal-title":"Nat. Biotechnol"},{"key":"2023051609145936900_btab091-B38","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1186\/s12881-019-0909-5","article-title":"Copy number variation is highly correlated with differential gene expression: a pan-cancer study","volume":"20","author":"Shao","year":"2019","journal-title":"BMC Med. Genet"},{"key":"2023051609145936900_btab091-B39","doi-asserted-by":"crossref","first-page":"865","DOI":"10.1038\/nmeth.4380","article-title":"Simultaneous epitope and transcriptome measurement in single cells","volume":"14","author":"Stoeckius","year":"2017","journal-title":"Nat. Methods"},{"key":"2023051609145936900_btab091-B40","doi-asserted-by":"crossref","first-page":"903","DOI":"10.1038\/nbt.2957","article-title":"A comprehensive assessment of RNA-seq accuracy, reproducibility and information content by the sequencing quality control consortium","volume":"32","author":"Su","year":"2014","journal-title":"Nat. Biotechnol"},{"key":"2023051609145936900_btab091-B41","doi-asserted-by":"crossref","first-page":"2481","DOI":"10.1056\/NEJMoa1402121","article-title":"Comprehensive, integrative genomic analysis of diffuse lower-grade gliomas","volume":"372","year":"2015","journal-title":"N. Engl. J. Med"},{"key":"2023051609145936900_btab091-B42","doi-asserted-by":"crossref","first-page":"479","DOI":"10.1038\/s41592-019-0425-8","article-title":"Benchmarking single cell RNA-sequencing analysis pipelines using mixture control experiments","volume":"16","author":"Tian","year":"2019","journal-title":"Nat. Methods"},{"key":"2023051609145936900_btab091-B43","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1186\/s13059-019-1861-6","article-title":"Feature selection and dimension reduction for single-cell RNA-seq based on a multinomial model","volume":"20","author":"Townes","year":"2019","journal-title":"Genome Biol"},{"key":"2023051609145936900_btab091-B44","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1214\/aoms\/1177704711","article-title":"The future of data analysis","volume":"33","author":"Tukey","year":"1962","journal-title":"Ann. Math. Stat"},{"key":"2023051609145936900_btab091-B45","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"van der Maaten","year":"2008","journal-title":"J. Mach. Learn. Res"},{"key":"2023051609145936900_btab091-B46","doi-asserted-by":"crossref","first-page":"e0219102","DOI":"10.1371\/journal.pone.0219102","article-title":"Cluster analysis on high dimensional RNA-seq data with applications to cancer research\u2014an evaluation study","volume":"14","author":"Vidman","year":"2019","journal-title":"PLoS One"},{"key":"2023051609145936900_btab091-B47","doi-asserted-by":"crossref","first-page":"1572","DOI":"10.1093\/bioinformatics\/btq170","article-title":"Consensusclusterplus: a class discovery tool with confidence assessments and item tracking","volume":"26","author":"Wilkerson","year":"2010","journal-title":"Bioinformatics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btab091\/36394843\/btab091.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/16\/2356\/50339623\/btab091.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/37\/16\/2356\/50339623\/btab091.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T05:18:41Z","timestamp":1684214321000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/37\/16\/2356\/6131783"}},"subtitle":[],"editor":[{"given":"Jan","family":"Gorodkin","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2021,2,9]]},"references-count":47,"journal-issue":{"issue":"16","published-print":{"date-parts":[[2021,8,25]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btab091","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2020.06.04.134916","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2021,8,15]]},"published":{"date-parts":[[2021,2,9]]}}}