{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T11:43:22Z","timestamp":1767181402773,"version":"build-2238731810"},"update-to":[{"DOI":"10.1371\/journal.pcbi.1012241","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2024,7,22]],"date-time":"2024-07-22T00:00:00Z","timestamp":1721606400000}}],"reference-count":43,"publisher":"Public Library of Science (PLoS)","issue":"7","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000065","name":"National Institute of Neurological Disorders and Stroke","doi-asserted-by":"publisher","award":["R01 NS085211"],"award-info":[{"award-number":["R01 NS085211"]}],"id":[{"id":"10.13039\/100000065","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000065","name":"National Institute of Neurological Disorders and Stroke","doi-asserted-by":"publisher","award":["R01 NS060910"],"award-info":[{"award-number":["R01 NS060910"]}],"id":[{"id":"10.13039\/100000065","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000065","name":"National Institute of Neurological Disorders and Stroke","doi-asserted-by":"publisher","award":["Intramural Research Program"],"award-info":[{"award-number":["Intramural Research Program"]}],"id":[{"id":"10.13039\/100000065","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000890","name":"National Multiple Sclerosis Society","doi-asserted-by":"publisher","award":["RG-1707-28586"],"award-info":[{"award-number":["RG-1707-28586"]}],"id":[{"id":"10.13039\/100000890","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000025","name":"National Institute of Mental Health","doi-asserted-by":"publisher","award":["R01 MH123550"],"award-info":[{"award-number":["R01 MH123550"]}],"id":[{"id":"10.13039\/100000025","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000025","name":"National Institute of Mental Health","doi-asserted-by":"publisher","award":["R01 MH112847"],"award-info":[{"award-number":["R01 MH112847"]}],"id":[{"id":"10.13039\/100000025","id-type":"DOI","asserted-by":"publisher"}]},{"name":"University of Pennsylvania Center for Biomedical Image Computing and Analytics"}],"content-domain":{"domain":["www.ploscompbiol.org"],"crossmark-restriction":false},"short-container-title":["PLoS Comput Biol"],"abstract":"<jats:p>\n                    Dimension reduction tools preserving similarity and graph structure such as\n                    <jats:italic>t<\/jats:italic>\n                    -SNE and UMAP can capture complex biological patterns in high-dimensional data. However, these tools typically are not designed to separate effects of interest from unwanted effects due to confounders. We introduce the partial embedding (PARE) framework, which enables removal of confounders from any distance-based dimension reduction method. We then develop partial\n                    <jats:italic>t<\/jats:italic>\n                    -SNE and partial UMAP and apply these methods to genomic and neuroimaging data. For lower-dimensional visualization, our results show that the PARE framework can remove batch effects in single-cell sequencing data as well as separate clinical and technical variability in neuroimaging measures. We demonstrate that the PARE framework extends dimension reduction methods to highlight biological patterns of interest while effectively removing confounding effects.\n                  <\/jats:p>","DOI":"10.1371\/journal.pcbi.1012241","type":"journal-article","created":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T13:45:37Z","timestamp":1720619137000},"page":"e1012241","update-policy":"https:\/\/doi.org\/10.1371\/journal.pcbi.corrections_policy","source":"Crossref","is-referenced-by-count":2,"title":["PARE: A framework for removal of confounding effects from any distance-based dimension reduction method"],"prefix":"10.1371","volume":"20","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5027-6422","authenticated-orcid":true,"given":"Andrew A.","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kelly","family":"Clark","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Blake E.","family":"Dewey","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anna","family":"DuVal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nicole","family":"Pellegrini","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Govind","family":"Nair","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Youmna","family":"Jalkh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Samar","family":"Khalil","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jon","family":"Zurawski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7776-6472","authenticated-orcid":true,"given":"Peter A.","family":"Calabresi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2628-4334","authenticated-orcid":true,"given":"Daniel S.","family":"Reich","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rohit","family":"Bakshi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haochang","family":"Shou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Russell T.","family":"Shinohara","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"name":"Alzheimer\u2019s Disease Neuroimaging Initiative, and North American Imaging in Multiple Sclerosis Cooperative","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"340","published-online":{"date-parts":[[2024,7,10]]},"reference":[{"issue":"1","key":"pcbi.1012241.ref001","doi-asserted-by":"crossref","first-page":"38","DOI":"10.1038\/nbt.4314","article-title":"Dimensionality Reduction for Visualizing Single-Cell Data Using UMAP","volume":"37","author":"E Becht","year":"2019","journal-title":"Nature Biotechnology"},{"key":"pcbi.1012241.ref002","doi-asserted-by":"crossref","first-page":"9","DOI":"10.3389\/fninf.2016.00009","article-title":"A Tool for Interactive Data Visualization: Application to Over 10,000 Brain Imaging and Phantom MRI Data Sets","volume":"10","author":"SR Panta","year":"2016","journal-title":"Frontiers in Neuroinformatics"},{"issue":"4","key":"pcbi.1012241.ref003","doi-asserted-by":"crossref","first-page":"562","DOI":"10.1093\/biostatistics\/kxx053","article-title":"Missing Data and Technical Variability in Single-Cell RNA-sequencing Experiments","volume":"19","author":"SC Hicks","year":"2018","journal-title":"Biostatistics"},{"issue":"51","key":"pcbi.1012241.ref004","doi-asserted-by":"crossref","first-page":"14662","DOI":"10.1073\/pnas.1617317113","article-title":"Simultaneous Dimension Reduction and Adjustment for Confounding Variation","volume":"113","author":"Z Lin","year":"2016","journal-title":"Proceedings of the National Academy of Sciences"},{"issue":"13","key":"pcbi.1012241.ref005","doi-asserted-by":"crossref","first-page":"4099","DOI":"10.1093\/bioinformatics\/btaa276","article-title":"aPCoA: Covariate Adjusted Principal Coordinates Analysis","volume":"36","author":"Y Shi","year":"2020","journal-title":"Bioinformatics"},{"issue":"11","key":"pcbi.1012241.ref006","first-page":"3522","article-title":"Projected T-SNE for Batch Correction","volume":"36","author":"E Aliverti","year":"2020","journal-title":"Bioinformatics (Oxford, England)"},{"key":"pcbi.1012241.ref007","article-title":"Embedding to Reference T-SNE Space Addresses Batch Effects in Single-Cell Classification","author":"PG Poli\u010dar","year":"2021","journal-title":"Machine Learning"},{"issue":"Nov","key":"pcbi.1012241.ref008","first-page":"2579","article-title":"Visualizing Data Using T-SNE","volume":"9","author":"L van der Maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"pcbi.1012241.ref009","unstructured":"McInnes L, Healy J, Melville J. UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction. arXiv:180203426 [cs, stat]. 2020 Sep."},{"issue":"6","key":"pcbi.1012241.ref010","doi-asserted-by":"crossref","first-page":"1373","DOI":"10.1162\/089976603321780317","article-title":"Laplacian Eigenmaps for Dimensionality Reduction and Data Representation","volume":"15","author":"M Belkin","year":"2003","journal-title":"Neural Computation"},{"issue":"21","key":"pcbi.1012241.ref011","doi-asserted-by":"crossref","first-page":"7426","DOI":"10.1073\/pnas.0500334102","article-title":"Geometric Diffusions as a Tool for Harmonic Analysis and Structure Definition of Data: Diffusion Maps","volume":"102","author":"RR Coifman","year":"2005","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"pcbi.1012241.ref012","doi-asserted-by":"crossref","unstructured":"Tang J, Liu J, Zhang M, Mei Q. Visualizing Large-scale and High-dimensional Data. In: Proceedings of the 25th International Conference on World Wide Web. WWW\u201916. Republic and Canton of Geneva, CHE: International World Wide Web Conferences Steering Committee; 2016. p. 287\u201397.","DOI":"10.1145\/2872427.2883041"},{"key":"pcbi.1012241.ref013","unstructured":"Amid E, Warmuth MK. TriMap: Large-scale Dimensionality Reduction Using Triplets. arXiv:191000204 [cs, stat]. 2022 Mar."},{"issue":"6","key":"pcbi.1012241.ref014","doi-asserted-by":"crossref","first-page":"e98679","DOI":"10.1371\/journal.pone.0098679","article-title":"ForceAtlas2, a Continuous Graph Layout Algorithm for Handy Network Visualization Designed for the Gephi Software","volume":"9","author":"M Jacomy","year":"2014","journal-title":"PLOS ONE"},{"issue":"3\/4","key":"pcbi.1012241.ref015","doi-asserted-by":"crossref","first-page":"325","DOI":"10.2307\/2333639","article-title":"Some Distance Properties of Latent Root and Vector Methods Used in Multivariate Analysis","volume":"53","author":"JC Gower","year":"1966","journal-title":"Biometrika"},{"issue":"2","key":"pcbi.1012241.ref016","doi-asserted-by":"crossref","first-page":"305","DOI":"10.1007\/BF02294026","article-title":"The Analytical Solution of the Additive Constant Problem","volume":"48","author":"F Cailliez","year":"1983","journal-title":"Psychometrika"},{"issue":"1","key":"pcbi.1012241.ref017","doi-asserted-by":"crossref","first-page":"290","DOI":"10.1890\/0012-9658(2001)082[0290:FMMTCD]2.0.CO;2","article-title":"Fitting Multivariate Models to Community Data: A Comment on Distance-Based Redundancy Analysis","volume":"82","author":"BH McArdle","year":"2001","journal-title":"Ecology"},{"issue":"1","key":"pcbi.1012241.ref018","first-page":"32","article-title":"A New Method for Non-Parametric Multivariate Analysis of Variance","volume":"26","author":"MJ Anderson","year":"2001","journal-title":"Austral Ecology"},{"key":"pcbi.1012241.ref019","article-title":"Statistical Properties of Multivariate Distance Matrix Regression for High-Dimensional Data Analysis","volume":"3","author":"NJ Schork","year":"2012","journal-title":"Frontiers in Genetics"},{"issue":"4","key":"pcbi.1012241.ref020","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1016\/j.cels.2016.08.011","article-title":"A Single-Cell Transcriptomic Map of the Human and Mouse Pancreas Reveals Inter- and Intra-cell Population Structure","volume":"3","author":"M Baron","year":"2016","journal-title":"Cell Systems"},{"issue":"2","key":"pcbi.1012241.ref021","doi-asserted-by":"crossref","first-page":"208","DOI":"10.1101\/gr.212720.116","article-title":"Single-Cell Transcriptomes Identify Human Islet Cell Signatures and Reveal Cell-Type-Specific Expression Changes in Type 2 Diabetes","volume":"27","author":"N Lawlor","year":"2017","journal-title":"Genome Research"},{"issue":"4","key":"pcbi.1012241.ref022","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1016\/j.cels.2016.09.002","article-title":"A Single-Cell Transcriptome Atlas of the Human Pancreas","volume":"3","author":"MJ Muraro","year":"2016","journal-title":"Cell Systems"},{"issue":"4","key":"pcbi.1012241.ref023","doi-asserted-by":"crossref","first-page":"593","DOI":"10.1016\/j.cmet.2016.08.020","article-title":"Single-Cell Transcriptome Profiling of Human Pancreatic Islets in Health and Type 2 Diabetes","volume":"24","author":"\u00c5 Segerstolpe","year":"2016","journal-title":"Cell Metabolism"},{"key":"pcbi.1012241.ref024","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1186\/s13059-016-0947-7","article-title":"Pooling across Cells to Normalize Single-Cell RNA Sequencing Data with Many Zero Counts","volume":"17","author":"ATL Lun","year":"2016","journal-title":"Genome Biology"},{"issue":"7","key":"pcbi.1012241.ref025","doi-asserted-by":"crossref","first-page":"e1010184","DOI":"10.1371\/journal.pcbi.1010184","article-title":"AC-PCoA: Adjustment for Confounding Factors Using Principal Coordinate Analysis","volume":"18","author":"Y Wang","year":"2022","journal-title":"PLOS Computational Biology"},{"issue":"1","key":"pcbi.1012241.ref026","doi-asserted-by":"crossref","first-page":"118","DOI":"10.1093\/biostatistics\/kxj037","article-title":"Adjusting Batch Effects in Microarray Expression Data Using Empirical Bayes Methods","volume":"8","author":"WE Johnson","year":"2007","journal-title":"Biostatistics (Oxford, England)"},{"issue":"5","key":"pcbi.1012241.ref027","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1038\/nbt.4091","article-title":"Batch Effects in Single-Cell RNA-sequencing Data Are Corrected by Matching Mutual Nearest Neighbors","volume":"36","author":"L Haghverdi","year":"2018","journal-title":"Nature Biotechnology"},{"issue":"12","key":"pcbi.1012241.ref028","doi-asserted-by":"crossref","first-page":"1289","DOI":"10.1038\/s41592-019-0619-0","article-title":"Fast, Sensitive and Accurate Integration of Single-Cell Data with Harmony","volume":"16","author":"I Korsunsky","year":"2019","journal-title":"Nature Methods"},{"issue":"1","key":"pcbi.1012241.ref029","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1186\/s13059-019-1850-9","article-title":"A Benchmark of Batch-Effect Correction Methods for Single-Cell RNA Sequencing Data","volume":"21","author":"HTN Tran","year":"2020","journal-title":"Genome Biology"},{"issue":"1","key":"pcbi.1012241.ref030","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1038\/s41592-021-01336-8","article-title":"Benchmarking Atlas-Level Data Integration in Single-Cell Genomics","volume":"19","author":"MD Luecken","year":"2022","journal-title":"Nature Methods"},{"issue":"2","key":"pcbi.1012241.ref031","doi-asserted-by":"crossref","first-page":"106","DOI":"10.14348\/molcells.2023.0009","article-title":"Integration of Single-Cell RNA-Seq Datasets: A Review of Computational Methods","volume":"46","author":"Y Ryu","year":"2023","journal-title":"Molecules and Cells"},{"key":"pcbi.1012241.ref032","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1016\/0377-0427(87)90125-7","article-title":"Silhouettes: A Graphical Aid to the Interpretation and Validation of Cluster Analysis","volume":"20","author":"PJ Rousseeuw","year":"1987","journal-title":"Journal of Computational and Applied Mathematics"},{"issue":"1","key":"pcbi.1012241.ref033","doi-asserted-by":"crossref","first-page":"165","DOI":"10.3233\/JAD-190283","article-title":"Longitudinal Mapping of Cortical Thickness Measurements: An Alzheimer\u2019s Disease Neuroimaging Initiative-Based Evaluation Study","volume":"71","author":"NJ Tustison","year":"2019","journal-title":"Journal of Alzheimer\u2019s Disease"},{"key":"pcbi.1012241.ref034","doi-asserted-by":"crossref","first-page":"117129","DOI":"10.1016\/j.neuroimage.2020.117129","article-title":"Longitudinal ComBat: A Method for Harmonizing Longitudinal Multi-Scanner Imaging Data","volume":"220","author":"JC Beer","year":"2020","journal-title":"NeuroImage"},{"issue":"8","key":"pcbi.1012241.ref035","doi-asserted-by":"crossref","first-page":"653","DOI":"10.1016\/S1474-4422(21)00095-8","article-title":"2021 MAGNIMS-CMSC-NAIMS Consensus Recommendations on the Use of MRI in Patients with Multiple Sclerosis","volume":"20","author":"MP Wattjes","year":"2021","journal-title":"The Lancet Neurology"},{"issue":"6","key":"pcbi.1012241.ref036","doi-asserted-by":"crossref","first-page":"1310","DOI":"10.1109\/TMI.2010.2046908","article-title":"N4ITK: Improved N3 Bias Correction","volume":"29","author":"NJ Tustison","year":"2010","journal-title":"IEEE Transactions on Medical Imaging"},{"issue":"12","key":"pcbi.1012241.ref037","doi-asserted-by":"crossref","first-page":"1566","DOI":"10.1016\/j.acra.2013.09.010","article-title":"Multi-Atlas Skull-Stripping","volume":"20","author":"J Doshi","year":"2013","journal-title":"Academic Radiology"},{"key":"pcbi.1012241.ref038","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1016\/j.nicl.2014.08.008","article-title":"Statistical Normalization Techniques for Magnetic Resonance Imaging","volume":"6","author":"RT Shinohara","year":"2014","journal-title":"NeuroImage: Clinical"},{"key":"pcbi.1012241.ref039","doi-asserted-by":"crossref","DOI":"10.3389\/fninf.2013.00027","article-title":"Multi-Atlas Segmentation with Joint Label Fusion and Corrective Learning\u2014an Open Source Implementation","volume":"7","author":"H Wang","year":"2013","journal-title":"Frontiers in Neuroinformatics"},{"issue":"8","key":"pcbi.1012241.ref040","doi-asserted-by":"crossref","first-page":"2036","DOI":"10.1093\/brain\/awp105","article-title":"Early Diagnosis of Alzheimer\u2019s Disease Using Cortical Thickness: Impact of Cognitive Reserve","volume":"132","author":"O Querbes","year":"2009","journal-title":"Brain"},{"issue":"8","key":"pcbi.1012241.ref041","doi-asserted-by":"crossref","first-page":"e1011288","DOI":"10.1371\/journal.pcbi.1011288","article-title":"The Specious Art of Single-Cell Genomics","volume":"19","author":"T Chari","year":"2023","journal-title":"PLOS Computational Biology"},{"key":"pcbi.1012241.ref042","doi-asserted-by":"crossref","first-page":"kxad033","DOI":"10.1093\/biostatistics\/kxad033","article-title":"Similarity-Based Multimodal Regression","author":"AA Chen","year":"2023","journal-title":"Biostatistics"},{"issue":"6","key":"pcbi.1012241.ref043","doi-asserted-by":"crossref","first-page":"2382","DOI":"10.1214\/14-AOS1255","article-title":"Partial Distance Correlation with Methods for Dissimilarities","volume":"42","author":"GJ Sz\u00e9kely","year":"2014","journal-title":"The Annals of Statistics"}],"updated-by":[{"DOI":"10.1371\/journal.pcbi.1012241","type":"new_version","label":"New version","source":"publisher","updated":{"date-parts":[[2024,7,22]],"date-time":"2024-07-22T00:00:00Z","timestamp":1721606400000}}],"container-title":["PLOS Computational Biology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1012241","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,22]],"date-time":"2024-07-22T13:58:37Z","timestamp":1721656717000},"score":1,"resource":{"primary":{"URL":"https:\/\/dx.plos.org\/10.1371\/journal.pcbi.1012241"}},"subtitle":[],"editor":[{"given":"Matthias Helge","family":"Hennig","sequence":"first","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":43,"journal-issue":{"issue":"7","published-online":{"date-parts":[[2024,7,10]]}},"URL":"https:\/\/doi.org\/10.1371\/journal.pcbi.1012241","relation":{},"ISSN":["1553-7358"],"issn-type":[{"value":"1553-7358","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,7,10]]}}}