{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,11]],"date-time":"2025-11-11T22:15:45Z","timestamp":1762899345660},"reference-count":36,"publisher":"Oxford University Press (OUP)","issue":"3","license":[{"start":{"date-parts":[[2016,10,22]],"date-time":"2016-10-22T00:00:00Z","timestamp":1477094400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/about_us\/legal\/notices"}],"funder":[{"name":"German Science Foundation","award":["BO3139\/3-1","BO3139\/2-3"],"award-info":[{"award-number":["BO3139\/3-1","BO3139\/2-3"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,2,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>To date most medical tests derived by applying classification methods to high-dimensional molecular data are hardly used in clinical practice. This is partly because the prediction error resulting when applying them to external data is usually much higher than internal error as evaluated through within-study validation procedures. We suggest the use of addon normalization and addon batch effect removal techniques in this context to reduce systematic differences between external data and the original dataset with the aim to improve prediction performance.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We evaluate the impact of addon normalization and seven batch effect removal methods on cross-study prediction performance for several common classifiers using a large collection of microarray gene expression datasets, showing that some of these techniques reduce prediction error.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and Implementation<\/jats:title>\n                  <jats:p>All investigated addon methods are implemented in our R package bapred.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Supplementary information<\/jats:title>\n                  <jats:p>Supplementary data are available at Bioinformatics online.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btw650","type":"journal-article","created":{"date-parts":[[2016,10,11]],"date-time":"2016-10-11T14:19:40Z","timestamp":1476195580000},"page":"397-404","source":"Crossref","is-referenced-by-count":23,"title":["Improving cross-study prediction through addon batch effect adjustment or addon normalization"],"prefix":"10.1093","volume":"33","author":[{"given":"Roman","family":"Hornung","sequence":"first","affiliation":[{"name":"Department of Medical Informatics, Biometry and Epidemiology, University of Munich, Munich, Germany"}]},{"given":"David","family":"Causeur","sequence":"additional","affiliation":[{"name":"Applied Mathematics Department, Agrocampus Ouest, Rennes, France"}]},{"given":"Christoph","family":"Bernau","sequence":"additional","affiliation":[{"name":"Leibniz Supercomputing Center, Garching, Germany"}]},{"given":"Anne-Laure","family":"Boulesteix","sequence":"additional","affiliation":[{"name":"Department of Medical Informatics, Biometry and Epidemiology, University of Munich, Munich, Germany"}]}],"member":"286","published-online":{"date-parts":[[2016,10,22]]},"reference":[{"key":"2023020204403222100_btw650-B1","doi-asserted-by":"crossref","first-page":"i105","DOI":"10.1093\/bioinformatics\/btu279","article-title":"Cross-study validation for the assessment of prediction algorithms","volume":"30","author":"Bernau","year":"2014","journal-title":"Bioinformatics"},{"key":"2023020204403222100_btw650-B2","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1093\/bioinformatics\/19.2.185","article-title":"A comparison of normalization methods for high density oligonucleotide array data based on variance and bias","volume":"19","author":"Bolstad","year":"2003","journal-title":"Bioinformatics"},{"key":"2023020204403222100_btw650-B3","doi-asserted-by":"crossref","first-page":"2664","DOI":"10.1093\/bioinformatics\/btt458","article-title":"On representative and illustrative comparisons with real data in bioinformatics: response to the letter to the editor by Smith et al","volume":"29","author":"Boulesteix","year":"2013","journal-title":"Bioinformatics"},{"key":"2023020204403222100_btw650-B4","doi-asserted-by":"crossref","first-page":"e61562.","DOI":"10.1371\/journal.pone.0061562","article-title":"A plea for neutral comparison studies in computational sciences","volume":"8","author":"Boulesteix","year":"2013","journal-title":"PLoS ONE"},{"key":"2023020204403222100_btw650-B5","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1080\/00031305.2015.1005128","article-title":"A statistical framework for hypothesis testing in real data comparison studies","volume":"69","author":"Boulesteix","year":"2015","journal-title":"Am. Stat"},{"key":"2023020204403222100_btw650-B6","first-page":"477","article-title":"Boosting algorithms: regularization, prediction and model fitting","volume":"22","author":"B\u00fchlmann","year":"2007","journal-title":"Stat. Sci"},{"key":"2023020204403222100_btw650-B7","first-page":"187","article-title":"Response to Mease and Wyner, evidence contrary to the statistical view of boosting","volume":"9","author":"B\u00fchlmann","year":"2008","journal-title":"J. Mach. Learn. Res"},{"key":"2023020204403222100_btw650-B8","doi-asserted-by":"crossref","first-page":"94.","DOI":"10.1186\/1471-2105-11-94","article-title":"Evaluation of statistical methods for normalization and differential expression in mRNA-Seq experiments","volume":"11","author":"Bullard","year":"2010","journal-title":"BMC Bioinformatics"},{"key":"2023020204403222100_btw650-B9","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1093\/bib\/bbq073","article-title":"An empirical assessment of validation practices for molecular classifiers","volume":"12","author":"Castaldi","year":"2011","journal-title":"Brief. Bioinf"},{"key":"2023020204403222100_btw650-B10","doi-asserted-by":"crossref","first-page":"809","DOI":"10.1021\/acs.jproteome.5b00852","article-title":"Testing and validation of computational methods for mass spectrometry","volume":"15","author":"Gatto","year":"2016","journal-title":"J. Proteome Res"},{"key":"2023020204403222100_btw650-B11","doi-asserted-by":"crossref","DOI":"10.2202\/1544-6115.1071","article-title":"Classifying gene expression profiles from pairwise mRNA comparisons","volume":"3","author":"Geman","year":"2004","journal-title":"Stat. Appl. Genet. Mol. Biol"},{"key":"2023020204403222100_btw650-B12","first-page":"583","volume-title":"Microarray Classification from Several Two-Gene Expression Comparisons","author":"Geman","year":"2008"},{"key":"2023020204403222100_btw650-B13","doi-asserted-by":"crossref","first-page":"204","DOI":"10.1093\/biostatistics\/kxr054","article-title":"Removing technical variability in RNA-seq data using conditional quantile normalization","volume":"13","author":"Hansen","year":"2012","journal-title":"Biostatistics"},{"key":"2023020204403222100_btw650-B14","author":"Hornung","year":"2016"},{"key":"2023020204403222100_btw650-B15","doi-asserted-by":"crossref","first-page":"95.","DOI":"10.1186\/s12874-015-0088-9","article-title":"A measure of the impact of CV incompleteness on prediction error estimation with application to PCA and normalization","volume":"15","author":"Hornung","year":"2015","journal-title":"BMC Med. Res. Methodol"},{"key":"2023020204403222100_btw650-B16","doi-asserted-by":"crossref","first-page":"27.","DOI":"10.1186\/s12859-015-0870-z","article-title":"Combining location-and-scale batch effect adjustment with data cleaning by latent factor adjustment","volume":"17","author":"Hornung","year":"2016","journal-title":"BMC Bioinformatics"},{"key":"2023020204403222100_btw650-B17","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1093\/biostatistics\/4.2.249","article-title":"Exploration, normalization, and summaries of high density oligonucleotide array probe level data","volume":"4","author":"Irizarry","year":"2003","journal-title":"Biostatistics"},{"key":"2023020204403222100_btw650-B18","doi-asserted-by":"crossref","first-page":"119.","DOI":"10.1186\/1471-2105-14-119","article-title":"An AUC-based permutation variable importance measure for random forests","volume":"14","author":"Janitza","year":"2013","journal-title":"BMC Bioinformatics"},{"key":"2023020204403222100_btw650-B19","doi-asserted-by":"crossref","first-page":"118","DOI":"10.1093\/biostatistics\/kxj037","article-title":"Adjusting batch effects in microarray expression data using empirical Bayes methods","volume":"8","author":"Johnson","year":"2007","journal-title":"Biostatistics"},{"key":"2023020204403222100_btw650-B20","doi-asserted-by":"crossref","first-page":"D1113","DOI":"10.1093\/nar\/gku1057","article-title":"ArrayExpress update\u2013simplifying data submissions","volume":"43","author":"Kolesnikov","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2023020204403222100_btw650-B21","doi-asserted-by":"crossref","first-page":"e22.","DOI":"10.1371\/journal.pcbi.0040022","article-title":"Microarray based diagnosis profits from better documentation of gene expression signatures","volume":"4","author":"Kostka","year":"2008","journal-title":"PLoS Comput. Biol"},{"key":"2023020204403222100_btw650-B22","doi-asserted-by":"crossref","first-page":"1724","DOI":"10.1371\/journal.pgen.0030161","article-title":"Capturing heterogeneity in gene expression studies by surrogate variable analysis","volume":"3","author":"Leek","year":"2007","journal-title":"PLoS Genet"},{"key":"2023020204403222100_btw650-B23","author":"Li","year":"2007"},{"key":"2023020204403222100_btw650-B24","doi-asserted-by":"crossref","first-page":"278","DOI":"10.1038\/tpj.2010.57","article-title":"A comparison of batch effect removal methods for enhancement of prediction performance using MAQC-II microarray gene expression data","volume":"10","author":"Luo","year":"2010","journal-title":"Pharmacogenomics J"},{"key":"2023020204403222100_btw650-B25","doi-asserted-by":"crossref","first-page":"242","DOI":"10.1093\/biostatistics\/kxp059","article-title":"Frozen robust multiarray analysis (fRMA)","volume":"11","author":"McCall","year":"2010","journal-title":"Biostatistics"},{"key":"2023020204403222100_btw650-B26","doi-asserted-by":"crossref","first-page":"e6.","DOI":"10.1371\/journal.pcbi.0040006","article-title":"Comprehensive analysis of affymetrix exon arrays using BioConductor","volume":"4","author":"Okoniewski","year":"2008","journal-title":"PLoS Comput. Biol"},{"key":"2023020204403222100_btw650-B27","doi-asserted-by":"crossref","first-page":"e561.","DOI":"10.7717\/peerj.561","article-title":"Removing batch effects for prediction problems with frozen surrogate variable analysis","volume":"2","author":"Parker","year":"2014","journal-title":"PeerJ"},{"key":"2023020204403222100_btw650-B28","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1016\/j.csl.2013.11.004","article-title":"Feature selection methods and their combinations in high-dimensional classification of speaker likability, intelligibility and personality traits","volume":"29","author":"Pohjalainen","year":"2015","journal-title":"Comput. Speech Lang"},{"key":"2023020204403222100_btw650-B29","doi-asserted-by":"crossref","DOI":"10.1002\/9780470685983","volume-title":"Batch Effects and Noise in Microarray Experiments: Sources and Solutions. Wiley Series in Probability and Statistics","author":"Scheerer","year":"2009"},{"key":"2023020204403222100_btw650-B30","doi-asserted-by":"crossref","first-page":"349.","DOI":"10.1186\/1471-2164-11-349","article-title":"Comparison of normalization methods for Illumina BeadChip HumanHT-12 v3","volume":"11","author":"Schmid","year":"2010","journal-title":"BMC Genomics"},{"key":"2023020204403222100_btw650-B31","author":"Seibold","year":"2016"},{"key":"2023020204403222100_btw650-B32","volume-title":"Image Processing, Analysis, and Machine Vision","author":"Sonka","year":"2014"},{"key":"2023020204403222100_btw650-B33","doi-asserted-by":"crossref","first-page":"409.","DOI":"10.1186\/1471-2105-9-409","article-title":"Normalization of Illumina Infinium whole-genome SNP data improves copy number estimates and allelic intensity ratios","volume":"9","author":"Staaf","year":"2008","journal-title":"BMC Bioinformatics"},{"key":"2023020204403222100_btw650-B34","doi-asserted-by":"crossref","first-page":"e141.","DOI":"10.1093\/nar\/gkn705","article-title":"Deep sequencing-based expression analysis shows major advances in robustness, resolution and inter-lab portability over five microarray platforms","volume":"36","author":"T Hoen","year":"2008","journal-title":"Nucleic Acids Res"},{"key":"2023020204403222100_btw650-B35","doi-asserted-by":"crossref","first-page":"3896","DOI":"10.1093\/bioinformatics\/bti631","article-title":"Simple decision rules for classifying human cancers from gene expression profiles","volume":"21","author":"Tan","year":"2005","journal-title":"Bioinformatics"},{"key":"2023020204403222100_btw650-B36","doi-asserted-by":"crossref","first-page":"564","DOI":"10.1038\/nature06915","article-title":"Enabling personalized cancer medicine through analysis of gene-expression patterns","volume":"452","author":"Van\u2019t Veer","year":"2008","journal-title":"Nature"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/3\/397\/49037310\/bioinformatics_33_3_397.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/33\/3\/397\/49037310\/bioinformatics_33_3_397.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,2]],"date-time":"2023-02-02T04:41:34Z","timestamp":1675312894000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/33\/3\/397\/2608637"}},"subtitle":[],"editor":[{"given":"Janet","family":"Kelso","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2016,10,22]]},"references-count":36,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2017,2,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btw650","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2017,2,1]]},"published":{"date-parts":[[2016,10,22]]}}}