{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T20:36:02Z","timestamp":1776285362064,"version":"3.50.1"},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2014,11,26]],"date-time":"2014-11-26T00:00:00Z","timestamp":1416960000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1186\/s13321-014-0047-1","type":"journal-article","created":{"date-parts":[[2014,11,25]],"date-time":"2014-11-25T20:03:46Z","timestamp":1416945826000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":121,"title":["Reliable estimation of prediction errors for QSAR models under model uncertainty using double cross-validation"],"prefix":"10.1186","volume":"6","author":[{"given":"D\u00e9sir\u00e9e","family":"Baumann","sequence":"first","affiliation":[]},{"given":"Knut","family":"Baumann","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,11,26]]},"reference":[{"key":"47_CR1","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1016\/S1359-6446(97)01079-9","volume":"2","author":"H Kubinyi","year":"1997","unstructured":"Kubinyi H: QSAR and 3D QSAR in drug design. Part 1: methodology. Drug Discov Today. 1997, 2: 457-467. 10.1016\/S1359-6446(97)01079-9.","journal-title":"Drug Discov Today"},{"key":"47_CR2","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1016\/S0165-9936(03)00607-1","volume":"22","author":"K Baumann","year":"2003","unstructured":"Baumann K: Cross-validation as the objective function of variable selection. Trends Anal Chem. 2003, 22: 395-406. 10.1016\/S0165-9936(03)00607-1.","journal-title":"Trends Anal Chem"},{"key":"47_CR3","volume-title":"Handbook of Molecular Descriptors","author":"R Todeschini","year":"2002","unstructured":"Todeschini R, Consonni V: Handbook of Molecular Descriptors. 2002, Wiley-VCH, Berlin"},{"key":"47_CR4","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-84858-7","volume-title":"Elements of statistical Learning: Data Mining, Inference and Prediction","author":"T Hastie","year":"2009","unstructured":"Hastie T, Tibshirani R, Friedmann J: Elements of statistical Learning: Data Mining, Inference and Prediction. 2009, Springer, New York, 2","edition":"2"},{"key":"47_CR5","first-page":"109","volume-title":"The Handbook of Social Psychology","author":"F Mosteller","year":"1968","unstructured":"Mosteller F, Turkey J: Data Analysis, Including Statistics. The Handbook of Social Psychology. Edited by: Gardner L, Eliot A. 1968, Springer: Addison-Wesley, Reading, MA, USA, 109-112. 2","edition":"2"},{"key":"47_CR6","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1111\/j.2517-6161.1974.tb00994.x","volume":"36","author":"M Stone","year":"1974","unstructured":"Stone M: Cross-validatory choice and assessment of statistical predictions. J R Stat Soc Ser B Methodol. 1974, 36: 111-147.","journal-title":"J R Stat Soc Ser B Methodol"},{"key":"47_CR7","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1111\/j.1467-842X.1989.tb00988.x","volume":"31","author":"S Ganeshanandam","year":"1989","unstructured":"Ganeshanandam S, Krzanowski WJ: On selecting variables and assessing their performance in linear discriminant analysis. Aust J Stat. 1989, 31: 433-447. 10.1111\/j.1467-842X.1989.tb00988.x.","journal-title":"Aust J Stat"},{"key":"47_CR8","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1023\/A:1008987426876","volume":"10","author":"P Jonathan","year":"2000","unstructured":"Jonathan P, Krzanowski WJ, McCarthy WV: On the use of cross-validation to assess performance in multivariate prediction. Stat Comput. 2000, 10: 209-229. 10.1023\/A:1008987426876.","journal-title":"Stat Comput"},{"key":"47_CR9","doi-asserted-by":"publisher","first-page":"6562","DOI":"10.1073\/pnas.102102699","volume":"99","author":"C Ambroise","year":"2002","unstructured":"Ambroise C, McLachlan GJ: Selection bias in gene extraction on the basis of microarray gene-expression data. Proc Natl Acad Sci U S A. 2002, 99: 6562-6566. 10.1073\/pnas.102102699.","journal-title":"Proc Natl Acad Sci U S A"},{"key":"47_CR10","doi-asserted-by":"publisher","first-page":"918","DOI":"10.1002\/prot.20656","volume":"61","author":"D Soeria-Atmadja","year":"2005","unstructured":"Soeria-Atmadja D, Wallman M, Bj\u00f6rklund AK, Isaksson A, Hammerling U, Gustafsson MG: External cross-validation for unbiased evaluation of protein family detectors: application to allergens. Proteins. 2005, 61: 918-925. 10.1002\/prot.20656.","journal-title":"Proteins"},{"key":"47_CR11","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1016\/j.neuroimage.2010.11.004","volume":"56","author":"S Lemm","year":"2011","unstructured":"Lemm S, Blankertz B, Dickhaus T, M\u00fcller KR: Introduction to machine learning for brain imaging. Neuroimage. 2011, 56: 387-399. 10.1016\/j.neuroimage.2010.11.004.","journal-title":"Neuroimage"},{"key":"47_CR12","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1186\/1471-2105-7-91","volume":"7","author":"S Varma","year":"2006","unstructured":"Varma S, Simon R: Bias in error estimation when using cross-validation for model selection. BMC Bioinformatics. 2006, 7: 91-10.1186\/1471-2105-7-91.","journal-title":"BMC Bioinformatics"},{"key":"47_CR13","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1186\/1756-0381-6-5","volume":"6","author":"S Okser","year":"2013","unstructured":"Okser S, Pahikkala T, Aittokallio T: Genetic variants and their interactions in disease risk prediction - machine learning and network perspectives. BioData Min. 2013, 6: 5-10.1186\/1756-0381-6-5.","journal-title":"BioData Min"},{"key":"47_CR14","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1002\/cem.1225","volume":"23","author":"P Filzmoser","year":"2009","unstructured":"Filzmoser P, Liebmann B, Varmuza K: Repeated double cross validation. J Chemom. 2009, 23: 160-171. 10.1002\/cem.1225.","journal-title":"J Chemom"},{"key":"47_CR15","doi-asserted-by":"publisher","first-page":"921","DOI":"10.1021\/ci0342324","volume":"44","author":"JK Wegner","year":"2004","unstructured":"Wegner JK, Fr\u00f6hlich H, Zell A: Feature selection for descriptor based classification models. 1. Theory and GA-SEC algorithm. J Chem Inf Comput Sci. 2004, 44: 921-930. 10.1021\/ci0342324.","journal-title":"J Chem Inf Comput Sci"},{"key":"47_CR16","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.chemolab.2006.04.021","volume":"84","author":"E Anderssen","year":"2006","unstructured":"Anderssen E, Dyrstad K, Westad F, Martens H: Reducing over-optimism in variable selection by cross-model validation. Chemom Intell Lab Syst. 2006, 84: 69-74. 10.1016\/j.chemolab.2006.04.021.","journal-title":"Chemom Intell Lab Syst"},{"key":"47_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.chemolab.2008.01.005","volume":"93","author":"L Gidskehaug","year":"2008","unstructured":"Gidskehaug L, Anderssen E, Alsberg B: Cross model validation and optimisation of bilinear regression models. Chemom Intell Lab Syst. 2008, 93: 1-10. 10.1016\/j.chemolab.2008.01.005.","journal-title":"Chemom Intell Lab Syst"},{"key":"47_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1758-2946-6-10","volume":"6","author":"D Krstajic","year":"2014","unstructured":"Krstajic D, Buturovic LJ, Leahy DE, Thomas S: Cross-validation pitfalls when selecting and assessing regression and classification models. J Cheminform. 2014, 6: 1-15. 10.1186\/1758-2946-6-10.","journal-title":"J Cheminform"},{"key":"47_CR19","doi-asserted-by":"publisher","first-page":"1733","DOI":"10.1021\/ci800151m","volume":"48","author":"IV Tetko","year":"2008","unstructured":"Tetko IV, Sushko I, Pandey AK, Zhu H, Tropsha A, Papa E, \u00d6berg T, Todeschini R, Fourches D, Varnek A: Critical assessment of QSAR models of environmental toxicity against Tetrahymena pyriformis: Focusing on applicability domain and overfitting by variable selection. J Chem Inf Model. 2008, 48: 1733-1746. 10.1021\/ci800151m.","journal-title":"J Chem Inf Model"},{"key":"47_CR20","doi-asserted-by":"publisher","first-page":"516","DOI":"10.1002\/minf.201200134","volume":"32","author":"M G\u00fctlein","year":"2013","unstructured":"G\u00fctlein M, Helma C, Karwath A, Kramer S: A large-scale empirical evaluation of cross-validation and external test set validation in (Q)SAR. Mol Inform. 2013, 32: 516-528. 10.1002\/minf.201200134.","journal-title":"Mol Inform"},{"key":"47_CR21","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1006\/jmps.1999.1276","volume":"44","author":"W Zucchini","year":"2000","unstructured":"Zucchini W: An introduction to model selection. J Math Psychol. 2000, 44: 41-61. 10.1006\/jmps.1999.1276.","journal-title":"J Math Psychol"},{"key":"47_CR22","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1007\/s11306-006-0037-z","volume":"2","author":"DI Broadhurst","year":"2006","unstructured":"Broadhurst DI, Kell DB: Statistical strategies for avoiding false discoveries in metabolomics and related experiments. Metabolomics. 2006, 2: 171-196. 10.1007\/s11306-006-0037-z.","journal-title":"Metabolomics"},{"key":"47_CR23","doi-asserted-by":"publisher","first-page":"1241","DOI":"10.1007\/s00216-007-1790-1","volume":"390","author":"R Bro","year":"2008","unstructured":"Bro R, Kjeldahl K, Smilde AK, Kiers HAL: Cross-validation of component models: a critical look at current methods. Anal Bioanal Chem. 2008, 390: 1241-1251. 10.1007\/s00216-007-1790-1.","journal-title":"Anal Bioanal Chem"},{"key":"47_CR24","first-page":"1371","volume":"3","author":"J Reunanen","year":"2003","unstructured":"Reunanen J: Overfitting in making comparisons between variable selection methods. J Mach Learn Res. 2003, 3: 1371-1382.","journal-title":"J Mach Learn Res"},{"key":"47_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1021\/ci0342472","volume":"44","author":"DM Hawkins","year":"2004","unstructured":"Hawkins DM: The problem of overfitting. J Chem Inf Comput Sci. 2004, 44: 1-12. 10.1021\/ci0342472.","journal-title":"J Chem Inf Comput Sci"},{"key":"47_CR26","first-page":"2079","volume":"11","author":"GC Cawley","year":"2010","unstructured":"Cawley GC, Talbot NLC: On over-fitting in model selection and subsequent selection bias in performance evaluation. J Mach Learn Res. 2010, 11: 2079-2107.","journal-title":"J Mach Learn Res"},{"key":"47_CR27","doi-asserted-by":"publisher","first-page":"1033","DOI":"10.1002\/qsar.200530134","volume":"24","author":"K Baumann","year":"2005","unstructured":"Baumann K: Chance correlation in variable subset regression: Influence of the objective function, the selection mechanism, and ensemble averaging. QSAR Comb Sci. 2005, 24: 1033-1046. 10.1002\/qsar.200530134.","journal-title":"QSAR Comb Sci"},{"key":"47_CR28","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1007\/s10822-004-4071-5","volume":"18","author":"K Baumann","year":"2004","unstructured":"Baumann K, Stiefl N: Validation tools for variable subset regression. J Comput Aided Mol Des. 2004, 18: 549-562. 10.1007\/s10822-004-4071-5.","journal-title":"J Comput Aided Mol Des"},{"key":"47_CR29","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1007\/s10463-009-0234-4","volume":"62","author":"PM Lukacs","year":"2009","unstructured":"Lukacs PM, Burnham KP, Anderson DR: Model selection bias and Freedman\u2019s paradox. Ann Inst Stat Math. 2009, 62: 117-125. 10.1007\/s10463-009-0234-4.","journal-title":"Ann Inst Stat Math"},{"key":"47_CR30","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/j.tree.2003.10.013","volume":"19","author":"JB Johnson","year":"2004","unstructured":"Johnson JB, Omland KS: Model selection in ecology and evolution. Trends Ecol Evol. 2004, 19: 101-108. 10.1016\/j.tree.2003.10.013.","journal-title":"Trends Ecol Evol"},{"key":"47_CR31","doi-asserted-by":"publisher","DOI":"10.1201\/9781420035933","volume-title":"Subset Selection in Regression","author":"A Miller","year":"2002","unstructured":"Miller A: Subset Selection in Regression. 2002, Chapmann & Hall\/CRC, New York, 2","edition":"2"},{"key":"47_CR32","doi-asserted-by":"publisher","first-page":"2320","DOI":"10.1021\/ci200211n","volume":"51","author":"N Chirico","year":"2011","unstructured":"Chirico N, Gramatica P: Real external predictivity of QSAR models: how to evaluate it? Comparison of different validation criteria and proposal of using the concordance correlation coefficient. J Chem Inf Model. 2011, 51: 2320-2335. 10.1021\/ci200211n.","journal-title":"J Chem Inf Model"},{"key":"47_CR33","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1002\/qsar.200610151","volume":"26","author":"P Gramatica","year":"2007","unstructured":"Gramatica P: Principles of QSAR models validation: internal and external. QSAR Comb Sci. 2007, 26: 694-701. 10.1002\/qsar.200610151.","journal-title":"QSAR Comb Sci"},{"key":"47_CR34","doi-asserted-by":"publisher","first-page":"4297","DOI":"10.2174\/092986709789578213","volume":"16","author":"T Scior","year":"2009","unstructured":"Scior T, Medina-Franco JL, Do Q-T, Mart\u00ednez-Mayorga K, Yunes Rojas JA, Bernard P: How to recognize and workaround pitfalls in QSAR studies: a critical review. Curr Med Chem. 2009, 16: 4297-4313. 10.2174\/092986709789578213.","journal-title":"Curr Med Chem"},{"key":"47_CR35","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1002\/qsar.200430909","volume":"24","author":"AO Aptula","year":"2005","unstructured":"Aptula AO, Jeliazkova NG, Schultz TW, Cronin MTD: The better predictive model: High q2 for the training set or low root mean square error of prediction for the test set?. QSAR Comb Sci. 2005, 24: 385-396. 10.1002\/qsar.200430909.","journal-title":"QSAR Comb Sci"},{"key":"47_CR36","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1002\/qsar.200390007","volume":"22","author":"A Tropsha","year":"2003","unstructured":"Tropsha A, Gramatica P, Gombar VK: The importance of being earnest: validation is the absolute essential for successful application and interpretation of QSPR models. QSAR Comb Sci. 2003, 22: 69-77. 10.1002\/qsar.200390007.","journal-title":"QSAR Comb Sci"},{"key":"47_CR37","doi-asserted-by":"publisher","first-page":"515","DOI":"10.7326\/0003-4819-130-6-199903160-00016","volume":"130","author":"AC Justice","year":"1999","unstructured":"Justice AC, Covinsky KE, Berlin JA: Assessing the generalizability of prognostic information. Ann Intern Med. 1999, 130: 515-524. 10.7326\/0003-4819-130-6-199903160-00016.","journal-title":"Ann Intern Med"},{"key":"47_CR38","doi-asserted-by":"publisher","first-page":"579","DOI":"10.1021\/ci025626i","volume":"43","author":"DM Hawkins","year":"2003","unstructured":"Hawkins DM, Basak SC, Mills D: Assessing model fit by cross-validation. J Chem Inf Comput Sci. 2003, 43: 579-586. 10.1021\/ci025626i.","journal-title":"J Chem Inf Comput Sci"},{"key":"47_CR39","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1007\/978-1-4757-3462-1","volume-title":"Regression Modeling Strategies: With Application to Linear Models, Logistic Regression, and Survival Analysis","author":"E Harrell Frank","year":"2001","unstructured":"Harrell Frank E: Model Validation. Regression Modeling Strategies: With Application to Linear Models, Logistic Regression, and Survival Analysis. 2001, Springer Science and Business Inc, New York, 90-10.1007\/978-1-4757-3462-1."},{"key":"47_CR40","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1016\/S0169-7439(99)00027-1","volume":"49","author":"N Faber","year":"1999","unstructured":"Faber N, Klaas M: Estimating the uncertainty in estimates of root mean square error of prediction: application to determining the size of an adequate test set in multivariate calibration. Chemom Intell Lab Syst. 1999, 49: 79-89. 10.1016\/S0169-7439(99)00027-1.","journal-title":"Chemom Intell Lab Syst"},{"key":"47_CR41","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1080\/00401706.1991.10484873","volume":"33","author":"EB Roecker","year":"1991","unstructured":"Roecker EB: Prediction error and its estimation for subset-selected models. Technometrics. 1991, 33: 459-468. 10.1080\/00401706.1991.10484873.","journal-title":"Technometrics"},{"key":"47_CR42","first-page":"188","volume":"24","author":"DM Hawkins","year":"2010","unstructured":"Hawkins DM, Kraker JJ: Determinstic fallacies and model validation. J Chem Inf Model. 2010, 24: 188-193.","journal-title":"J Chem Inf Model"},{"key":"47_CR43","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-4541-9","volume-title":"An Introduction to the Bootstrap","author":"B Efron","year":"1993","unstructured":"Efron B, Tibshirani RJ: An Introduction to the Bootstrap. 1993, Chapman & Hall\/CRC, New York"},{"key":"47_CR44","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1186\/1471-2105-9-360","volume":"9","author":"M Eklund","year":"2008","unstructured":"Eklund M, Spjuth O, Wikberg JE: The C1C2: a framework for simultaneous model selection and assessment. BMC Bioinformatics. 2008, 9: 360-373. 10.1186\/1471-2105-9-360.","journal-title":"BMC Bioinformatics"},{"key":"47_CR45","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L: Random forests. Mach Learn. 2001, 45: 5-32. 10.1023\/A:1010933404324.","journal-title":"Mach Learn"},{"key":"47_CR46","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1002\/cem.730","volume":"16","author":"K Baumann","year":"2002","unstructured":"Baumann K, Albert H, von Korff M: A systematic evaluation of the benefits and hazards of variable selection in latent variable regression. Part I. Search algorithm, theory and simulations. J Chemom. 2002, 16: 339-350. 10.1002\/cem.730.","journal-title":"J Chemom"},{"key":"47_CR47","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1214\/09-SS054","volume":"4","author":"S Arlot","year":"2010","unstructured":"Arlot S, Celisse A: A survey of cross-validation procedures for model selection. Stat Surv. 2010, 4: 40-79. 10.1214\/09-SS054.","journal-title":"Stat Surv"},{"key":"47_CR48","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1006\/jmps.1999.1279","volume":"44","author":"M Browne","year":"2000","unstructured":"Browne M: Cross-validation methods. J Math Psychol. 2000, 44: 108-132. 10.1006\/jmps.1999.1279.","journal-title":"J Math Psychol"},{"key":"47_CR49","doi-asserted-by":"publisher","first-page":"486","DOI":"10.1080\/01621459.1993.10476299","volume":"88","author":"J Shao","year":"1993","unstructured":"Shao J: Linear model selection by cross-validation. J Am Stat Assoc. 1993, 88: 486-494. 10.1080\/01621459.1993.10476299.","journal-title":"J Am Stat Assoc"},{"key":"47_CR50","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1016\/j.cognition.2010.10.004","volume":"118","author":"E Briscoe","year":"2011","unstructured":"Briscoe E, Feldman J: Conceptual complexity and the bias\/variance tradeoff. Cognition. 2011, 118: 2-16. 10.1016\/j.cognition.2010.10.004.","journal-title":"Cognition"},{"key":"47_CR51","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1186\/1471-2105-6-50","volume":"6","author":"E Freyhult","year":"2005","unstructured":"Freyhult E, Prusis P, Lapinsh M, Wikberg JE, Moulton V, Gustafsson MG: Unbiased descriptor and parameter selection confirms the potential of proteochemometric modelling. BMC Bioinformatics. 2005, 6: 50-64. 10.1186\/1471-2105-6-50.","journal-title":"BMC Bioinformatics"},{"key":"47_CR52","doi-asserted-by":"publisher","first-page":"e16774","DOI":"10.1371\/journal.pone.0016774","volume":"6","author":"S Lise","year":"2011","unstructured":"Lise S, Buchan D, Pontil M, Jones DT: Predictions of hot spot residues at protein-protein interfaces using support vector machines. PLoS ONE. 2011, 6: e16774-10.1371\/journal.pone.0016774.","journal-title":"PLoS ONE"},{"key":"47_CR53","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1186\/1471-2105-9-319","volume":"9","author":"A Statnikov","year":"2008","unstructured":"Statnikov A, Wang L, Aliferis CF: A comprehensive comparison of random forests and support vector machines for microarray-based cancer classification. BMC Bioinformatics. 2008, 9: 319-10.1186\/1471-2105-9-319.","journal-title":"BMC Bioinformatics"},{"key":"47_CR54","doi-asserted-by":"publisher","first-page":"1193","DOI":"10.1093\/jnci\/djj330","volume":"98","author":"S Asgharzadeh","year":"2006","unstructured":"Asgharzadeh S, Pique-Regi R, Sposto R, Wang H, Yang Y, Shimada H, Matthay K, Buckley J, Ortega A, Seeger RC: Prognostic significance of gene expression profiles of metastatic neuroblastomas lacking MYCN gene amplification. J Natl Cancer Inst. 2006, 98: 1193-1203. 10.1093\/jnci\/djj330.","journal-title":"J Natl Cancer Inst"},{"key":"47_CR55","doi-asserted-by":"publisher","first-page":"1971","DOI":"10.1093\/bioinformatics\/bti292","volume":"21","author":"C Lottaz","year":"2005","unstructured":"Lottaz C, Spang R: Molecular decomposition of complex clinical phenotypes using biologically structured analysis of microarray data. Bioinformatics. 2005, 21: 1971-1978. 10.1093\/bioinformatics\/bti292.","journal-title":"Bioinformatics"},{"key":"47_CR56","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1016\/j.aca.2007.04.043","volume":"592","author":"S Smit","year":"2007","unstructured":"Smit S, van Breemen MJ, Hoefsloot HCJ, Smilde AK, Aerts JMFG, de Koster CG: Assessing the statistical validity of proteomics based biomarkers. Anal Chim Acta. 2007, 592: 210-217. 10.1016\/j.aca.2007.04.043.","journal-title":"Anal Chim Acta"},{"key":"47_CR57","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani R: Regression shrinkage and selection via the lasso. J R Stat Soc Ser B Methodol. 1996, 58: 267-288.","journal-title":"J R Stat Soc Ser B Methodol"},{"key":"47_CR58","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1111\/j.1467-9868.2007.00581.x","volume":"69","author":"M Yuan","year":"2007","unstructured":"Yuan M, Lin Y: On the non-negative garrotte estimator. J R Stat Soc Ser B Statistical Methodol. 2007, 69: 143-161. 10.1111\/j.1467-9868.2007.00581.x.","journal-title":"J R Stat Soc Ser B Statistical Methodol"},{"key":"47_CR59","doi-asserted-by":"publisher","first-page":"773","DOI":"10.1021\/ci9901338","volume":"40","author":"J Huuskonen","year":"2000","unstructured":"Huuskonen J: Estimation of aqueous solubility for a diverse set of organic compounds based on molecular topology. J Chem Inf Comput Sci. 2000, 40: 773-777. 10.1021\/ci9901338.","journal-title":"J Chem Inf Comput Sci"},{"key":"47_CR60","doi-asserted-by":"publisher","first-page":"1466","DOI":"10.1002\/jcc.21707","volume":"32","author":"CW Yap","year":"2011","unstructured":"Yap CW: PaDEL-descriptor: an open source software to calculate molecular descriptors and fingerprints. J Comput Chem. 2011, 32: 1466-1474. 10.1002\/jcc.21707.","journal-title":"J Comput Chem"},{"key":"47_CR61","first-page":"25","volume":"10","author":"V Zuber","year":"2010","unstructured":"Zuber V, Strimmer K: High-dimensional regression and variable selection using CAR scores. Stat Appl Genet Mol Biol. 2010, 10: 25-","journal-title":"Stat Appl Genet Mol Biol"},{"key":"47_CR62","doi-asserted-by":"publisher","first-page":"1440","DOI":"10.1021\/ci0499469","volume":"44","author":"R Guha","year":"2004","unstructured":"Guha R, Jurs PC: Development of QSAR models to predict and interpret the biological activity of artemisinin analogues. J Chem Inf Comput Sci. 2004, 44: 1440-1449. 10.1021\/ci0499469.","journal-title":"J Chem Inf Comput Sci"},{"key":"47_CR63","doi-asserted-by":"publisher","first-page":"1337","DOI":"10.1021\/ci800038f","volume":"48","author":"H Hong","year":"2008","unstructured":"Hong H, Xie Q, Ge W, Qian F, Fang H, Shi L, Su Z, Perkins R, Tong W: Mold(2), molecular descriptors from 2D structures for chemoinformatics and toxicoinformatics. J Chem Inf Model. 2008, 48: 1337-1344. 10.1021\/ci800038f.","journal-title":"J Chem Inf Model"},{"key":"47_CR64","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1016\/S1093-3263(01)00123-1","volume":"20","author":"A Golbraikh","year":"2002","unstructured":"Golbraikh A, Tropsha A: Beware of q2!. J Mol Graph Model. 2002, 20: 269-276. 10.1016\/S1093-3263(01)00123-1.","journal-title":"J Mol Graph Model"},{"key":"47_CR65","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-2477-6","volume-title":"Plane Answers to Complex Questions","author":"R Christensen","year":"1996","unstructured":"Christensen R: Plane Answers to Complex Questions. 1996, Springer, New York, 2","edition":"2"},{"key":"47_CR66","doi-asserted-by":"publisher","first-page":"341","DOI":"10.1080\/07388940500339183","volume":"22","author":"K Clarke","year":"2005","unstructured":"Clarke K: The phantom menace: omitted variable bias in econometric research. Confl Manag Peace Sci. 2005, 22: 341-352. 10.1080\/07388940500339183.","journal-title":"Confl Manag Peace Sci"},{"key":"47_CR67","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1016\/0169-7439(90)80052-8","volume":"9","author":"R Marbach","year":"1990","unstructured":"Marbach R, Heise HM: Calibration modeling by partial least-squares and principal component regression and its optimization using an improved leverage correction for prediction testing. Chemom Intell Lab Syst. 1990, 9: 45-63. 10.1016\/0169-7439(90)80052-8.","journal-title":"Chemom Intell Lab Syst"},{"key":"47_CR68","first-page":"548","volume":"92","author":"B Efron","year":"1997","unstructured":"Efron B, Tibshirani R: Improvements on cross-validation: the .632+ bootstrap method. J Am Stat Assoc. 1997, 92: 548-560.","journal-title":"J Am Stat Assoc"},{"key":"47_CR69","doi-asserted-by":"publisher","first-page":"291","DOI":"10.2307\/1403680","volume":"60","author":"L Breiman","year":"1992","unstructured":"Breiman L, Spector P: Submodel selection and evaluation in regression. The X-random case. Int Stat Rev. 1992, 60: 291-319. 10.2307\/1403680.","journal-title":"Int Stat Rev"},{"key":"47_CR70","doi-asserted-by":"publisher","first-page":"3561","DOI":"10.1109\/TIT.2010.2048503","volume":"56","author":"H Xu","year":"2010","unstructured":"Xu H, Caramanis C, Mannor S: Robust regression and lasso. IEEE Trans Inf Theory. 2010, 56: 3561-3574. 10.1109\/TIT.2010.2048503.","journal-title":"IEEE Trans Inf Theory"},{"key":"47_CR71","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-20192-9","volume-title":"Statistics for High-Dimensional Data Methods, Theory and Applications","author":"P B\u00fchlmann","year":"2011","unstructured":"B\u00fchlmann P, van de Geer SA: Statistics for High-Dimensional Data Methods, Theory and Applications. 2011, Springer, New York"},{"key":"47_CR72","unstructured":"R: A Language and Environment for Statistical Computing. 2011, R Foundation for Statistical Computing, Vienna, Austria"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13321-014-0047-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-014-0047-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-014-0047-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-014-0047-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,5]],"date-time":"2024-06-05T09:24:12Z","timestamp":1717579452000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/s13321-014-0047-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,11,26]]},"references-count":72,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2014,12]]}},"alternative-id":["47"],"URL":"https:\/\/doi.org\/10.1186\/s13321-014-0047-1","relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,11,26]]},"assertion":[{"value":"8 July 2014","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2014","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 November 2014","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"47"}}