{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T11:50:10Z","timestamp":1777636210010,"version":"3.51.4"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2014,3,29]],"date-time":"2014-03-29T00:00:00Z","timestamp":1396051200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/2.0"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1186\/1758-2946-6-10","type":"journal-article","created":{"date-parts":[[2014,3,29]],"date-time":"2014-03-29T22:01:11Z","timestamp":1396130471000},"source":"Crossref","is-referenced-by-count":845,"title":["Cross-validation pitfalls when selecting and assessing regression and classification models"],"prefix":"10.1186","volume":"6","author":[{"given":"Damjan","family":"Krstajic","sequence":"first","affiliation":[]},{"given":"Ljubomir J","family":"Buturovic","sequence":"additional","affiliation":[]},{"given":"David E","family":"Leahy","sequence":"additional","affiliation":[]},{"given":"Simon","family":"Thomas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,3,29]]},"reference":[{"issue":"1","key":"587_CR1","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1080\/00401706.1974.10489157","volume":"16","author":"DM Allen","year":"1974","unstructured":"Allen DM: The relationship between variable selection and data agumentation and a method for prediction. Technometrics. 1974, 16 (1): 125-127. 10.1080\/00401706.1974.10489157.","journal-title":"Technometrics"},{"key":"587_CR2","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1111\/j.2517-6161.1974.tb00994.x","volume":"36, No. 2","author":"M Stone","year":"1974","unstructured":"Stone M: Cross-validatory choice and assessment of statistical predictions. J Roy Stat Soc B Met. 1974, 36, No. 2: 111-147.","journal-title":"J Roy Stat Soc B Met"},{"issue":"350","key":"587_CR3","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1080\/01621459.1975.10479865","volume":"70","author":"S Geisser","year":"1975","unstructured":"Geisser S: The predictive sample reuse method with applications. J Am Stat Assoc. 1975, 70 (350): 320-328. 10.1080\/01621459.1975.10479865.","journal-title":"J Am Stat Assoc"},{"key":"587_CR4","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-84858-7","volume-title":"The elements of statistical learning","author":"T Hastie","year":"2009","unstructured":"Hastie T, Tibshirani R, Friedman J: The elements of statistical learning. 2009, New York: Springer"},{"issue":"3","key":"587_CR5","doi-asserted-by":"publisher","first-page":"291","DOI":"10.2307\/1403680","volume":"60","author":"L Breiman","year":"1992","unstructured":"Breiman L, Spector P: Submodel selection and evaluation in regression. The x-random case. Int Stat Rev. 1992, 60 (3): 291-319. 10.2307\/1403680.","journal-title":"Int Stat Rev"},{"key":"587_CR6","first-page":"1137","volume-title":"IJCAI","author":"R Kohavi","year":"1995","unstructured":"Kohavi R: A study of cross-validation and bootstrap for accuracy estimation and model selection. IJCAI. 1995, 1137-1145."},{"issue":"3","key":"587_CR7","first-page":"27","volume":"2","author":"CC Chang","year":"2011","unstructured":"Chang CC, Lin CJ: LIBSVM: a library for support vector machines. ACM Transactions on Intelligent Systems and Technology (TIST). 2011, 2 (3): 27-","journal-title":"ACM Transactions on Intelligent Systems and Technology (TIST)"},{"issue":"2","key":"587_CR8","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1016\/j.stamet.2005.02.003","volume":"2","author":"S Dudoit","year":"2005","unstructured":"Dudoit S, van der Laan MJ: Asymptotics of cross-validated risk estimation in estimator selection and performance assessment. Stat Meth. 2005, 2 (2): 131-154. 10.1016\/j.stamet.2005.02.003.","journal-title":"Stat Meth"},{"issue":"1","key":"587_CR9","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1186\/1471-2105-7-91","volume":"7","author":"S Varma","year":"2006","unstructured":"Varma S, Simon R: Bias in error estimation when using cross-validation for model selection. BMC Bioinformatics. 2006, 7 (1): 91-10.1186\/1471-2105-7-91.","journal-title":"BMC Bioinformatics"},{"issue":"10","key":"587_CR10","doi-asserted-by":"publisher","first-page":"6562","DOI":"10.1073\/pnas.102102699","volume":"99","author":"C Ambroise","year":"2002","unstructured":"Ambroise C, McLachlan GJ: Selection bias in gene extraction on the basis of microarray gene-expression data. Proc Natl Acad Sci. 2002, 99 (10): 6562-6566. 10.1073\/pnas.102102699.","journal-title":"Proc Natl Acad Sci"},{"issue":"1","key":"587_CR11","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/j.stamet.2005.09.011","volume":"3","author":"X Zhu","year":"2006","unstructured":"Zhu X, Ambroise C, McLachlan GJ: Selection bias in working with the top genes in supervised classification of tissue samples. Stat Meth. 2006, 3 (1): 29-41. 10.1016\/j.stamet.2005.09.011.","journal-title":"Stat Meth"},{"key":"587_CR12","unstructured":"QSARdata. http:\/\/cran.r-project.org\/web\/packages\/QSARdata ,"},{"issue":"6","key":"587_CR13","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1016\/j.jmgm.2005.03.003","volume":"23","author":"L He","year":"2005","unstructured":"He L, Jurs PC: Assessing the reliability of a QSAR model\u2019s predictions. J Mol Graph Model. 2005, 23 (6): 503-523. 10.1016\/j.jmgm.2005.03.003.","journal-title":"J Mol Graph Model"},{"issue":"4","key":"587_CR14","doi-asserted-by":"publisher","first-page":"520","DOI":"10.1017\/S0317167100003759","volume":"31","author":"J Burns","year":"2004","unstructured":"Burns J, Weaver DF: A mathematical model for prediction of drug molecule diffusion across the blood\u2013brain barrier. Can J Neurol Sci. 2004, 31 (4): 520-527.","journal-title":"Can J Neurol Sci"},{"issue":"5\u20136","key":"587_CR15","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1002\/minf.201200166","volume":"32","author":"H Pham-The","year":"2013","unstructured":"Pham-The H, Gonz\u00e1lez-\u00c1lvarez I, Bermejo M, Garrigues T, Le-Thi-Thu H, Cabrera-P\u00e9rez M\u00c1: The use of rule-based and QSPR approaches in ADME profiling: a case study on caco-2 permeability. Molecular Informatics. 2013, 32 (5\u20136): 459-479.","journal-title":"Molecular Informatics"},{"issue":"3","key":"587_CR16","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1021\/ci0500132","volume":"45","author":"M Karthikeyan","year":"2005","unstructured":"Karthikeyan M, Glen RC, Bender C: General melting point prediction based on a diverse compound data set and artificial neural networks. J Chem Inf Model. 2005, 45 (3): 581-590. 10.1021\/ci0500132.","journal-title":"J Chem Inf Model"},{"issue":"1","key":"587_CR17","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1021\/jm040835a","volume":"48","author":"J Kazius","year":"2005","unstructured":"Kazius J, McGuire R, Bursi R: Derivation and validation of toxicophores for mutagenicity prediction. J Med Chem. 2005, 48 (1): 312-320. 10.1021\/jm040835a.","journal-title":"J Med Chem"},{"issue":"6","key":"587_CR18","doi-asserted-by":"publisher","first-page":"1436","DOI":"10.1021\/ci400113t","volume":"53","author":"L Goracci","year":"2013","unstructured":"Goracci L, Ceccarelli M, Bonelli D, Cruciani G: Modeling phospholipidosis induction: reliability and warnings. J Chem Inf Model. 2013, 53 (6): 1436-1446. 10.1021\/ci400113t.","journal-title":"J Chem Inf Model"},{"key":"587_CR19","unstructured":"e1071. http:\/\/cran.r-project.org\/web\/packages\/e1071 ,"},{"issue":"1","key":"587_CR20","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1080\/03610927608827333","volume":"5","author":"AE Hoerl","year":"1976","unstructured":"Hoerl AE, Kennard RW: Ridge regression iterative estimation of the biasing parameter. Commun Stat. 1976, 5 (1): 77-88. 10.1080\/03610927608827333.","journal-title":"Commun Stat"},{"issue":"1","key":"587_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v033.i01","volume":"33","author":"J Friedman","year":"2010","unstructured":"Friedman J, Hastie T, Tibshirani R: Regularization paths for generalized linear models via coordinate descent. J Stat Software. 2010, 33 (1): 1-","journal-title":"J Stat Software"},{"key":"587_CR22","unstructured":"glmnet. http:\/\/cran.r-project.org\/web\/packages\/glmnet ,"},{"key":"587_CR23","first-page":"520","volume-title":"Perspectives in probability and statistics, papers in honour of MS Bartlett","author":"H Wold","year":"1975","unstructured":"Wold H: Soft modeling by latent variables: the nonlinear iterative partial least squares approach. Perspectives in probability and statistics, papers in honour of MS Bartlett. 1975, 520-540."},{"key":"587_CR24","unstructured":"pls. http:\/\/cran.r-project.org\/web\/packages\/pls ,"},{"key":"587_CR25","volume-title":"Classification and regression trees","author":"L Breiman","year":"1984","unstructured":"Breiman L, Friedman JH, Olshen RA, Stone CJ: Classification and regression trees. 1984, Monterey, CA: Wadsworth & Brooks"},{"key":"587_CR26","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1214\/09-SS054","volume":"4","author":"S Arlot","year":"2010","unstructured":"Arlot S, Celisse A: A survey of cross-validation procedures for model selection. Stat Surv. 2010, 4: 40-79. 10.1214\/09-SS054.","journal-title":"Stat Surv"},{"issue":"3","key":"587_CR27","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1093\/biomet\/76.3.503","volume":"76","author":"P Burman","year":"1989","unstructured":"Burman P: A comparative study of ordinary cross-validation, v-fold cross-validation and the repeated learning-testing methods. Biometrika. 1989, 76 (3): 503-514.","journal-title":"Biometrika"},{"key":"587_CR28","volume-title":"arXiv preprint arXiv:0802.0566","author":"S Arlot","year":"2008","unstructured":"Arlot S: V-fold cross-validation improved: V-fold penalization. arXiv preprint arXiv:0802.0566. 2008"},{"key":"587_CR29","first-page":"281","volume":"13","author":"J Bergstra","year":"2012","unstructured":"Bergstra J, Bengio Y: Random search for hyper-parameter optimization. J Mach Learn Res. 2012, 13: 281-305.","journal-title":"J Mach Learn Res"},{"issue":"4","key":"587_CR30","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1093\/comjnl\/7.4.308","volume":"7","author":"JA Nelder","year":"1965","unstructured":"Nelder JA, Mead R: A simplex method for function minimization. Comput J. 1965, 7 (4): 308-313. 10.1093\/comjnl\/7.4.308.","journal-title":"Comput J"},{"issue":"1","key":"587_CR31","doi-asserted-by":"publisher","first-page":"822","DOI":"10.1214\/08-AOAS224","volume":"3","author":"RJ Tibshirani","year":"2009","unstructured":"Tibshirani RJ, Tibshirani R: A bias correction for the minimum error rate in cross-validation. Ann Appl Stat. 2009, 3 (1): 822-829.","journal-title":"Ann Appl Stat"},{"issue":"3","key":"587_CR32","doi-asserted-by":"publisher","first-page":"693","DOI":"10.1111\/biom.12041","volume":"69","author":"C Bernau","year":"2013","unstructured":"Bernau C, Augustin T, Boulesteix AL: Correcting the optimal resampling\u2012based error rate by estimating the error rate of wrapper algorithms. Biometrics. 2013, 69 (3): 693-702. 10.1111\/biom.12041.","journal-title":"Biometrics"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1186\/1758-2946-6-10\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1758-2946-6-10.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1758-2946-6-10.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,25]],"date-time":"2024-05-25T13:38:13Z","timestamp":1716644293000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/1758-2946-6-10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,3,29]]},"references-count":32,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2014,12]]}},"alternative-id":["587"],"URL":"https:\/\/doi.org\/10.1186\/1758-2946-6-10","relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,3,29]]},"article-number":"10"}}