{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:38:23Z","timestamp":1773254303938,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,11,28]],"date-time":"2018-11-28T00:00:00Z","timestamp":1543363200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000009","name":"Foundation for the National Institutes of Health","doi-asserted-by":"publisher","award":["T32ES007329"],"award-info":[{"award-number":["T32ES007329"]}],"id":[{"id":"10.13039\/100000009","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1186\/s13321-018-0309-4","type":"journal-article","created":{"date-parts":[[2018,11,28]],"date-time":"2018-11-28T13:03:15Z","timestamp":1543410195000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["chemmodlab: a cheminformatics modeling laboratory\u00a0R package for fitting and assessing machine learning models"],"prefix":"10.1186","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8041-8524","authenticated-orcid":false,"given":"Jeremy R.","family":"Ash","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6464-9955","authenticated-orcid":false,"given":"Jacqueline M.","family":"Hughes-Oliver","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,11,28]]},"reference":[{"key":"309_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/b94608","volume-title":"The elements of statistical learning","author":"T Hastie","year":"2009","unstructured":"Hastie T, Tibshirani R, Friedman J (2009) The elements of statistical learning, 2nd edn. Springer, New York, NY. https:\/\/doi.org\/10.1007\/b94608","edition":"2"},{"key":"309_CR2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-6849-3","volume-title":"Applied predictive modeling","author":"M Kuhn","year":"2013","unstructured":"Kuhn M, Johnson K (2013) Applied predictive modeling. Springer, New York, NY. https:\/\/doi.org\/10.1007\/978-1-4614-6849-3"},{"key":"309_CR3","unstructured":"(2017) chemmodlab. https:\/\/cran.r-project.org\/web\/packages\/chemmodlab\/index.html . Accessed 29 Sept 2018"},{"issue":"5","key":"309_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v028.i05","volume":"28","author":"M Kuhn","year":"2008","unstructured":"Kuhn M (2008) Building predictive models in R using the caret package. J Stat Softw 28(5):1\u201326. https:\/\/doi.org\/10.18637\/jss.v028.i05","journal-title":"J Stat Softw"},{"key":"309_CR5","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1186\/s13321-015-0094-2","volume":"7","author":"G Tsiliki","year":"2015","unstructured":"Tsiliki G, Munteanu CR, Seoane JA, Fernandez-Lozano C, Sarimveis H, Willighagen EL (2015) RRegrs: an R package for computer-aided model selection with multiple regression models. J Cheminform 7:46. https:\/\/doi.org\/10.1186\/s13321-015-0094-2","journal-title":"J Cheminform"},{"issue":"1","key":"309_CR6","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1186\/s13321-015-0086-2","volume":"7","author":"DS Murrell","year":"2015","unstructured":"Murrell DS, Cortes-Ciriano I, van Westen GJP, Stott IP, Bender A, Malliavin TE, Glen RC (2015) Chemically Aware Model Builder (camb): an R package for property and bioactivity modelling of small molecules. J Cheminform 7(1):45. https:\/\/doi.org\/10.1186\/s13321-015-0086-2","journal-title":"J Cheminform"},{"issue":"1\u20132","key":"309_CR7","doi-asserted-by":"publisher","first-page":"61","DOI":"10.3233\/CI-2008-0016","volume":"11","author":"JM Hughes-Oliver","year":"2011","unstructured":"Hughes-Oliver JM, Brooks AD, Welch WJ, Khaledi MG, Hawkins D, Young SS, Patil K, Howell GW, Ng RT, Chu MT (2011) ChemModLab: a web-cased cheminformatics modeling laboratory. Silico Biol 11(1\u20132):61\u201381. https:\/\/doi.org\/10.3233\/CI-2008-0016","journal-title":"Silico Biol"},{"issue":"12","key":"309_CR8","doi-asserted-by":"publisher","first-page":"4977","DOI":"10.1021\/jm4004285","volume":"57","author":"A Cherkasov","year":"2014","unstructured":"Cherkasov A, Muratov EN, Fourches D, Varnek A, Baskin II, Cronin M, Dearden J, Gramatica P, Martin YC, Todeschini R, Consonni V, Kuz\u2019min VE, Cramer R, Benigni R, Yang C, Rathman J, Terfloth L, Gasteiger J, Richard A, Tropsha A (2014) QSAR modeling: where have you been? Where are you going to? J Med Chem 57(12):4977\u20135010. https:\/\/doi.org\/10.1021\/jm4004285","journal-title":"J Med Chem"},{"key":"309_CR9","unstructured":"(2007) AID 364. https:\/\/pubchem.ncbi.nlm.nih.gov\/bioassay\/364 . Accessed 29 Sept 2018"},{"issue":"2","key":"309_CR10","doi-asserted-by":"publisher","first-page":"515","DOI":"10.1021\/ci049847v","volume":"45","author":"K Liu","year":"2005","unstructured":"Liu K, Feng J, Young SS (2005) PowerMV: a software environment for molecular viewing, descriptor generation, data analysis and hit evaluation. J Chem Inf Model 45(2):515\u2013522. https:\/\/doi.org\/10.1021\/ci049847v","journal-title":"J Chem Inf Model"},{"issue":"3","key":"309_CR11","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1021\/ci00063a011","volume":"29","author":"FR Burden","year":"1989","unstructured":"Burden FR (1989) Molecular identification number for substructure searches. J Chem Inf Model 29(3):225\u2013227. https:\/\/doi.org\/10.1021\/ci00063a011","journal-title":"J Chem Inf Model"},{"issue":"5","key":"309_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18637\/jss.v018.i05","volume":"18","author":"R Guha","year":"2007","unstructured":"Guha R (2007) Chemical informatics functionality in R. J Stat Softw 18(5):1\u201316. https:\/\/doi.org\/10.18637\/jss.v018.i05","journal-title":"J Stat Softw"},{"key":"309_CR13","unstructured":"Guha R (2018) fingerprint: functions to operate on binary fingerprint data. R package version 3.5.7"},{"issue":"2","key":"309_CR14","doi-asserted-by":"publisher","first-page":"493","DOI":"10.1021\/ci025584y","volume":"43","author":"C Steinbeck","year":"2003","unstructured":"Steinbeck C, Han Y, Kuhn S, Horlacher O, Luttmann E, Willighagen E (2003) The Chemistry Development Kit (CDK): an open-source Java library for chemo- and bioinformatics. J Chem Inf Comput Sci 43(2):493\u2013500. https:\/\/doi.org\/10.1021\/ci025584y","journal-title":"J Chem Inf Comput Sci"},{"issue":"1","key":"309_CR15","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1186\/s13321-017-0220-4","volume":"9","author":"EL Willighagen","year":"2017","unstructured":"Willighagen EL, Mayfield JW, Alvarsson J, Berg A, Carlsson L, Jeliazkova N, Kuhn S, Pluskal T, Rojas-Chert\u00f3 M, Spjuth O et al (2017) The Chemistry Development Kit (CDK) v2. 0: atom typing, depiction, molecular formulas, and substructure searching. J Cheminform 9(1):33","journal-title":"J Cheminform"},{"issue":"6","key":"309_CR16","doi-asserted-by":"publisher","first-page":"974","DOI":"10.1021\/ci990071l","volume":"39","author":"ES Goll","year":"1999","unstructured":"Goll ES, Jurs PC (1999) Prediction of the normal boiling points of organic compounds from molecular structures with a computational neural network model. J Chem Inf Comput Sci 39(6):974\u2013983. https:\/\/doi.org\/10.1021\/ci990071l","journal-title":"J Chem Inf Comput Sci"},{"key":"309_CR17","unstructured":"(2018) rcdk. https:\/\/cran.r-project.org\/web\/packages\/rcdk\/index.html . Accessed 29 Sept 2018"},{"key":"309_CR18","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-7138-7","volume-title":"An introduction to statistical learning","author":"G James","year":"2013","unstructured":"James G, Witten D, Hastie T, Tibshirani R (2013) An introduction to statistical learning. Springer, New York, NY. https:\/\/doi.org\/10.1007\/978-1-4614-7138-7"},{"issue":"15","key":"309_CR19","doi-asserted-by":"publisher","first-page":"3301","DOI":"10.1093\/bioinformatics\/bti499","volume":"21","author":"AM Molinaro","year":"2005","unstructured":"Molinaro AM, Simon R, Pfeiffer RM (2005) Prediction error estimation: a comparison of resampling methods. Bioinformatics 21(15):3301\u20133307. https:\/\/doi.org\/10.1093\/bioinformatics\/bti499","journal-title":"Bioinformatics"},{"issue":"11","key":"309_CR20","doi-asserted-by":"publisher","first-page":"3735","DOI":"10.1016\/j.csda.2009.04.009","volume":"53","author":"JH Kim","year":"2009","unstructured":"Kim JH (2009) Estimating classification error rate: repeated cross-validation, repeated hold-out and bootstrap. Comput Stat Data Anal 53(11):3735\u20133745. https:\/\/doi.org\/10.1016\/j.csda.2009.04.009","journal-title":"Comput Stat Data Anal"},{"issue":"4","key":"309_CR21","doi-asserted-by":"publisher","first-page":"2668","DOI":"10.1214\/11-AOAS491","volume":"5","author":"H Shen","year":"2011","unstructured":"Shen H, Welch WJ, Hughes-Oliver JM (2011) Efficient, adaptive cross-validation for tuning and comparing models, with application to drug discovery. Ann Appl Stat 5(4):2668\u20132687. https:\/\/doi.org\/10.1214\/11-AOAS491","journal-title":"Ann Appl Stat"},{"issue":"4","key":"309_CR22","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1016\/S1093-3263(01)00123-1","volume":"20","author":"A Golbraikh","year":"2002","unstructured":"Golbraikh A, Tropsha A (2002) Beware of q2!. J Mol Graph Model 20(4):269\u2013276","journal-title":"J Mol Graph Model"},{"issue":"6\u20137","key":"309_CR23","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1002\/minf.201000061","volume":"29","author":"A Tropsha","year":"2010","unstructured":"Tropsha A (2010) Best practices for QSAR model development, validation, and exploitation. Mol Inform 29(6\u20137):476\u2013488. https:\/\/doi.org\/10.1002\/minf.201000061","journal-title":"Mol Inform"},{"issue":"5","key":"309_CR24","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1002\/qsar.200610151","volume":"26","author":"P Gramatica","year":"2007","unstructured":"Gramatica P (2007) Principles of QSAR models validation: Internal and external. QSAR Comb Sci 26(5):694\u2013701. https:\/\/doi.org\/10.1002\/qsar.200610151","journal-title":"QSAR Comb Sci"},{"key":"309_CR25","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1111\/j.2517-6161.1974.tb00994.x","volume":"36","author":"M Stone","year":"1974","unstructured":"Stone M (1974) Cross-validatory choice and assessment of statistical predictions. J R Stat Soc Series B 36:111\u2013147","journal-title":"J R Stat Soc Series B"},{"issue":"382","key":"309_CR26","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1080\/01621459.1983.10477973","volume":"78","author":"B Efron","year":"1983","unstructured":"Efron B (1983) Estimating the error rate of a prediction rule: improvement on cross-validation. J Am Stat Assoc 78(382):316\u2013331","journal-title":"J Am Stat Assoc"},{"issue":"Jul","key":"309_CR27","first-page":"2079","volume":"11","author":"GC Cawley","year":"2010","unstructured":"Cawley GC, Talbot NL (2010) On over-fitting in model selection and subsequent selection bias in performance evaluation. J Mach Learn Res 11(Jul):2079\u20132107","journal-title":"J Mach Learn Res"},{"issue":"10","key":"309_CR28","doi-asserted-by":"publisher","first-page":"6562","DOI":"10.1073\/pnas.102102699","volume":"99","author":"C Ambroise","year":"2002","unstructured":"Ambroise C, McLachlan GJ (2002) Selection bias in gene extraction on the basis of microarray gene-expression data. Proc Natl Acad Sci USA 99(10):6562\u20136. https:\/\/doi.org\/10.1073\/pnas.102102699","journal-title":"Proc Natl Acad Sci USA"},{"issue":"9","key":"309_CR29","doi-asserted-by":"publisher","first-page":"711","DOI":"10.1080\/1062936X.2013.792875","volume":"24","author":"G Xu","year":"2013","unstructured":"Xu G, Hughes-Oliver J, Brooks J, Baynes R (2013) Predicting skin permeability from complex chemical mixtures: incorporation of an expanded QSAR model. SAR QSAR Environ Res 24(9):711\u2013731. https:\/\/doi.org\/10.1080\/1062936X.2013.792875","journal-title":"SAR QSAR Environ Res"},{"key":"309_CR30","unstructured":"Kohavi R (1995) A study of cross-validation and bootstrap for accuracy estimation and model selection. In: International joint conference on artificial intelligence, Montreal, Canada, vol 14, pp 1137\u20131145"},{"issue":"2","key":"309_CR31","doi-asserted-by":"publisher","first-page":"579","DOI":"10.1021\/ci025626i","volume":"43","author":"DM Hawkins","year":"2003","unstructured":"Hawkins DM, Basak SC, Mills D (2003) Assessing model fit by cross-validation. J Chem Inf Comput Sci 43(2):579\u2013586. https:\/\/doi.org\/10.1021\/ci025626i","journal-title":"J Chem Inf Comput Sci"},{"issue":"1","key":"309_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1021\/ci0342472","volume":"44","author":"DM Hawkins","year":"2004","unstructured":"Hawkins DM (2004) The problem of overfitting. J Chem Inf Comput Sci 44(1):1\u201312. https:\/\/doi.org\/10.1021\/ci0342472","journal-title":"J Chem Inf Comput Sci"},{"issue":"1","key":"309_CR33","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/j.chemolab.2006.03.001","volume":"87","author":"JJ Kraker","year":"2007","unstructured":"Kraker JJ, Hawkins DM, Basak SC, Natarajan R, Mills D (2007) Quantitative Structure\u2013Activity Relationship (QSAR) modeling of juvenile hormone activity: comparison of validation procedures. Chemom Intell Lab Syst 87(1):33\u201342. https:\/\/doi.org\/10.1016\/j.chemolab.2006.03.001","journal-title":"Chemom Intell Lab Syst"},{"issue":"1","key":"309_CR34","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1186\/1758-2946-6-10","volume":"6","author":"D Krstajic","year":"2014","unstructured":"Krstajic D, Buturovic LJ, Leahy DE, Thomas S (2014) Cross-validation pitfalls when selecting and assessing regression and classification models. J Cheminform 6(1):10. https:\/\/doi.org\/10.1186\/1758-2946-6-10","journal-title":"J Cheminform"},{"key":"309_CR35","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1016\/j.jclinepi.2015.04.005","volume":"69","author":"EW Steyerberg","year":"2016","unstructured":"Steyerberg EW, Harrell FE Jr (2016) Prediction models need appropriate internal, internal\u2013external, and external validation. J Clin Epidemiol 69:245\u20137. https:\/\/doi.org\/10.1016\/j.jclinepi.2015.04.005","journal-title":"J Clin Epidemiol"},{"key":"309_CR36","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1021\/ci950274j","volume":"36","author":"SK Kearsley","year":"1996","unstructured":"Kearsley SK, Sallamack S, Fluder EM, Andose JD, Mosley RT, Sheridan RP (1996) Chemical similarity using physiochemical property descriptors. J Chem Inf Model 36:118\u2013127. https:\/\/doi.org\/10.1021\/ci950274j","journal-title":"J Chem Inf Model"},{"key":"309_CR37","volume-title":"The collected works of John W. Tukey: multiple comparisons","author":"JW Tukey","year":"1994","unstructured":"Tukey JW, Berringer DR (1994) The collected works of John W. Tukey: multiple comparisons, vol VIII, 8th edn. Chapman & Hall, New York, NY","edition":"8"},{"issue":"3","key":"309_CR38","doi-asserted-by":"publisher","first-page":"307","DOI":"10.2307\/3001469","volume":"12","author":"CY Kramer","year":"1956","unstructured":"Kramer CY (1956) Extension of multiple range tests to group means with unequal numbers of replications. Biometrics 12(3):307. https:\/\/doi.org\/10.2307\/3001469","journal-title":"Biometrics"},{"key":"309_CR39","unstructured":"Beygelzimer A, Kakadet S, Langford J, Arya S, Mount D, Li S (2013) FNN: fast nearest neighbor search algorithms and applications. https:\/\/CRAN.R-project.org\/package=FNN . Accessed 29 Sept 2018"},{"issue":"1\u20137","key":"309_CR40","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/S0169-7552(98)00110-X","volume":"30","author":"S Brin","year":"1998","unstructured":"Brin S, Page L (1998) The anatomy of a large-scale hypertextual web search engine. Comput Netw ISDN Syst 30(1\u20137):107\u2013117. https:\/\/doi.org\/10.1016\/S0169-7552(98)00110-X","journal-title":"Comput Netw ISDN Syst"},{"key":"309_CR41","volume-title":"Multivariate statistical quality control using R","author":"E Santos-Fern\u00e1ndez","year":"2013","unstructured":"Santos-Fern\u00e1ndez E (2013) Multivariate statistical quality control using R, vol 14. Springer, New York, NY"},{"issue":"34","key":"309_CR42","doi-asserted-by":"publisher","first-page":"3494","DOI":"10.2174\/138161207782794257","volume":"13","author":"A Tropsha","year":"2007","unstructured":"Tropsha A, Golbraikh A (2007) Predictive QSAR modeling workflow, model applicability domains, and virtual screening. Curr Pharm Des 13(34):3494\u2013504","journal-title":"Curr Pharm Des"},{"issue":"5","key":"309_CR43","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1002\/minf.201501019","volume":"35","author":"M Mathea","year":"2016","unstructured":"Mathea M, Klingspohn W, Baumann K (2016) Chemoinformatic classification methods and their applicability domain. Mol Inform 35(5):160\u2013180. https:\/\/doi.org\/10.1002\/minf.201501019","journal-title":"Mol Inform"},{"issue":"10","key":"309_CR44","doi-asserted-by":"publisher","first-page":"1361","DOI":"10.1289\/ehp.5758","volume":"111","author":"L Eriksson","year":"2003","unstructured":"Eriksson L, Jaworska J, Worth AP, Cronin MT, McDowell RM, Gramatica P (2003) Methods for reliability and uncertainty assessment and for applicability evaluations of classification-and regression-based QSARs. Environ Health Perspect 111(10):1361. https:\/\/doi.org\/10.1289\/ehp.5758","journal-title":"Environ Health Perspect"},{"key":"309_CR45","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718461","volume-title":"Multivariate statistical process control with industrial applications","author":"RL Mason","year":"2002","unstructured":"Mason RL, Young JC (2002) Multivariate statistical process control with industrial applications, vol 9. SIAM, Philadelphia, PA. https:\/\/doi.org\/10.1137\/1.9780898718461"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-018-0309-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13321-018-0309-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-018-0309-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T20:50:54Z","timestamp":1720817454000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/s13321-018-0309-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11,28]]},"references-count":45,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["309"],"URL":"https:\/\/doi.org\/10.1186\/s13321-018-0309-4","relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,11,28]]},"assertion":[{"value":"30 July 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 November 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 November 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"57"}}