{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,5]],"date-time":"2026-04-05T14:11:34Z","timestamp":1775398294740,"version":"3.50.1"},"reference-count":90,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,1,16]],"date-time":"2018-01-16T00:00:00Z","timestamp":1516060800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","award":["PTDC\/EEI-ESS\/4923\/2014"],"award-info":[{"award-number":["PTDC\/EEI-ESS\/4923\/2014"]}]},{"name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","award":["SFRH\/BD\/111654\/2015"],"award-info":[{"award-number":["SFRH\/BD\/111654\/2015"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1186\/s13321-017-0256-5","type":"journal-article","created":{"date-parts":[[2018,1,16]],"date-time":"2018-01-16T12:55:57Z","timestamp":1516107357000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":108,"title":["An automated framework for QSAR model building"],"prefix":"10.1186","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5207-7136","authenticated-orcid":false,"given":"Samina","family":"Kausar","sequence":"first","affiliation":[]},{"given":"Andre O.","family":"Falcao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,1,16]]},"reference":[{"key":"256_CR1","doi-asserted-by":"publisher","first-page":"716","DOI":"10.1021\/ci9003865","volume":"50","author":"S Agarwal","year":"2010","unstructured":"Agarwal S, Dugar D, Sengupta S (2010) Ranking chemical structures for drug discovery: a new machine learning approach. J Chem Inf Model 50:716\u2013731","journal-title":"J Chem Inf Model"},{"key":"256_CR2","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0083922","author":"KY Hsin","year":"2013","unstructured":"Hsin KY, Ghosh S, Kitano H (2013) Combining machine learning systems and multiple docking simulation packages to improve docking prediction reliability for network pharmacology. PLoS ONE. https:\/\/doi.org\/10.1371\/journal.pone.0083922","journal-title":"PLoS ONE"},{"issue":"2","key":"256_CR3","doi-asserted-by":"publisher","first-page":"145","DOI":"10.18178\/ijmlc.2016.6.2.589","volume":"6","author":"A Matsumoto","year":"2016","unstructured":"Matsumoto A, Aoki S, Ohwada H (2016) Comparison of random forest and SVM for raw data in drug discovery: prediction of radiation protection and toxicity case study. Int J Mach Learn Comput 6(2):145\u2013148. https:\/\/doi.org\/10.18178\/ijmlc.2016.6.2.589","journal-title":"Int J Mach Learn Comput"},{"issue":"3","key":"256_CR4","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1517\/17460441.2016.1146250","volume":"11","author":"AN Lima","year":"2016","unstructured":"Lima AN, Philot EA, Goulart Trossini GH, Barbour Scott LP, Maltarollo VG, Honorio KM (2016) Use of machine learning approaches for novel drug discovery. Expert Opin Drug Discov 11(3):225\u2013239. https:\/\/doi.org\/10.1517\/17460441.2016.1146250","journal-title":"Expert Opin Drug Discov"},{"key":"256_CR5","doi-asserted-by":"publisher","DOI":"10.17226\/11970","author":"E Mantus","year":"2007","unstructured":"Mantus E (2007) Toxicity testing in the 21st century. Alttox Org. https:\/\/doi.org\/10.17226\/11970","journal-title":"Alttox Org"},{"issue":"7252","key":"256_CR6","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1038\/460208a","volume":"460","author":"T Hartung","year":"2009","unstructured":"Hartung T (2009) Toxicology for the twenty-first century. Nature 460(7252):208\u2013212. https:\/\/doi.org\/10.1038\/460208a","journal-title":"Nature"},{"issue":"3","key":"256_CR7","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1016\/j.yrtph.2015.02.011","volume":"71","author":"G Patlewicz","year":"2015","unstructured":"Patlewicz G, Simon TW, Rowlands JC, Budinsky RA, Becker RA (2015) Proposing a scientific confidence framework to help support the application of adverse outcome pathways for regulatory purposes. Regul Toxicol Pharmacol 71(3):463\u2013477. https:\/\/doi.org\/10.1016\/j.yrtph.2015.02.011","journal-title":"Regul Toxicol Pharmacol"},{"key":"256_CR8","doi-asserted-by":"publisher","unstructured":"Viceconti M, Henney A, Morley-Fletcher E (2016) In silico clinical trials: how computer simulation will transform the biomedical industry. Avicenna Coordination Support Action. https:\/\/doi.org\/10.13140\/RG.2.1.2756.6164","DOI":"10.13140\/RG.2.1.2756.6164"},{"key":"256_CR9","unstructured":"Jaworska J, Aldenberg T, Nikolova N (2005) Review of methods for QSAR applicability domain estimation by the training set. Technical report. The European Commission\u2014Joint Research Centre Institute for Health and Consumer Protection-ECVAM"},{"key":"256_CR10","doi-asserted-by":"publisher","unstructured":"Serafimova R, Gatnik MF, Worth A (2010) Review of QSAR models and software tools for predicting genotoxicity and carcinogenicity. Publications Office of the European Union. JRC scientific and technical reports. https:\/\/doi.org\/10.2788\/26123","DOI":"10.2788\/26123"},{"issue":"December 2014","key":"256_CR11","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1080\/10629369508234003","volume":"3:3","author":"M Zeeman","year":"1995","unstructured":"Zeeman M, Auer C, Clements R, Nabholz J, RS B (1995) U.S. EPA regulatory perspectives on the use of qsar for new and existing chemical evaluations. SAR QSAR Environ Res 3:3(December 2014):179\u2013201. https:\/\/doi.org\/10.1080\/10629369508234003","journal-title":"SAR QSAR Environ Res"},{"issue":"3","key":"256_CR12","doi-asserted-by":"publisher","first-page":"200","DOI":"10.1186\/1479-7364-5-3-200","volume":"5","author":"LG Valerio","year":"2011","unstructured":"Valerio LG (2011) In silico toxicology models and databases as FDA Critical Path Initiative toolkits. Hum. Genomics 5(3):200\u2013207","journal-title":"Hum. Genomics"},{"key":"256_CR13","unstructured":"Martin T (2016) User \u2019 s Guide for T.E.S.T. (version 4.2) (Toxicity Estimation Software Tool)"},{"key":"256_CR14","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1289\/ehp.0901157","volume":"1","author":"C Rud\u00e9n","year":"2010","unstructured":"Rud\u00e9n C, Hansson SO (2010) Registration, evaluation, and authorization of chemicals (REACH) is but the first step\u2014how far will it take us? Six further steps to improve the European chemicals legislation. Environ. Health Perspect. 1:6\u201310. https:\/\/doi.org\/10.1289\/ehp.0901157","journal-title":"Environ. Health Perspect."},{"key":"256_CR15","doi-asserted-by":"publisher","unstructured":"Directorate E, Meeting J, The OF, Committee C, Working THE, On P, Series O, Testing ON (2004) OECD Environment health and safety publications series on testing and assessment. Assessment, 20\u201321. https:\/\/doi.org\/10.1787\/9789264079151-en","DOI":"10.1787\/9789264079151-en"},{"issue":"1","key":"256_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1021\/ci9903206","volume":"40","author":"AR Katritzky","year":"2000","unstructured":"Katritzky AR, Maran U, Lobanov VS, Karelson M (2000) Structurally diverse quantitative structure\u2013property relationship correlations of technologically relevant physical properties. J Chem Inf Model 40(1):1\u201318. https:\/\/doi.org\/10.1021\/ci9903206","journal-title":"J Chem Inf Model"},{"issue":"12","key":"256_CR17","doi-asserted-by":"publisher","first-page":"1333","DOI":"10.2174\/1568026023392922","volume":"2","author":"AR Katritzky","year":"2002","unstructured":"Katritzky AR, Fara DC, Petrukhin RO, Tatham DB, Maran U, Lomaka A, Karelson M (2002) The present utility and future potential for medicinal chemistry of QSAR\/QSPR with whole molecule descriptors. Curr Top Med Chem 2(12):1333\u20131356. https:\/\/doi.org\/10.2174\/1568026023392922","journal-title":"Curr Top Med Chem"},{"key":"256_CR18","doi-asserted-by":"crossref","DOI":"10.1201\/b10419","volume-title":"Three dimensional QSAR: applications in pharmacology and toxicology","author":"JP Doucet","year":"2010","unstructured":"Doucet JP, Panaye A (2010) Three dimensional QSAR: applications in pharmacology and toxicology. CRC Press, Boca Raton"},{"issue":"6","key":"256_CR19","doi-asserted-by":"publisher","first-page":"849","DOI":"10.1089\/10665270260518317","volume":"9","author":"S Doniger","year":"2002","unstructured":"Doniger S, Hofmann T, Yeh J (2002) Predicting CNS permeability of drug molecules: comparison of neural network and support vector machine algorithms. J Comput Biol 9(6):849\u2013864. https:\/\/doi.org\/10.1089\/10665270260518317","journal-title":"J Comput Biol"},{"issue":"34","key":"256_CR20","doi-asserted-by":"publisher","first-page":"3494","DOI":"10.2174\/138161207782794257","volume":"13","author":"A Tropsha","year":"2007","unstructured":"Tropsha A, Golbraikh A (2007) Predictive QSAR modeling workflow, model applicability domains, and virtual screening. Curr Pharm Des 13(34):3494\u2013504. https:\/\/doi.org\/10.2174\/138161207782794257","journal-title":"Curr Pharm Des"},{"key":"256_CR21","unstructured":"Puzyn T, Leszczynski J, Cronin MT (2009) Recent advances in QSAR studies: methods and applications (challenges and advances in computational chemistry and physics), 2010 Edition edn. Springer, Berlin https:\/\/www.amazon.com\/Recent-Advances-QSAR-Studies-Computational\/dp\/1402097824"},{"issue":"6\u20137","key":"256_CR22","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1002\/minf.201000061","volume":"29","author":"A Tropsha","year":"2010","unstructured":"Tropsha A (2010) Best practices for QSAR model development, validation, and exploitation. Mol Inf 29(6\u20137):476\u2013488. https:\/\/doi.org\/10.1002\/minf.201000061","journal-title":"Mol Inf"},{"key":"256_CR23","doi-asserted-by":"publisher","first-page":"1825","DOI":"10.4155\/fmc-2016-0093","volume":"8","author":"SL Dixon","year":"2016","unstructured":"Dixon SL, Duan J, Smith E, Bargen CDV, Repasky MP (2016) AutoQSAR: an automated machine learning tool for best-practice QSAR modeling. Fut Med Chem 8:1825\u20131839","journal-title":"Fut Med Chem"},{"key":"256_CR24","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-015-0058-6","author":"P Carri\u00f3","year":"2015","unstructured":"Carri\u00f3 P, L\u00f3pez O, Sanz F, Pastor M (2015) eTOXlab, an open source modeling framework for implementing predictive models in production environments. J Cheminform. https:\/\/doi.org\/10.1186\/s13321-015-0058-6","journal-title":"J Cheminform"},{"key":"256_CR25","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1007\/s10822-011-9440-2","volume":"25","author":"K Pandey","year":"2011","unstructured":"Pandey K, Rupp M (2011) Online chemical modeling environment (OCHEM): web platform for data storage, model development and publishing of chemical information. J Comput Aided Mol Des 25:533\u2013554. https:\/\/doi.org\/10.1007\/s10822-011-9440-2","journal-title":"J Comput Aided Mol Des"},{"key":"256_CR26","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1007\/s10822-013-9648-4","volume":"27","author":"R Cox","year":"2013","unstructured":"Cox R, Green DVS, Luscombe CN, Malcolm N, Pickett SD (2013) QSAR workbench: automating QSAR modeling to drive compound design. J Comput Aided Mol design 27:321\u2013336. https:\/\/doi.org\/10.1007\/s10822-013-9648-4","journal-title":"J Comput Aided Mol design"},{"issue":"5","key":"256_CR27","doi-asserted-by":"publisher","first-page":"1437","DOI":"10.1021\/ja025304v","volume":"125","author":"JM Stevenson","year":"2003","unstructured":"Stevenson JM, Mulready PD (2003) Pipeline Pilot 2.1. J Am Chem Soc 125(5):1437\u20131438","journal-title":"J Am Chem Soc"},{"key":"256_CR28","unstructured":"Green DVS, Pickett SD, Keefer CE, Bizon C, Woody N, Chakravorty S (2008) Automated predictive modelling: modeller\u2019s utopia or fools\u2019 gold? http:\/\/www.soci.org\/News\/Fine-Chemoinformatics-SAR . Accessed 19 Dec 2017"},{"issue":"18","key":"256_CR29","doi-asserted-by":"publisher","first-page":"1965","DOI":"10.2174\/156802612804910331","volume":"12","author":"MP Mazanetz","year":"2012","unstructured":"Mazanetz MP, Marmon RJ, Reisser CBT, Morao I (2012) Drug discovery applications for KNIME: an open source data mining platform. Curr Top Med Chem 12(18):1965\u201379. https:\/\/doi.org\/10.2174\/156802612804910331","journal-title":"Curr Top Med Chem"},{"issue":"2","key":"256_CR30","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1021\/acs.chemrestox.5b00480","volume":"29","author":"CL Mellor","year":"2016","unstructured":"Mellor CL, Steinmetz FP, Cronin MTD (2016) Using molecular initiating events to develop a structural alert based screening workflow for nuclear receptor ligands associated with hepatic steatosis. Chem Res Toxicol 29(2):203\u2013212. https:\/\/doi.org\/10.1021\/acs.chemrestox.5b00480","journal-title":"Chem Res Toxicol"},{"key":"256_CR31","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-015-0108-0","author":"Y Gilad","year":"2015","unstructured":"Gilad Y, Nadassy K, Senderowitz H (2015) A reliable computational workflow for the selection of optimal screening libraries. J Cheminform. https:\/\/doi.org\/10.1186\/s13321-015-0108-0","journal-title":"J Cheminform"},{"key":"256_CR32","doi-asserted-by":"publisher","unstructured":"Nicola G, Berthold MR, Hedrick MP, Gilson MK (2015) Connecting proteins with drug-like compounds: open source drug discovery workflows with BindingDB and KNIME. https:\/\/doi.org\/10.1093\/database\/bav087","DOI":"10.1093\/database\/bav087"},{"issue":"WEB. SERV. ISS.","key":"256_CR33","doi-asserted-by":"publisher","first-page":"729","DOI":"10.1093\/nar\/gkl320","volume":"34","author":"D Hull","year":"2006","unstructured":"Hull D, Wolstencroft K, Stevens R, Goble C, Pocock MR, Li P, Oinn T (2006) Taverna: a tool for building and running workflows of services. Nucleic Acids Res 34(WEB. SERV. ISS.):729\u2013732. https:\/\/doi.org\/10.1093\/nar\/gkl320","journal-title":"Nucleic Acids Res"},{"issue":"10","key":"256_CR34","doi-asserted-by":"publisher","first-page":"1451","DOI":"10.1101\/gr.4086505","volume":"15","author":"B Giardine","year":"2005","unstructured":"Giardine B, Riemer C, Hardison RC, Burhans R, Elnitski L, Shah P, Zhang Y, Blankenberg D, Albert I, Taylor J, Miller W, Kent WJ, Nekrutenko A (2005) Galaxy: a platform for interactive large-scale genome analysis. Genome Res 15(10):1451\u20131455. https:\/\/doi.org\/10.1101\/gr.4086505","journal-title":"Genome Res"},{"key":"256_CR35","doi-asserted-by":"publisher","unstructured":"Altintas I, Berkley C, Jaeger E, Jones M, Ludascher B, Mock S (2004) Kepler: an extensible system for design and execution of scientific workflows. In: 16th international conference on scientific and statistical database management. Petros Nomikos conference center, Santorini Island, Greece I, pp 423\u2013424. https:\/\/doi.org\/10.1109\/SSDM.2004.1311241","DOI":"10.1109\/SSDM.2004.1311241"},{"issue":"3","key":"256_CR36","doi-asserted-by":"publisher","first-page":"1033","DOI":"10.1016\/S1053-8119(03)00185-X","volume":"19","author":"DE Rex","year":"2003","unstructured":"Rex DE, Ma JQ, Toga AW (2003) The LONI pipeline processing environment. NeuroImage 19(3):1033\u20131048. https:\/\/doi.org\/10.1016\/S1053-8119(03)00185-X","journal-title":"NeuroImage"},{"issue":"1","key":"256_CR37","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1145\/1656274.1656280","volume":"11","author":"MR Berthold","year":"2009","unstructured":"Berthold MR, Cebron N, Dill F, Gabriel TR, K\u00f6tter T, Meinl T, Ohl P, Thiel K, Wiswedel B (2009) KNIME\u2014the Konstanz information miner. SIGKDD Explor 11(1):26\u201331. https:\/\/doi.org\/10.1145\/1656274.1656280","journal-title":"SIGKDD Explor"},{"issue":"2","key":"256_CR38","doi-asserted-by":"publisher","first-page":"493","DOI":"10.1021\/ci025584y","volume":"43","author":"C Steinbeck","year":"2003","unstructured":"Steinbeck C, Han Y, Kuhn S, Horlacher O, Luttmann E, Willighagen E (2003) The Chemistry Development Kit (CDK): an open-source Java library for chemo- and bioinformatics. J Chem Inf Comput Sci 43(2):493\u2013500. https:\/\/doi.org\/10.1021\/ci025584y","journal-title":"J Chem Inf Comput Sci"},{"key":"256_CR39","unstructured":"Landrum G (2017) RDKit Documentation. Release 2017, pp 1\u2013125. http:\/\/www.rdkit.org . Accessed 19 Dec 2017"},{"issue":"7","key":"256_CR40","doi-asserted-by":"publisher","first-page":"1739","DOI":"10.1021\/jm0306430","volume":"47","author":"RA Friesner","year":"2004","unstructured":"Friesner RA, Banks JL, Murphy RB, Halgren TA, Klicic JJ, Mainz DT, Repasky MP, Knoll EH, Shelley M, Perry JK, Shaw DE, Francis P, Shenkin PS (2004) Glide: a new approach for rapid, accurate docking and scoring. 1. method and assessment of docking accuracy. J Med Chem 47(7):1739\u20131749. https:\/\/doi.org\/10.1021\/jm0306430 . arXiv:1011.1669v3","journal-title":"J Med Chem"},{"issue":"5","key":"256_CR41","doi-asserted-by":"publisher","first-page":"370","DOI":"10.1111\/j.1747-0285.2006.00384.x","volume":"67","author":"SL Dixon","year":"2006","unstructured":"Dixon SL, Smondyrev AM, Rao SN (2006) PHASE: a novel approach to pharmacophore modeling and 3D database searching. Chem Biol Drug Des 67(5):370\u2013372. https:\/\/doi.org\/10.1111\/j.1747-0285.2006.00384.x","journal-title":"Chem Biol Drug Des"},{"issue":"D1","key":"256_CR42","doi-asserted-by":"publisher","first-page":"1083","DOI":"10.1093\/nar\/gkt1031","volume":"42","author":"AP Bento","year":"2014","unstructured":"Bento AP, Gaulton A, Hersey A, Bellis LJ, Chambers J, Davies M, Kr\u00fcger FA, Light Y, Mak L, McGlinchey S, Nowotka M, Papadatos G, Santos R, Overington JP (2014) The ChEMBL bioactivity database: an update. Nucleic Acids Res 42(D1):1083\u20131090. https:\/\/doi.org\/10.1093\/nar\/gkt1031","journal-title":"Nucleic Acids Res"},{"issue":"21\u201322","key":"256_CR43","doi-asserted-by":"publisher","first-page":"1188","DOI":"10.1016\/j.drudis.2012.05.016","volume":"17","author":"AJ Williams","year":"2012","unstructured":"Williams AJ, Harland L, Groth P, Pettifer S, Chichester C, Willighagen EL, Evelo CT, Blomberg N, Ecker G, Goble C, Mons B (2012) Open PHACTS: semantic interoperability for drug discovery. Drug Discov Today 17(21\u201322):1188\u20131198. https:\/\/doi.org\/10.1016\/j.drudis.2012.05.016","journal-title":"Drug Discov Today"},{"issue":"12","key":"256_CR44","doi-asserted-by":"publisher","first-page":"4977","DOI":"10.1021\/jm4004285","volume":"57","author":"A Cherkasov","year":"2015","unstructured":"Cherkasov A, Muratov EN, Fourches D, Varnek A, Igor I, Cronin M, Dearden J, Gramatica P, Martin YC, Consonni V, Kuz VE, Cramer R (2015) QSAR modeling: where have you been? Where are you going to? J Med Chem 57(12):4977\u20135010. https:\/\/doi.org\/10.1021\/jm4004285.QSAR","journal-title":"J Med Chem"},{"issue":"2","key":"256_CR45","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1758-2946-5-9","volume":"5","author":"AL Teixeira","year":"2013","unstructured":"Teixeira AL, Leal JP, Falcao AO (2013) Random forests for feature selection in QSPR models\u2014an application for predicting standard enthalpy of formation of hydrocarbons. J Cheminform 5(2):1. https:\/\/doi.org\/10.1186\/1758-2946-5-9","journal-title":"J Cheminform"},{"issue":"June","key":"256_CR46","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1093\/nar\/gkp456","volume":"37","author":"Y Wang","year":"2009","unstructured":"Wang Y, Xiao J, Suzek TO, Zhang J, Wang J, Bryant SH (2009) PubChem: a public information system for analyzing bioactivities of small molecules. Nucleic Acids Res 37(June):623\u2013633. https:\/\/doi.org\/10.1093\/nar\/gkp456","journal-title":"Nucleic Acids Res"},{"key":"256_CR47","doi-asserted-by":"publisher","unstructured":"Wang Y, Xiao J, Suzek TO, Zhang J, Wang J, Zhou Z, Han L, Karapetyan K, Dracheva S, Shoemaker BA, Bolton E, Gindulyte A, Bryant SH (2012) PubChem\u2019s BioAssay Database 40 (December 2011). https:\/\/doi.org\/10.1093\/nar\/gkr1132","DOI":"10.1093\/nar\/gkr1132"},{"key":"256_CR48","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1177\/107385840000600408","volume":"6","author":"BL Roth","year":"2000","unstructured":"Roth BL, Lopez E, Patel S, Kroeze WK (2000) The multiplicity of serotonin receptors: uselessly diverse molecules or an embarrassment of riches? Neuroscientist 6:252\u2013262","journal-title":"Neuroscientist"},{"issue":"7","key":"256_CR49","doi-asserted-by":"publisher","first-page":"1189","DOI":"10.1021\/ci100176x","volume":"50","author":"D Fourches","year":"2010","unstructured":"Fourches D, Muratov E, Tropsha a (2010) Trust but verify: on the importance of chemical structure curation in chemoinformatics and QSAR modeling research. J Chem Inf Model 50(7):1189\u20131204","journal-title":"J Chem Inf Model"},{"issue":"11\u201312","key":"256_CR50","doi-asserted-by":"publisher","first-page":"1337","DOI":"10.1002\/qsar.200810084","volume":"27","author":"D Young","year":"2008","unstructured":"Young D, Martin T, Venkatapathy R, Harten P (2008) Are the chemical structures in your QSAR correct? QSAR Comb Sci 27(11\u201312):1337\u20131345. https:\/\/doi.org\/10.1002\/qsar.200810084","journal-title":"QSAR Comb Sci"},{"issue":"9\u201310","key":"256_CR51","doi-asserted-by":"publisher","first-page":"827","DOI":"10.1002\/minf.201300076","volume":"32","author":"D Fourches","year":"2013","unstructured":"Fourches D, Tropsha A (2013) Using graph indices for the analysis and comparison of chemical datasets. Mol Inform 32(9\u201310):827\u2013842. https:\/\/doi.org\/10.1002\/minf.201300076","journal-title":"Mol Inform"},{"issue":"1","key":"256_CR52","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1021\/ci400572x","volume":"54","author":"A Golbraikh","year":"2014","unstructured":"Golbraikh A, Muratov E, Fourches D, Tropsha A (2014) Data set modelability by QSAR. J Chem Inf Model 54(1):1\u20134. https:\/\/doi.org\/10.1021\/ci400572x arXiv:NIHMS150003","journal-title":"J Chem Inf Model"},{"key":"256_CR53","doi-asserted-by":"publisher","unstructured":"Golbraikh A, Fourches D, Sedykh A, Muratov E, Liepina I, Tropsha A (2014) Modelability criteria: statistical characteristics estimating feasibility to build predictive QSAR models for a dataset. In: Practical aspects of computational chemistry II. Springer, Boston. pp. 187\u2013230 https:\/\/doi.org\/10.1007\/978-1-4899-7445-7_7","DOI":"10.1007\/978-1-4899-7445-7_7"},{"issue":"1","key":"256_CR54","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1021\/acs.jcim.5b00539","volume":"56","author":"G Marcou","year":"2016","unstructured":"Marcou G, Horvath D, Varnek A (2016) Kernel target alignment parameter: a new modelability measure for regression tasks. J Chem Inf Model 56(1):6\u201311. https:\/\/doi.org\/10.1021\/acs.jcim.5b00539","journal-title":"J Chem Inf Model"},{"key":"256_CR55","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.540130415","volume-title":"Concepts and applications of molecular similarity","author":"MA Johnson","year":"1990","unstructured":"Johnson MA, Maggiora GM (1990) Concepts and applications of molecular similarity. Wiley, New York. https:\/\/doi.org\/10.1002\/jcc.540130415"},{"issue":"5","key":"256_CR56","doi-asserted-by":"publisher","first-page":"1218","DOI":"10.1021\/ci010291a","volume":"41","author":"A Yasri","year":"2001","unstructured":"Yasri A, Hartsough D (2001) Toward an optimal procedure for variable selection and QSAR model building. J Chem Inf Comput Sci 41(5):1218\u20131227. https:\/\/doi.org\/10.1021\/ci010291a","journal-title":"J Chem Inf Comput Sci"},{"issue":"December 2012","key":"256_CR57","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1080\/10629360902949567","volume":"20","author":"JC Dearden","year":"2009","unstructured":"Dearden JC, Cronin MTD, Kaiser KLE (2009) How not to develop a quantitative structure\u2013activity or structure\u2013property relationship (QSAR\/QSPR). SAR QSAR Environ Res 20(December 2012):241\u2013266. https:\/\/doi.org\/10.1080\/10629360902949567","journal-title":"SAR QSAR Environ Res"},{"key":"256_CR58","doi-asserted-by":"publisher","first-page":"688","DOI":"10.1002\/9783527613106","volume-title":"Handbook of molecular descriptors","author":"R Todeschini","year":"2008","unstructured":"Todeschini R, Consonni V (2008) Handbook of molecular descriptors. Wiley-VCH Verlag GmbH, Weinheim, p 688. https:\/\/doi.org\/10.1002\/9783527613106"},{"key":"256_CR59","doi-asserted-by":"publisher","unstructured":"Karelson M (2000) Molecular descriptors in QSAR\/QSPR (March), 35168. https:\/\/doi.org\/10.1002\/1521-3773(20010316)40:6<1136::AID-ANIE1136>3.0.CO;2-M","DOI":"10.1002\/1521-3773(20010316)40:6<1136::AID-ANIE1136>3.0.CO;2-M"},{"issue":"5","key":"256_CR60","doi-asserted-by":"publisher","first-page":"658","DOI":"10.1590\/S0100-40422001000500013","volume":"24","author":"AC Gaudio","year":"2001","unstructured":"Gaudio AC, Zandonade E (2001) Proposition, validation and analysis of QSAR models. Quim Nova 24(5):658\u2013671. https:\/\/doi.org\/10.1590\/S0100-40422001000500013","journal-title":"Quim Nova"},{"issue":"6","key":"256_CR61","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1590\/S0103-50532002000600004","volume":"13","author":"MMC Ferreira","year":"2002","unstructured":"Ferreira MMC (2002) Multivariate QSAR. J Braz Chem Soc 13(6):742\u2013753. https:\/\/doi.org\/10.1590\/S0103-50532002000600004","journal-title":"J Braz Chem Soc"},{"issue":"1","key":"256_CR62","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1021\/ci0342472","volume":"44","author":"DM Hawkins","year":"2004","unstructured":"Hawkins DM (2004) The problem of overfitting. J Chem Inf Comput Sci 44(1):1\u201312. https:\/\/doi.org\/10.1021\/ci0342472","journal-title":"J Chem Inf Comput Sci"},{"issue":"5","key":"256_CR63","doi-asserted-by":"publisher","first-page":"1978","DOI":"10.3390\/ijms10051978","volume":"10","author":"P Liu","year":"2009","unstructured":"Liu P, Long W (2009) Current mathematical methods used in QSAR\/QSPR studies. Int J Mol Sci 10(5):1978\u20131998. https:\/\/doi.org\/10.3390\/ijms10051978","journal-title":"Int J Mol Sci"},{"issue":"18","key":"256_CR64","doi-asserted-by":"publisher","first-page":"1606","DOI":"10.2174\/156802608786786552","volume":"8","author":"MP Gonz\u00e1lez","year":"2008","unstructured":"Gonz\u00e1lez MP, Ter\u00e1n C, Sa\u00edz-Urra L, Teijeira M (2008) Variable selection methods in QSAR: an overview. Curr Top Med Chem 8(18):1606\u20131627. https:\/\/doi.org\/10.2174\/156802608786786552","journal-title":"Curr Top Med Chem"},{"issue":"14","key":"256_CR65","doi-asserted-by":"publisher","first-page":"2225","DOI":"10.1016\/j.patrec.2010.03.014","volume":"31","author":"R Genuer","year":"2012","unstructured":"Genuer R, Poggi J-M, Tuleau-Malot C (2012) Variable selection using random forests. Pattern Recognit Lett 31(14):2225\u20132236","journal-title":"Pattern Recognit Lett"},{"key":"256_CR66","doi-asserted-by":"publisher","DOI":"10.1002\/9783527645121","volume-title":"Statistical modelling of molecular descriptors in QSAR\/QSPR","author":"M Dehmer","year":"2012","unstructured":"Dehmer M, Varmuza K, Bonchev D, Emmert-streib F (2012) Statistical modelling of molecular descriptors in QSAR\/QSPR. Wiley-VCH Verlag GmbH, Weinheim"},{"issue":"12","key":"256_CR67","doi-asserted-by":"publisher","first-page":"1172","DOI":"10.1002\/qsar.200610093","volume":"25","author":"J Gola","year":"2006","unstructured":"Gola J, Obrezanova O, Champness E, Segall M (2006) ADMET property prediction: the state of the art and current challenges. QSAR Comb Sci 25(12):1172\u20131180. https:\/\/doi.org\/10.1002\/qsar.200610093","journal-title":"QSAR Comb Sci"},{"issue":"3","key":"256_CR68","doi-asserted-by":"publisher","first-page":"213","DOI":"10.2174\/138620706776055539","volume":"9","author":"AZ Dudek","year":"2006","unstructured":"Dudek AZ, Arodz T, Galvez J (2006) Computational methods in developing quantitative structure\u2013activity relationships (QSAR): a review. Comb Chem High Throughput Screen 9(3):213\u2013228. https:\/\/doi.org\/10.2174\/138620706776055539","journal-title":"Comb Chem High Throughput Screen"},{"key":"256_CR69","first-page":"32","volume":"6729","author":"R Genuer","year":"2008","unstructured":"Genuer R, Poggi J-M, Tuleau C (2008) Random forests: some methodological insights. INRIA 6729:32","journal-title":"INRIA"},{"issue":"1","key":"256_CR70","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman L (2001) Random forests. Mach Learn 45(1):5\u201332. https:\/\/doi.org\/10.1023\/A:1010933404324","journal-title":"Mach Learn"},{"key":"256_CR71","first-page":"1063","volume":"13","author":"G Biau","year":"2012","unstructured":"Biau G (2012) Analysis of a random forests model. J Mach Learn Res 13:1063\u20131095","journal-title":"J Mach Learn Res"},{"key":"256_CR72","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1186\/1471-2105-9-307","volume":"9","author":"C Strobl","year":"2008","unstructured":"Strobl C, Boulesteix A-L, Kneib T, Augustin T, Zeileis A (2008) Conditional variable importance for random forests. BMC Bioinformatics 9:307. https:\/\/doi.org\/10.1186\/1471-2105-9-307","journal-title":"BMC Bioinformatics"},{"key":"256_CR73","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1186\/1471-2105-11-110","volume":"11","author":"KK Nicodemus","year":"2010","unstructured":"Nicodemus KK, Malley JD, Strobl C, Ziegler A (2010) The behaviour of random forest permutation-based variable importance measures under predictor correlation. BMC Bioinform 11:110","journal-title":"BMC Bioinform"},{"key":"256_CR74","doi-asserted-by":"publisher","unstructured":"Yee LC, Wei YC (2012) Current modeling methods used in QSAR\/QSPR. Statistical modeling of molecular descriptor in QSAR\/QSPR, pp 1\u201331. https:\/\/doi.org\/10.3390\/ijms10051978","DOI":"10.3390\/ijms10051978"},{"issue":"6","key":"256_CR75","doi-asserted-by":"publisher","first-page":"1413","DOI":"10.1021\/ci200409x","volume":"52","author":"A Varnek","year":"2012","unstructured":"Varnek A, Baskin I (2012) Machine learning methods for property prediction in chemoinformatics: Quo vadis? J Chem Inf Model 52(6):1413\u20131437. https:\/\/doi.org\/10.1021\/ci200409x","journal-title":"J Chem Inf Model"},{"issue":"25","key":"256_CR76","doi-asserted-by":"publisher","first-page":"4289","DOI":"10.2174\/092986712802884259","volume":"19","author":"JC Gertrudes","year":"2012","unstructured":"Gertrudes JC, Maltarollo VG, Silva Ra, Oliveira PR, Hon\u00f3rio KM, da Silva aBF (2012) Machine learning techniques and drug design. Curr Med Chem 19(25):4289\u201397. https:\/\/doi.org\/10.2174\/092986712802884259","journal-title":"Curr Med Chem"},{"issue":"16","key":"256_CR77","doi-asserted-by":"publisher","first-page":"1913","DOI":"10.2174\/1568026614666140929124203","volume":"14","author":"D Dobchev","year":"2014","unstructured":"Dobchev D, Pillai G, Karelson M (2014) In silico machine learning methods in drug development. Curr Top Med Chem 14(16):1913\u20131922. https:\/\/doi.org\/10.2174\/1568026614666140929124203","journal-title":"Curr Top Med Chem"},{"issue":"1","key":"256_CR78","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1186\/1471-2105-9-319","volume":"9","author":"A Statnikov","year":"2008","unstructured":"Statnikov A, Wang L, Aliferis C (2008) A comprehensive comparison of random forests and support vector machines for microarray-based cancer classification. BMC Bioinform 9(1):319. https:\/\/doi.org\/10.1186\/1471-2105-9-319","journal-title":"BMC Bioinform"},{"issue":"3","key":"256_CR79","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1023\/A:1022627411411","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes C, Vapnik V (1995) Support-vector networks. Mach Learn 20(3):273\u2013297. https:\/\/doi.org\/10.1023\/A:1022627411411","journal-title":"Mach Learn"},{"key":"256_CR80","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1186\/1471-2210-10-6","volume":"10","author":"A-N Spiess","year":"2010","unstructured":"Spiess A-N, Neumeyer N (2010) An evaluation of R2 as an inadequate measure for nonlinear models in pharmacological and biochemical research: a Monte Carlo approach. BMC Pharmacol 10:6. https:\/\/doi.org\/10.1186\/1471-2210-10-6","journal-title":"BMC Pharmacol"},{"issue":"5","key":"256_CR81","doi-asserted-by":"publisher","first-page":"1984","DOI":"10.1021\/ci060132x","volume":"46","author":"S Zhang","year":"2006","unstructured":"Zhang S, Golbraikh A, Oloff S, Kohn H, Tropsha A (2006) A novel automated lazy learning QSAR (ALL-QSAR) approach: method development, applications, and virtual screening of chemical databases using validated ALL-QSAR models. J Chem Inf Model 46(5):1984\u20131995. https:\/\/doi.org\/10.1021\/ci060132x","journal-title":"J Chem Inf Model"},{"issue":"5","key":"256_CR82","doi-asserted-by":"publisher","first-page":"397","DOI":"10.1111\/j.1747-0285.2010.01029.x","volume":"76","author":"G Melagraki","year":"2010","unstructured":"Melagraki G, Afantitis A, Sarimveis H, Igglessi-Markopoulou O, Koutentis PA, Kollias G (2010) In silico exploration for identifying structure\u2013activity relationship of MEK inhibition and oral bioavailability for isothiazole derivatives. Chem Biol Drug Des 76(5):397\u2013406. https:\/\/doi.org\/10.1111\/j.1747-0285.2010.01029.x","journal-title":"Chem Biol Drug Des"},{"key":"256_CR83","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1042\/bj3570263","volume":"357","author":"A Cortes","year":"2001","unstructured":"Cortes A, Cascante M, Cardenas ML, Cornish-Bowden A (2001) Relationships between inhibition constants, inhibitor concentrations for 50% inhibition and types of inhibition: new ways of analysing data. Biochem J 357:263\u2013268","journal-title":"Biochem J"},{"issue":"4","key":"256_CR84","doi-asserted-by":"publisher","first-page":"597","DOI":"10.1124\/pr.55.4.4","volume":"55","author":"RR Neubig","year":"2003","unstructured":"Neubig RR, Spedding M, Kenakin T, Christopoulos A (2003) International union of pharmacology committee on receptor nomenclature and drug classification. XXXVIII. Update on terms and symbols in quantitative pharmacology. Pharmacol Rev 55(4):597\u2013606. https:\/\/doi.org\/10.1124\/pr.55.4.4","journal-title":"Pharmacol Rev"},{"issue":"1","key":"256_CR85","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1021\/ci600332j","volume":"47","author":"CL Brace","year":"2007","unstructured":"Brace CL, Melville JL, Pickett SD, Hirst JD (2007) Contemporary QSAR classifiers compared. J Chem Inf Model 47(1):219\u2013227. https:\/\/doi.org\/10.1021\/ci600332j","journal-title":"J Chem Inf Model"},{"key":"256_CR86","unstructured":"R Development Core Team (2011) R: a language and environment for statistical computing. R Foundation for Statistical Computing. http:\/\/www.R-project.org\/"},{"issue":"8","key":"256_CR87","doi-asserted-by":"publisher","first-page":"1291","DOI":"10.1016\/j.drudis.2016.06.013","volume":"21","author":"AU Khan","year":"2016","unstructured":"Khan AU (2016) Descriptors and their selection methods in QSAR analysis: paradigm for drug design. Drug Discov Today 21(8):1291\u20131302","journal-title":"Drug Discov Today"},{"issue":"2","key":"256_CR88","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1016\/j.ejmech.2010.11.029","volume":"46","author":"A Afantitis","year":"2011","unstructured":"Afantitis A, Melagraki G, Koutentis PA, Sarimveis H, Kollias G (2011) Ligand\u2014based virtual screening procedure for the prediction and the identification of novel $$\\beta$$ \u03b2 -amyloid aggregation inhibitors using Kohonen maps and Counterpropagation Artificial Neural Networks. Eur J Med Chem 46(2):497\u2013508. https:\/\/doi.org\/10.1016\/j.ejmech.2010.11.029","journal-title":"Eur J Med Chem"},{"key":"256_CR89","doi-asserted-by":"publisher","unstructured":"Viira B, Garc\u00eda-Sosa AT, Maran U (2017) QDB archive #202. QsarDB repository. https:\/\/doi.org\/10.15152\/QDB.202","DOI":"10.15152\/QDB.202"},{"key":"256_CR90","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1016\/j.jmgm.2017.06.019","volume":"76","author":"B Viira","year":"2017","unstructured":"Viira B, Garc\u00eda-Sosa AT, Maran U (2017) Chemical structure and correlation analysis of HIV-1 NNRT and NRT inhibitors and database-curated, published inhibition constants with chemical structure in diverse datasets. J Mol Graph Model 76:205\u2013223. https:\/\/doi.org\/10.1016\/j.jmgm.2017.06.019","journal-title":"J Mol Graph Model"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-017-0256-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13321-017-0256-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-017-0256-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,25]],"date-time":"2020-10-25T17:49:47Z","timestamp":1603648187000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/s13321-017-0256-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,1,16]]},"references-count":90,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["256"],"URL":"https:\/\/doi.org\/10.1186\/s13321-017-0256-5","relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,1,16]]},"assertion":[{"value":"31 May 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 December 2017","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 January 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"1"}}