{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:27:26Z","timestamp":1772119646630,"version":"3.50.1"},"reference-count":61,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2023,10,7]],"date-time":"2023-10-07T00:00:00Z","timestamp":1696636800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,7]],"date-time":"2023-10-07T00:00:00Z","timestamp":1696636800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100002322","name":"CAPES","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003593","name":"CNPq","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001807","name":"FAPESP","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100001807","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100004901","name":"FAPEMIG","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100004901","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Pr\u00f3-Reitoria de Pesquisa of the Universidade Federal de Minas Gerais"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Comput Aided Mol Des"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s10822-023-00536-y","type":"journal-article","created":{"date-parts":[[2023,10,7]],"date-time":"2023-10-07T06:02:34Z","timestamp":1696658554000},"page":"735-754","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["MASSA Algorithm: an automated rational sampling of training and test subsets for QSAR modeling"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7480-7198","authenticated-orcid":false,"given":"Gabriel Corr\u00eaa","family":"Ver\u00edssimo","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5183-7906","authenticated-orcid":false,"given":"Simone Queiroz","family":"Pantale\u00e3o","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8089-2958","authenticated-orcid":false,"given":"Philipe de Olveira","family":"Fernandes","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0861-6681","authenticated-orcid":false,"given":"Jadson Castro","family":"Gertrudes","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6933-7590","authenticated-orcid":false,"given":"Thales","family":"Kronenberger","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6938-0676","authenticated-orcid":false,"given":"Kathia Maria","family":"Honorio","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9675-5907","authenticated-orcid":false,"given":"Vin\u00edcius Gon\u00e7alves","family":"Maltarollo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,7]]},"reference":[{"key":"536_CR1","doi-asserted-by":"publisher","first-page":"10520","DOI":"10.1021\/acs.chemrev.8b00728","volume":"119","author":"X Yang","year":"2019","unstructured":"Yang X, Wang Y, Byrne R et al (2019) Concepts of artificial intelligence for computer-assisted drug discovery. Chem Rev 119:10520\u201310594. https:\/\/doi.org\/10.1021\/acs.chemrev.8b00728","journal-title":"Chem Rev"},{"key":"536_CR2","doi-asserted-by":"publisher","first-page":"1241","DOI":"10.1007\/s00044-014-1193-8","volume":"24","author":"VH Masand","year":"2015","unstructured":"Masand VH, Mahajan DT, Nazeruddin GM et al (2015) Effect of information leakage and method of splitting (rational and random) on external predictive ability and behavior of different statistical parameters of QSAR model. Med Chem Res 24:1241\u20131264. https:\/\/doi.org\/10.1007\/s00044-014-1193-8","journal-title":"Med Chem Res"},{"key":"536_CR3","doi-asserted-by":"publisher","first-page":"1011","DOI":"10.1080\/1062936X.2017.1397056","volume":"28","author":"MF Andrada","year":"2017","unstructured":"Andrada MF, Vega-Hissi EG, Estrada MR, Garro Martinez JC (2017) Impact assessment of the rational selection of training and test sets on the predictive ability of QSAR models. SAR QSAR Environ Res 28:1011\u20131023. https:\/\/doi.org\/10.1080\/1062936X.2017.1397056","journal-title":"SAR QSAR Environ Res"},{"key":"536_CR4","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1517\/17460441.1.2.103","volume":"1","author":"DE Clark","year":"2006","unstructured":"Clark DE (2006) What has computer-aided molecular design ever done for drug discovery? Expert Opin Drug Discov 1:103\u2013110. https:\/\/doi.org\/10.1517\/17460441.1.2.103","journal-title":"Expert Opin Drug Discov"},{"key":"536_CR5","unstructured":"International Council for Harmonisation of Technical Requirements for Pharmaceuticals for Human Use (2017) Assessment and Control of DNA Reactive (Mutagenic) Impurities in Pharmaceuticals to Limit Potential Carcinogenic Risk"},{"key":"536_CR6","doi-asserted-by":"publisher","first-page":"2570","DOI":"10.1021\/ci300338w","volume":"52","author":"TM Martin","year":"2012","unstructured":"Martin TM, Harten P, Young DM et al (2012) Does Rational selection of training and test sets improve the outcome of QSAR modeling? J Chem Inf Model 52:2570\u20132578. https:\/\/doi.org\/10.1021\/ci300338w","journal-title":"J Chem Inf Model"},{"key":"536_CR7","doi-asserted-by":"publisher","first-page":"4977","DOI":"10.1021\/jm4004285","volume":"57","author":"A Cherkasov","year":"2014","unstructured":"Cherkasov A, Muratov EN, Fourches D et al (2014) QSAR modeling: where have you been? Where are you going to? J Med Chem 57:4977\u20135010. https:\/\/doi.org\/10.1021\/jm4004285","journal-title":"J Med Chem"},{"key":"536_CR8","doi-asserted-by":"publisher","first-page":"3525","DOI":"10.1039\/D0CS00098A","volume":"49","author":"EN Muratov","year":"2020","unstructured":"Muratov EN, Bajorath J, Sheridan RP et al (2020) QSAR without borders. Chem Soc Rev 49:3525\u20133564. https:\/\/doi.org\/10.1039\/D0CS00098A","journal-title":"Chem Soc Rev"},{"key":"536_CR9","doi-asserted-by":"publisher","first-page":"795","DOI":"10.1007\/s11224-011-9757-4","volume":"22","author":"T Puzyn","year":"2011","unstructured":"Puzyn T, Mostrag-Szlichtyng A, Gajewicz A et al (2011) Investigating the influence of data splitting on the predictive ability of QSAR\/QSPR models. Struct Chem 22:795\u2013804. https:\/\/doi.org\/10.1007\/s11224-011-9757-4","journal-title":"Struct Chem"},{"key":"536_CR10","doi-asserted-by":"publisher","first-page":"168","DOI":"10.1002\/cem.1310","volume":"24","author":"KH Esbensen","year":"2010","unstructured":"Esbensen KH, Geladi P (2010) Principles of proper validation: use and abuse of re-sampling for validation. J Chemom 24:168\u2013187. https:\/\/doi.org\/10.1002\/cem.1310","journal-title":"J Chemom"},{"key":"536_CR11","doi-asserted-by":"publisher","first-page":"579","DOI":"10.1021\/ci025626i","volume":"43","author":"DM Hawkins","year":"2003","unstructured":"Hawkins DM, Basak SC, Mills D (2003) Assessing model fit by cross-validation. J Chem Inf Comput Sci 43:579\u2013586. https:\/\/doi.org\/10.1021\/ci025626i","journal-title":"J Chem Inf Comput Sci"},{"key":"536_CR12","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1023\/A:1021372108686","volume":"5","author":"A Golbraikh","year":"2000","unstructured":"Golbraikh A, Tropsha A (2000) Predictive QSAR modeling based on diversity sampling of experimental datasets for the training and test set selection. Mol Divers 5:231\u2013243. https:\/\/doi.org\/10.1023\/A:1021372108686","journal-title":"Mol Divers"},{"key":"536_CR13","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1023\/A:1025386326946","volume":"17","author":"A Golbraikh","year":"2003","unstructured":"Golbraikh A, Shen M, Xiao Z et al (2003) Rational selection of training and test sets for the development of validated QSAR models. J Comput Aided Mol Des 17:241\u2013253. https:\/\/doi.org\/10.1023\/A:1025386326946","journal-title":"J Comput Aided Mol Des"},{"key":"536_CR14","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1016\/0169-7439(95)00077-1","volume":"33","author":"W Wu","year":"1996","unstructured":"Wu W, Walczak B, Massart DL et al (1996) Artificial neural networks in classification of NIR spectral data: design of the training set. Chemom Intell Lab Syst 33:35\u201346. https:\/\/doi.org\/10.1016\/0169-7439(95)00077-1","journal-title":"Chemom Intell Lab Syst"},{"key":"536_CR15","doi-asserted-by":"publisher","first-page":"4378","DOI":"10.1080\/07391102.2017.1417161","volume":"36","author":"T Kronenberger","year":"2018","unstructured":"Kronenberger T, Windsh\u00fcgel B, Wrenger C et al (2018) On the relationship of anthranilic derivatives structure and the FXR (Farnesoid X receptor) agonist activity. J Biomol Struct Dyn 36:4378\u20134391. https:\/\/doi.org\/10.1080\/07391102.2017.1417161","journal-title":"J Biomol Struct Dyn"},{"key":"536_CR16","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1016\/j.jmgm.2019.04.007","volume":"90","author":"GC Ver\u00edssimo","year":"2019","unstructured":"Ver\u00edssimo GC, Menezes Dutra EF, Teotonio Dias AL et al (2019) HQSAR and random forest-based QSAR models for anti-T. vaginalis activities of nitroimidazoles derivatives. J Mol Graph Model 90:180\u2013191. https:\/\/doi.org\/10.1016\/j.jmgm.2019.04.007","journal-title":"J Mol Graph Model"},{"key":"536_CR17","doi-asserted-by":"publisher","first-page":"1436","DOI":"10.1080\/07391102.2016.1185379","volume":"35","author":"RA Gomes","year":"2017","unstructured":"Gomes RA, Genesi GL, Maltarollo VG, Trossini GHG (2017) Quantitative structure\u2013activity relationships (HQSAR, CoMFA, and CoMSIA) studies for COX-2 selective inhibitors. J Biomol Struct Dyn 35:1436\u20131445. https:\/\/doi.org\/10.1080\/07391102.2016.1185379","journal-title":"J Biomol Struct Dyn"},{"key":"536_CR18","doi-asserted-by":"publisher","DOI":"10.1080\/073911021935321","author":"PO de Fernandes","year":"2021","unstructured":"de Fernandes PO, Martins JPA, de Melo EB et al (2021) Quantitative structure-activity relationship and machine learning studies of 2-thiazolylhydrazone derivatives with anti-Cryptococcus neoformans activity. J Biomol Struct Dyn. https:\/\/doi.org\/10.1080\/073911021935321","journal-title":"J Biomol Struct Dyn"},{"key":"536_CR19","doi-asserted-by":"publisher","first-page":"135","DOI":"10.4155\/fmc-2016-0179","volume":"9","author":"T Kronenberger","year":"2017","unstructured":"Kronenberger T, Asse LR, Wrenger C et al (2017) Studies of Staphylococcus aureus FabI inhibitors: fragment-based approach based on holographic structure\u2013activity relationship analyses. Future Med Chem 9:135\u2013151. https:\/\/doi.org\/10.4155\/fmc-2016-0179","journal-title":"Future Med Chem"},{"key":"536_CR20","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1080\/07391102.2019.1574603","volume":"38","author":"GM Ferreira","year":"2020","unstructured":"Ferreira GM, de Magalh\u00e3es JG, Maltarollo VG et al (2020) QSAR studies on the human sirtuin 2 inhibition by non-covalent 7,5,2-anilinobenzamide derivatives. J Biomol Struct Dyn 38:354\u2013363. https:\/\/doi.org\/10.1080\/07391102.2019.1574603","journal-title":"J Biomol Struct Dyn"},{"key":"536_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.4018\/IJQSPR.2019100101","volume":"4","author":"VG Maltarollo","year":"2019","unstructured":"Maltarollo VG (2019) Classification of Staphylococcus aureus FabI inhibitors by machine learning techniques. IJQSPR 4:1\u201314. https:\/\/doi.org\/10.4018\/IJQSPR.2019100101","journal-title":"IJQSPR"},{"key":"536_CR22","doi-asserted-by":"publisher","first-page":"283","DOI":"10.3109\/14756366.2015.1021250","volume":"31","author":"MC Primi","year":"2016","unstructured":"Primi MC, Maltarollo VG, Magalh\u00e3es JG et al (2016) Convergent QSAR studies on a series of NK3 receptor antagonists for schizophrenia treatment. J Enzyme Inhib Med Chem 31:283\u2013294. https:\/\/doi.org\/10.3109\/14756366.2015.1021250","journal-title":"J Enzyme Inhib Med Chem"},{"key":"536_CR23","doi-asserted-by":"publisher","first-page":"eaap7885","DOI":"10.1126\/sciadv.aap7885","volume":"4","author":"M Popova","year":"2018","unstructured":"Popova M, Isayev O, Tropsha A (2018) Deep reinforcement learning for de novo drug design. Sci Adv 4:eaap7885. https:\/\/doi.org\/10.1126\/sciadv.aap7885","journal-title":"Sci Adv"},{"key":"536_CR24","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1038\/s42256-019-0030-7","volume":"1","author":"G Schneider","year":"2019","unstructured":"Schneider G (2019) Mind and machine in drug design. Nat Mach Intell 1:128\u2013130. https:\/\/doi.org\/10.1038\/s42256-019-0030-7","journal-title":"Nat Mach Intell"},{"key":"536_CR25","doi-asserted-by":"publisher","first-page":"1947","DOI":"10.1007\/s10462-021-10058-4","volume":"55","author":"S Dara","year":"2022","unstructured":"Dara S, Dhamercherla S, Jadav SS et al (2022) Machine learning in drug discovery: a review. Artif Intell Rev 55:1947\u20131999. https:\/\/doi.org\/10.1007\/s10462-021-10058-4","journal-title":"Artif Intell Rev"},{"key":"536_CR26","doi-asserted-by":"publisher","first-page":"2538","DOI":"10.1021\/acs.jcim.9b00295","volume":"59","author":"P Ambure","year":"2019","unstructured":"Ambure P, Halder AK, Gonz\u00e1lez D\u00edaz H, Cordeiro MNDS (2019) QSAR-Co: an open source software for developing robust multitasking or multitarget classification-based QSAR models. J Chem Inf Model 59:2538\u20132544. https:\/\/doi.org\/10.1021\/acs.jcim.9b00295","journal-title":"J Chem Inf Model"},{"key":"536_CR27","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1186\/s13321-021-00508-0","volume":"13","author":"AK Halder","year":"2021","unstructured":"Halder AK, Dias Soeiro Cordeiro MN (2021) QSAR-Co-X: an open source toolkit for multitarget QSAR modelling. J Cheminform 13:29. https:\/\/doi.org\/10.1186\/s13321-021-00508-0","journal-title":"J Cheminform"},{"key":"536_CR28","unstructured":"Ver\u00edssimo GC (2021) MASSA Algorithm: Molecular data set sampling for training-test separation"},{"key":"536_CR29","unstructured":"Landrum G (2021) RDkit: 2021_03_3 (Q1 2021) Release"},{"key":"536_CR30","unstructured":"Vos NJ de (2015) KModes categorical clustering library"},{"key":"536_CR31","unstructured":"Python Software Foundation argparse\u2014Parser for command-line options, arguments and sub-commands\u2014Python 3.9.7 documentation. https:\/\/docs.python.org\/3\/library\/argparse.html. Accessed 5 Oct 2021"},{"key":"536_CR32","unstructured":"scikit-learn: machine learning in Python\u2014scikit-learn 1.0 documentation. https:\/\/scikit-learn.org\/stable\/index.html. Accessed 5 Oct 2021"},{"key":"536_CR33","unstructured":"sklearn.decomposition.PCA. In: scikit-learn. https:\/\/www.scikit-learn\/stable\/modules\/generated\/sklearn.decomposition.PCA.html. Accessed 5 Oct 2021"},{"key":"536_CR34","unstructured":"scipy.cluster.hierarchy.linkage\u2014SciPy v1.7.1 Manual. https:\/\/docs.scipy.org\/doc\/scipy\/reference\/generated\/scipy.cluster.hierarchy.linkage.html. Accessed 8 Oct 2021"},{"key":"536_CR35","unstructured":"scipy.cluster.hierarchy.maxdists\u2014SciPy v1.8.0 Manual. https:\/\/docs.scipy.org\/doc\/scipy\/reference\/generated\/scipy.cluster.hierarchy.maxdists.html. Accessed 22 Mar 2022"},{"key":"536_CR36","unstructured":"scipy.cluster.hierarchy.fcluster\u2014SciPy v1.7.1 Manual. https:\/\/docs.scipy.org\/doc\/scipy\/reference\/generated\/scipy.cluster.hierarchy.fcluster.html. Accessed 8 Oct 2021"},{"key":"536_CR37","unstructured":"scipy.cluster.hierarchy.dendrogram\u2014SciPy v1.7.1 Manual. https:\/\/docs.scipy.org\/doc\/scipy\/reference\/generated\/scipy.cluster.hierarchy.dendrogram.html. Accessed 8 Oct 2021"},{"key":"536_CR38","unstructured":"sklearn.model_selection.train_test_split. In: scikit-learn. https:\/\/www.scikit-learn\/stable\/modules\/generated\/sklearn.model_selection.train_test_split.html. Accessed 9 Oct 2021"},{"key":"536_CR39","doi-asserted-by":"publisher","first-page":"5541","DOI":"10.1021\/jm0497141","volume":"47","author":"JJ Sutherland","year":"2004","unstructured":"Sutherland JJ, O\u2019Brien LA, Weaver DF (2004) A Comparison of methods for modeling quantitative structure\u2212activity relationships. J Med Chem 47:5541\u20135554. https:\/\/doi.org\/10.1021\/jm0497141","journal-title":"J Med Chem"},{"key":"536_CR40","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1016\/j.ejmech.2016.03.009","volume":"115","author":"C-J Liu","year":"2016","unstructured":"Liu C-J, Yu S-L, Liu Y-P et al (2016) Synthesis, cytotoxic activity evaluation and HQSAR study of novel isosteviol derivatives as potential anticancer agents. Eur J Med Chem 115:26\u201340. https:\/\/doi.org\/10.1016\/j.ejmech.2016.03.009","journal-title":"Eur J Med Chem"},{"key":"536_CR41","doi-asserted-by":"publisher","first-page":"4609","DOI":"10.1016\/j.bmc.2007.04.015","volume":"15","author":"NF Valadares","year":"2007","unstructured":"Valadares NF, Castilho MS, Polikarpov I, Garratt RC (2007) 2D QSAR studies on thyroid hormone receptor ligands. Bioorg Med Chem 15:4609\u20134617. https:\/\/doi.org\/10.1016\/j.bmc.2007.04.015","journal-title":"Bioorg Med Chem"},{"key":"536_CR42","doi-asserted-by":"publisher","first-page":"3310","DOI":"10.1016\/j.bmcl.2009.04.072","volume":"19","author":"M Ye","year":"2009","unstructured":"Ye M, Dawson MI (2009) Studies of cannabinoid-1 receptor antagonists for the treatment of obesity: hologram QSAR model for biarylpyrazolyl oxadiazole ligands. Bioorg Med Chem Lett 19:3310\u20133315. https:\/\/doi.org\/10.1016\/j.bmcl.2009.04.072","journal-title":"Bioorg Med Chem Lett"},{"key":"536_CR43","doi-asserted-by":"publisher","DOI":"10.1016\/j.colsurfa.2019.124226","volume":"586","author":"L Jiao","year":"2020","unstructured":"Jiao L, Wang Y, Qu L et al (2020) Hologram QSAR study on the critical micelle concentration of Gemini surfactants. Colloids Surf, A 586:124226. https:\/\/doi.org\/10.1016\/j.colsurfa.2019.124226","journal-title":"Colloids Surf, A"},{"key":"536_CR44","unstructured":"Dassault Syst\u00e8mes Biovia Corp (2020) BIOVIA discovery studio visualizer 2021"},{"key":"536_CR45","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1021\/ci100031x","volume":"50","author":"PCD Hawkins","year":"2010","unstructured":"Hawkins PCD, Skillman AG, Warren GL et al (2010) Conformer generation with OMEGA: algorithm and validation using high quality structures from the protein databank and Cambridge structural database. J Chem Inf Model 50:572\u2013584. https:\/\/doi.org\/10.1021\/ci100031x","journal-title":"J Chem Inf Model"},{"key":"536_CR46","doi-asserted-by":"crossref","unstructured":"OMEGA 2.5.1.4. OpenEye Scientific Software, Santa Fe","DOI":"10.1038\/scientificamerican09261846-5j"},{"key":"536_CR47","unstructured":"QUACPAC 1.6.3.1. OpenEye Scientific Software, Santa Fe"},{"key":"536_CR48","doi-asserted-by":"crossref","unstructured":"Burns J, Spiekermann K, Bhattacharjee H, et al (2023) Machine Learning Validation via Rational Dataset Sampling with astartes","DOI":"10.21105\/joss.05996"},{"key":"536_CR49","unstructured":"TRIPOS Associates Inc (2012) Sybyl-X Molecular Modeling Software Packages"},{"key":"536_CR50","doi-asserted-by":"publisher","DOI":"10.1145\/16562741656280","author":"MR Berthold","year":"2009","unstructured":"Berthold MR, Cebron N, Dill F et al (2009) KNIME\u2014the Konstanz information miner: version 2.0 and beyond. ACM SIGKDD Explor Newsl. https:\/\/doi.org\/10.1145\/16562741656280","journal-title":"ACM SIGKDD Explor Newsl"},{"key":"536_CR51","doi-asserted-by":"publisher","first-page":"1301","DOI":"10.1007\/s11030-021-10261-z","volume":"25","author":"PO Fernandes","year":"2021","unstructured":"Fernandes PO, Martins DM, de Souza BA et al (2021) Molecular insights on ABL kinase activation using tree-based machine learning models and molecular docking. Mol Divers 25:1301\u20131314. https:\/\/doi.org\/10.1007\/s11030-021-10261-z","journal-title":"Mol Divers"},{"key":"536_CR52","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa F, Varoquaux G, Gramfort A et al (2011) Scikit-learn: machine learning in Python. J Mach Learn Res 12:2825\u20132830","journal-title":"J Mach Learn Res"},{"key":"536_CR53","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1038\/s41592-019-0686-2","volume":"17","author":"P Virtanen","year":"2020","unstructured":"Virtanen P, Gommers R, Oliphant TE et al (2020) SciPy 1.0: fundamental algorithms for scientific computing in Python. Nat Methods 17:261\u2013272. https:\/\/doi.org\/10.1038\/s41592-019-0686-2","journal-title":"Nat Methods"},{"key":"536_CR54","doi-asserted-by":"publisher","first-page":"2320","DOI":"10.1021\/ci200211n","volume":"51","author":"N Chirico","year":"2011","unstructured":"Chirico N, Gramatica P (2011) Real external predictivity of QSAR models: how to evaluate it? comparison of different validation criteria and proposal of using the concordance correlation coefficient. J Chem Inf Model 51:2320\u20132335. https:\/\/doi.org\/10.1021\/ci200211n","journal-title":"J Chem Inf Model"},{"key":"536_CR55","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1016\/S1093-3263(01)00123-1","volume":"20","author":"A Golbraikh","year":"2002","unstructured":"Golbraikh A, Tropsha A (2002) Beware of q2! J Mol Graph Model 20:269\u2013276. https:\/\/doi.org\/10.1016\/S1093-3263(01)00123-1","journal-title":"J Mol Graph Model"},{"key":"536_CR56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-17281-1","volume-title":"A primer on QSAR\/QSPR modeling","author":"K Roy","year":"2015","unstructured":"Roy K, Kar S, Das RN (2015) A primer on QSAR\/QSPR modeling. Springer International Publishing, Cham"},{"key":"536_CR57","doi-asserted-by":"publisher","first-page":"186","DOI":"10.1021\/ci000066d","volume":"41","author":"LM Shi","year":"2001","unstructured":"Shi LM, Fang H, Tong W et al (2001) QSAR models using a large diverse set of estrogens. J Chem Inf Comput Sci 41:186\u2013195. https:\/\/doi.org\/10.1021\/ci000066d","journal-title":"J Chem Inf Comput Sci"},{"key":"536_CR58","doi-asserted-by":"publisher","first-page":"1127","DOI":"10.1021\/acs.jcim.6b00088","volume":"56","author":"P Gramatica","year":"2016","unstructured":"Gramatica P, Sangion A (2016) A historical excursus on the statistical validation parameters for QSAR models: a clarification concerning metrics and terminology. J Chem Inf Model 56:1127\u20131131. https:\/\/doi.org\/10.1021\/acs.jcim.6b00088","journal-title":"J Chem Inf Model"},{"key":"536_CR59","doi-asserted-by":"publisher","DOI":"10.1016\/j.comtox.2021.100178","volume":"20","author":"S-Y Bae","year":"2021","unstructured":"Bae S-Y, Lee J, Jeong J et al (2021) Effective data-balancing methods for class-imbalanced genotoxicity datasets using machine learning algorithms and molecular fingerprints. Comput Toxicol 20:100178. https:\/\/doi.org\/10.1016\/j.comtox.2021.100178","journal-title":"Comput Toxicol"},{"key":"536_CR60","doi-asserted-by":"publisher","first-page":"929","DOI":"10.1080\/17460441.2022.2114451","volume":"17","author":"GC Ver\u00edssimo","year":"2022","unstructured":"Ver\u00edssimo GC, Serafim MSM, Kronenberger T et al (2022) Designing drugs when there is low data availability: one-shot learning and other approaches to face the issues of a long-term concern. Expert Opin Drug Discov 17:929\u2013947. https:\/\/doi.org\/10.1080\/17460441.2022.2114451","journal-title":"Expert Opin Drug Discov"},{"key":"536_CR61","doi-asserted-by":"publisher","first-page":"4070","DOI":"10.1021\/acs.jcim.9b00476","volume":"59","author":"P Ambure","year":"2019","unstructured":"Ambure P, Gajewicz-Skretna A, Cordeiro MNDS, Roy K (2019) New workflow for QSAR model development from small data sets: small dataset curator and small dataset modeler. integration of data curation, exhaustive double cross-validation, and a set of optimal model selection techniques. J Chem Inf Model 59:4070\u20134076. https:\/\/doi.org\/10.1021\/acs.jcim.9b00476","journal-title":"J Chem Inf Model"}],"container-title":["Journal of Computer-Aided Molecular Design"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10822-023-00536-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10822-023-00536-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10822-023-00536-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,23]],"date-time":"2023-12-23T09:04:25Z","timestamp":1703322265000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10822-023-00536-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,7]]},"references-count":61,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["536"],"URL":"https:\/\/doi.org\/10.1007\/s10822-023-00536-y","relation":{"has-preprint":[{"id-type":"doi","id":"10.26434\/chemrxiv-2022-dct7l-v2","asserted-by":"object"},{"id-type":"doi","id":"10.26434\/chemrxiv-2022-dct7l-v3","asserted-by":"object"},{"id-type":"doi","id":"10.26434\/chemrxiv-2022-dct7l","asserted-by":"object"}]},"ISSN":["0920-654X","1573-4951"],"issn-type":[{"value":"0920-654X","type":"print"},{"value":"1573-4951","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,7]]},"assertion":[{"value":"6 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 September 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 October 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests (financial or non-financial) to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}