{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T22:46:27Z","timestamp":1777502787558,"version":"3.51.4"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T00:00:00Z","timestamp":1736985600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T00:00:00Z","timestamp":1736985600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100000057","name":"National Institute of General Medical Sciences","doi-asserted-by":"publisher","award":["T32GM135122"],"award-info":[{"award-number":["T32GM135122"]}],"id":[{"id":"10.13039\/100000057","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000057","name":"National Institute of General Medical Sciences","doi-asserted-by":"publisher","award":["T32GM08633"],"award-info":[{"award-number":["T32GM08633"]}],"id":[{"id":"10.13039\/100000057","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Intramural research program of the NCATS"},{"name":"NIH","award":["R01GM140154"],"award-info":[{"award-number":["R01GM140154"]}]},{"name":"NSF","award":["DMS2344256"],"award-info":[{"award-number":["DMS2344256"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"DOI":"10.1186\/s13321-025-00948-y","type":"journal-article","created":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T17:24:41Z","timestamp":1737048281000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["One size does not fit all: revising traditional paradigms for assessing accuracy of QSAR models used for virtual screening"],"prefix":"10.1186","volume":"17","author":[{"given":"James","family":"Wellnitz","sequence":"first","affiliation":[]},{"given":"Sankalp","family":"Jain","sequence":"additional","affiliation":[]},{"given":"Joshua E.","family":"Hochuli","sequence":"additional","affiliation":[]},{"given":"Travis","family":"Maxfield","sequence":"additional","affiliation":[]},{"given":"Eugene N.","family":"Muratov","sequence":"additional","affiliation":[]},{"given":"Alexander","family":"Tropsha","sequence":"additional","affiliation":[]},{"given":"Alexey V.","family":"Zakharov","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,16]]},"reference":[{"key":"948_CR1","doi-asserted-by":"publisher","first-page":"3525","DOI":"10.1039\/D0CS00098A","volume":"49","author":"EN Muratov","year":"2020","unstructured":"Muratov EN, Bajorath J, Sheridan RP et al (2020) QSAR without borders. Chem Soc Rev 49:3525\u20133564. https:\/\/doi.org\/10.1039\/D0CS00098A","journal-title":"Chem Soc Rev"},{"key":"948_CR2","doi-asserted-by":"publisher","first-page":"6007","DOI":"10.1021\/acs.jcim.0c00884","volume":"60","author":"VB Siramshetty","year":"2020","unstructured":"Siramshetty VB, Nguyen D-T, Martinez NJ et al (2020) Critical assessment of artificial intelligence methods for prediction of hERG channel inhibition in the \u201cBig Data\u201d Era. J Chem Inf Model 60:6007\u20136019. https:\/\/doi.org\/10.1021\/acs.jcim.0c00884","journal-title":"J Chem Inf Model"},{"key":"948_CR3","doi-asserted-by":"publisher","first-page":"653","DOI":"10.1021\/acs.jcim.0c01164","volume":"61","author":"S Jain","year":"2021","unstructured":"Jain S, Siramshetty VB, Alves VM et al (2021) Large-scale modeling of multispecies acute toxicity end points using consensus of multitask deep learning methods. J Chem Inf Model 61:653\u2013663. https:\/\/doi.org\/10.1021\/acs.jcim.0c01164","journal-title":"J Chem Inf Model"},{"key":"948_CR4","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1002\/minf.201000061","volume":"29","author":"A Tropsha","year":"2010","unstructured":"Tropsha A (2010) Best practices for QSAR model development, validation, and exploitation. Mol Inform 29:476\u2013488. https:\/\/doi.org\/10.1002\/minf.201000061","journal-title":"Mol Inform"},{"key":"948_CR5","first-page":"1","volume-title":"Handbook of computational chemistry","author":"A Golbraikh","year":"2016","unstructured":"Golbraikh A, Wang XS, Zhu H, Tropsha A (2016) Predictive QSAR modeling: methods and applications in drug discovery and chemical risk assessment. In: Leszczynski J (ed) Handbook of computational chemistry. Springer, Netherlands, pp 1\u201348"},{"key":"948_CR6","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1023\/A:1025386326946","volume":"17","author":"A Golbraikh","year":"2003","unstructured":"Golbraikh A, Shen M, Xiao Z et al (2003) Rational selection of training and test sets for the development of validated QSAR models. J Comput Aided Mol Des 17:241\u2013253. https:\/\/doi.org\/10.1023\/A:1025386326946","journal-title":"J Comput Aided Mol Des"},{"key":"948_CR7","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/978-1-4939-7899-1_6","volume-title":"Computational toxicology: methods and protocols","author":"S Kar","year":"2018","unstructured":"Kar S, Roy K, Leszczynski J (2018) Applicability domain: a step toward confident predictions and decidability for QSAR modeling. In: Nicolotti O (ed) Computational toxicology: methods and protocols. Springer, New York, pp 141\u2013169"},{"key":"948_CR8","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1016\/j.jmgm.2017.01.008","volume":"72","author":"U Norinder","year":"2017","unstructured":"Norinder U, Boyer S (2017) Binary classification of imbalanced datasets using conformal prediction. J Mol Graph Model 72:256\u2013265. https:\/\/doi.org\/10.1016\/j.jmgm.2017.01.008","journal-title":"J Mol Graph Model"},{"key":"948_CR9","doi-asserted-by":"publisher","first-page":"3213","DOI":"10.1021\/acs.jcim.0c01439","volume":"61","author":"O Casanova-Alvarez","year":"2021","unstructured":"Casanova-Alvarez O, Morales-Helguera A, Cabrera-P\u00e9rez M\u00c1 et al (2021) A novel automated framework for QSAR modeling of highly imbalanced leishmania high-throughput screening data. J Chem Inf Model 61:3213\u20133231. https:\/\/doi.org\/10.1021\/acs.jcim.0c01439","journal-title":"J Chem Inf Model"},{"key":"948_CR10","unstructured":"(2008) Chemoinformatics approaches to virtual screening. The Royal Society of Chemistry"},{"key":"948_CR11","doi-asserted-by":"publisher","first-page":"D1100","DOI":"10.1093\/nar\/gkr777","volume":"40","author":"A Gaulton","year":"2012","unstructured":"Gaulton A, Bellis LJ, Bento AP et al (2012) ChEMBL: a large-scale bioactivity database for drug discovery. Nucleic Acids Res 40:D1100\u2013D1107. https:\/\/doi.org\/10.1093\/nar\/gkr777","journal-title":"Nucleic Acids Res"},{"key":"948_CR12","doi-asserted-by":"publisher","first-page":"D1373","DOI":"10.1093\/nar\/gkac956","volume":"51","author":"S Kim","year":"2023","unstructured":"Kim S, Chen J, Cheng T et al (2023) PubChem 2023 update. Nucleic Acids Res 51:D1373\u2013D1380. https:\/\/doi.org\/10.1093\/nar\/gkac956","journal-title":"Nucleic Acids Res"},{"key":"948_CR13","doi-asserted-by":"publisher","first-page":"667","DOI":"10.1038\/s41589-022-01233-x","volume":"19","author":"A Cherkasov","year":"2023","unstructured":"Cherkasov A (2023) The \u2018Big Bang\u2019 of the chemical universe. Nat Chem Biol 19:667\u2013668. https:\/\/doi.org\/10.1038\/s41589-022-01233-x","journal-title":"Nat Chem Biol"},{"key":"948_CR14","unstructured":"eMolecules Explore. http:\/\/www.emolecules.com\/explore. Accessed 5 Mar 2023"},{"key":"948_CR15","unstructured":"REAL Space-Enamine. https:\/\/enamine.net\/compound-collections\/real-compounds\/real-space-navigator. Accessed 5 Mar 2023"},{"key":"948_CR16","doi-asserted-by":"publisher","first-page":"4799","DOI":"10.1038\/s41596-021-00597-z","volume":"16","author":"BJ Bender","year":"2021","unstructured":"Bender BJ, Gahbauer S, Luttens A et al (2021) A practical guide to large-scale docking. Nat Protoc 16:4799\u20134832. https:\/\/doi.org\/10.1038\/s41596-021-00597-z","journal-title":"Nat Protoc"},{"key":"948_CR17","doi-asserted-by":"crossref","unstructured":"Neves BJ, Braga RC, Melo-Filho CC et al (2018) QSAR-based virtual screening: advances and applications in drug discovery. Front Pharmacol 9","DOI":"10.3389\/fphar.2018.01275"},{"key":"948_CR18","doi-asserted-by":"publisher","first-page":"468","DOI":"10.1021\/acsptsci.2c00049","volume":"5","author":"JE Hochuli","year":"2022","unstructured":"Hochuli JE, Jain S, Melo-Filho C et al (2022) Allosteric binders of ACE2 are promising anti-SARS-CoV-2 agents. ACS Pharmacol Transl Sci 5:468\u2013478. https:\/\/doi.org\/10.1021\/acsptsci.2c00049","journal-title":"ACS Pharmacol Transl Sci"},{"key":"948_CR19","doi-asserted-by":"publisher","first-page":"1675","DOI":"10.1021\/acsptsci.1c00176","volume":"4","author":"S Jain","year":"2021","unstructured":"Jain S, Talley DC, Baljinnyam B et al (2021) Hybrid in silico approach reveals novel inhibitors of multiple SARS-CoV-2 variants. ACS Pharmacol Transl Sci 4:1675\u20131688. https:\/\/doi.org\/10.1021\/acsptsci.1c00176","journal-title":"ACS Pharmacol Transl Sci"},{"key":"948_CR20","doi-asserted-by":"publisher","first-page":"31365","DOI":"10.1073\/pnas.2005463117","volume":"117","author":"RPM Abrams","year":"2020","unstructured":"Abrams RPM, Yasgar A, Teramoto T et al (2020) Therapeutic candidates for the Zika virus identified by a high-throughput screen for Zika protease inhibitors. Proc Natl Acad Sci 117:31365\u201331375. https:\/\/doi.org\/10.1073\/pnas.2005463117","journal-title":"Proc Natl Acad Sci"},{"key":"948_CR21","doi-asserted-by":"publisher","first-page":"7828","DOI":"10.3390\/ijms21217828","volume":"21","author":"J Spiegel","year":"2020","unstructured":"Spiegel J, Senderowitz H (2020) Evaluation of QSAR equations for virtual screening. Int J Mol Sci 21:7828. https:\/\/doi.org\/10.3390\/ijms21217828","journal-title":"Int J Mol Sci"},{"key":"948_CR22","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1186\/s13321-021-00519-x","volume":"13","author":"M Matveieva","year":"2021","unstructured":"Matveieva M, Polishchuk P (2021) Benchmarks for interpretation of QSAR models. J Cheminformatics 13:41. https:\/\/doi.org\/10.1186\/s13321-021-00519-x","journal-title":"J Cheminformatics"},{"key":"948_CR23","doi-asserted-by":"publisher","first-page":"488","DOI":"10.1021\/ci600426e","volume":"47","author":"J-F Truchon","year":"2007","unstructured":"Truchon J-F, Bayly CI (2007) Evaluating virtual screening methods: good and bad metrics for the \u201cEarly Recognition\u201d Problem. J Chem Inf Model 47:488\u2013508. https:\/\/doi.org\/10.1021\/ci600426e","journal-title":"J Chem Inf Model"},{"key":"948_CR24","doi-asserted-by":"publisher","first-page":"705","DOI":"10.1021\/ci400737s","volume":"54","author":"AV Zakharov","year":"2014","unstructured":"Zakharov AV, Peach ML, Sitzmann M, Nicklaus MC (2014) QSAR modeling of imbalanced high-throughput screening data in PubChem. J Chem Inf Model 54:705\u2013712. https:\/\/doi.org\/10.1021\/ci400737s","journal-title":"J Chem Inf Model"},{"key":"948_CR25","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla NV, Bowyer KW, Hall LO, Kegelmeyer WP (2002) SMOTE: synthetic minority over-sampling technique. J Artif Intell Res 16:321\u2013357. https:\/\/doi.org\/10.1613\/jair.953","journal-title":"J Artif Intell Res"},{"key":"948_CR26","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1007\/s42979-020-00156-5","volume":"1","author":"C Kumari","year":"2020","unstructured":"Kumari C, Abulaish M, Subbarao N (2020) Using SMOTE to deal with class-imbalance problem in bioactivity data to predict mTOR inhibitors. SN Comput Sci 1:150. https:\/\/doi.org\/10.1007\/s42979-020-00156-5","journal-title":"SN Comput Sci"},{"key":"948_CR27","doi-asserted-by":"publisher","first-page":"5957","DOI":"10.1021\/acs.jcim.0c00565","volume":"60","author":"EL C\u00e1ceres","year":"2020","unstructured":"C\u00e1ceres EL, Mew NC, Keiser MJ (2020) Adding stochastic negative examples into machine learning improves molecular bioactivity prediction. J Chem Inf Model 60:5957\u20135970. https:\/\/doi.org\/10.1021\/acs.jcim.0c00565","journal-title":"J Chem Inf Model"},{"key":"948_CR28","doi-asserted-by":"publisher","first-page":"P41","DOI":"10.1186\/1758-2946-2-S1-P41","volume":"2","author":"I Sushko","year":"2010","unstructured":"Sushko I, Novotarskyi S, Pandey A et al (2010) Applicability domain for classification problems. J Cheminformatics 2:P41. https:\/\/doi.org\/10.1186\/1758-2946-2-S1-P41","journal-title":"J Cheminformatics"},{"key":"948_CR29","doi-asserted-by":"publisher","first-page":"1395","DOI":"10.1021\/ci0100144","volume":"41","author":"RP Sheridan","year":"2001","unstructured":"Sheridan RP, Singh SB, Fluder EM, Kearsley SK (2001) Protocols for bridging the peptide to nonpeptide gap in topological similarity searches. J Chem Inf Comput Sci 41:1395\u20131406. https:\/\/doi.org\/10.1021\/ci0100144","journal-title":"J Chem Inf Comput Sci"},{"key":"948_CR30","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to information retrieval","author":"CD Manning","year":"2008","unstructured":"Manning CD, Raghavan P, Sch\u00fctze H (2008) Introduction to information retrieval, illustrated edn. Cambridge University Press, New York","edition":"illustrated"},{"key":"948_CR31","doi-asserted-by":"publisher","first-page":"343","DOI":"10.1016\/S1093-3263(00)00061-9","volume":"18","author":"SJ Edgar","year":"2000","unstructured":"Edgar SJ, Holliday JD, Willett P (2000) Effectiveness of retrieval in similarity searches of chemical databases: a review of performance measures. J Mol Graph Model 18:343\u2013357. https:\/\/doi.org\/10.1016\/S1093-3263(00)00061-9","journal-title":"J Mol Graph Model"},{"key":"948_CR32","doi-asserted-by":"crossref","unstructured":"Gupta D, Loane R, Gayen S, Demner-Fushman D (2022) Medical image retrieval via nearest neighbor search on pre-trained image features","DOI":"10.2139\/ssrn.4240384"},{"key":"948_CR33","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1038\/sj.tpj.6500084","volume":"2","author":"PM Nadkarni","year":"2002","unstructured":"Nadkarni PM (2002) An introduction to information retrieval: applications in genomics. Pharmacogenomics J 2:96\u2013102. https:\/\/doi.org\/10.1038\/sj.tpj.6500084","journal-title":"Pharmacogenomics J"},{"key":"948_CR34","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1016\/S0306-4573(01)00033-4","volume":"38","author":"D Byrd","year":"2002","unstructured":"Byrd D, Crawford T (2002) Problems of music information retrieval in the real world. Inf Process Manag 38:249\u2013272. https:\/\/doi.org\/10.1016\/S0306-4573(01)00033-4","journal-title":"Inf Process Manag"},{"key":"948_CR35","first-page":"45","volume-title":"Machine learning for evolution strategies","author":"O Kramer","year":"2016","unstructured":"Kramer O (2016) Scikit-Learn. Machine learning for evolution strategies. Springer International Publishing, Cham, pp 45\u201353"},{"key":"948_CR36","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1021\/ci100050t","volume":"50","author":"D Rogers","year":"2010","unstructured":"Rogers D, Hahn M (2010) Extended-connectivity fingerprints. J Chem Inf Model 50:742\u2013754. https:\/\/doi.org\/10.1021\/ci100050t","journal-title":"J Chem Inf Model"},{"key":"948_CR37","unstructured":"Landrum G, Tosco P, Kelley B et al (2023) rdkit\/rdkit: 2023_03_1 (Q1 2023) Release"},{"key":"948_CR38","doi-asserted-by":"crossref","unstructured":"Chen T, Guestrin C (2016) XGBoost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. pp 785\u2013794","DOI":"10.1145\/2939672.2939785"},{"key":"948_CR39","unstructured":"Keras: Deep Learning for humans. https:\/\/keras.io\/. Accessed 22 Dec 2024"},{"key":"948_CR40","doi-asserted-by":"publisher","first-page":"4387","DOI":"10.1021\/acs.jcim.4c00412","volume":"64","author":"J Wellnitz","year":"2024","unstructured":"Wellnitz J, Martin H-J, Anwar Hossain M et al (2024) STOPLIGHT: a hit scoring calculator. J Chem Inf Model 64:4387\u20134391. https:\/\/doi.org\/10.1021\/acs.jcim.4c00412","journal-title":"J Chem Inf Model"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-025-00948-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s13321-025-00948-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-025-00948-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T17:24:49Z","timestamp":1737048289000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/s13321-025-00948-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,16]]},"references-count":40,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["948"],"URL":"https:\/\/doi.org\/10.1186\/s13321-025-00948-y","relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,16]]},"assertion":[{"value":"15 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"AT and ENM are co-founders of Predictive, LLC, which develops novel alternative methods and software for toxicity prediction. All other authors declare they have nothing to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"7"}}