{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T15:50:50Z","timestamp":1781884250558,"version":"3.54.5"},"reference-count":97,"publisher":"American Chemical Society (ACS)","issue":"6","license":[{"start":{"date-parts":[[2021,6,8]],"date-time":"2021-06-08T00:00:00Z","timestamp":1623110400000},"content-version":"unspecified","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2021,6,28]]},"DOI":"10.1021\/acs.jcim.1c00160","type":"journal-article","created":{"date-parts":[[2021,6,8]],"date-time":"2021-06-08T13:23:15Z","timestamp":1623158595000},"page":"2623-2640","source":"Crossref","is-referenced-by-count":155,"title":["GHOST: Adjusting the Decision Threshold to Handle Imbalanced Data in Machine Learning"],"prefix":"10.1021","volume":"61","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3232-271X","authenticated-orcid":true,"given":"Carmen","family":"Esposito","sequence":"first","affiliation":[{"name":"Laboratory of Physical Chemistry, ETH Zurich, Vladimir-Prelog-Weg 2, 8093 Zurich, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6279-4481","authenticated-orcid":true,"given":"Gregory A.","family":"Landrum","sequence":"additional","affiliation":[{"name":"Laboratory of Physical Chemistry, ETH Zurich, Vladimir-Prelog-Weg 2, 8093 Zurich, Switzerland"},{"name":"T5 Informatics GmbH, Spalenring 11, 4055 Basel, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5824-2764","authenticated-orcid":true,"given":"Nadine","family":"Schneider","sequence":"additional","affiliation":[{"name":"Novartis Institutes for BioMedical Research, Novartis Pharma AG, Novartis Campus, 4002 Basel, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2562-7080","authenticated-orcid":true,"given":"Nikolaus","family":"Stiefl","sequence":"additional","affiliation":[{"name":"Novartis Institutes for BioMedical Research, Novartis Pharma AG, Novartis Campus, 4002 Basel, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1893-4031","authenticated-orcid":true,"given":"Sereina","family":"Riniker","sequence":"additional","affiliation":[{"name":"Laboratory of Physical Chemistry, ETH Zurich, Vladimir-Prelog-Weg 2, 8093 Zurich, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"316","published-online":{"date-parts":[[2021,6,8]]},"reference":[{"key":"ref1\/cit1","doi-asserted-by":"publisher","DOI":"10.1145\/1007730.1007734"},{"key":"ref2\/cit2","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2016.12.035"},{"key":"ref3\/cit3","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007601015854"},{"key":"ref4\/cit4","doi-asserted-by":"publisher","DOI":"10.1126\/sciadv.aao1659"},{"key":"ref5\/cit5","doi-asserted-by":"publisher","DOI":"10.1145\/3343440"},{"key":"ref6\/cit6","first-page":"332","volume":"1","author":"Abd Elrahman S. M.","year":"2013","journal-title":"J. Network Innovative Computing"},{"key":"ref7\/cit7","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2013.07.007"},{"key":"ref8\/cit8","doi-asserted-by":"publisher","DOI":"10.3233\/IDA-2002-6504"},{"key":"ref9\/cit9","doi-asserted-by":"publisher","DOI":"10.1021\/ci100050t"},{"key":"ref10\/cit10","doi-asserted-by":"publisher","DOI":"10.5121\/ijdkp.2015.5201"},{"key":"ref11\/cit11","unstructured":"Powers, D. M. W. Evaluation: From Precision, Recall and F-Measure to ROC, Informedness, Markedness and Correlation, 2010.  arXiv:2010.16061, https:\/\/arxiv.org\/abs\/2010.16061 (accessed 2021-06-01)."},{"key":"ref12\/cit12","doi-asserted-by":"publisher","DOI":"10.1142\/S0218001409007326"},{"key":"ref13\/cit13","first-page":"42","volume":"2","author":"Ganganwar V.","year":"2012","journal-title":"International J. Emerging Technology Advanced Engineering"},{"key":"ref14\/cit14","doi-asserted-by":"publisher","DOI":"10.1111\/j.0824-7935.2004.t01-1-00228.x"},{"key":"ref15\/cit15","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2011.06.013"},{"key":"ref16\/cit16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-98074-4_6"},{"key":"ref17\/cit17","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2011.2161285"},{"key":"ref18\/cit18","unstructured":"Elkan, C. Foundations of Cost-Sensitive Learning.  Proceedings of the 17th International Joint Conference on Artificial Intelligence; 2001; 973\u2013978."},{"key":"ref19\/cit19","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2013.37"},{"key":"ref20\/cit20","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2015.10.031"},{"key":"ref21\/cit21","doi-asserted-by":"publisher","DOI":"10.1007\/s13748-014-0045-6"},{"key":"ref22\/cit22","doi-asserted-by":"publisher","DOI":"10.1007\/s00726-010-0595-2"},{"key":"ref23\/cit23","doi-asserted-by":"publisher","DOI":"10.1109\/ICIS.2016.7550920"},{"key":"ref24\/cit24","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2013.05.059"},{"key":"ref25\/cit25","unstructured":"Sheng, V. S.; Ling, C. X. Thresholding for Making Classifiers Cost-Sensitive.  Proceedings of the 21st National Conference on AI; 2006; pp 476\u2013481."},{"key":"ref26\/cit26","doi-asserted-by":"publisher","DOI":"10.1093\/oxfordjournals.pan.a004868"},{"key":"ref27\/cit27","doi-asserted-by":"publisher","DOI":"10.1016\/j.bdr.2015.12.001"},{"key":"ref28\/cit28","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-6759-9_9"},{"key":"ref29\/cit29","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.9b01162"},{"key":"ref30\/cit30","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.0c00525"},{"key":"ref31\/cit31","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-013-0913-8"},{"key":"ref32\/cit32","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2005.10.010"},{"key":"ref33\/cit33","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2014.12.007"},{"key":"ref34\/cit34","doi-asserted-by":"publisher","DOI":"10.1021\/ci400737s"},{"key":"ref35\/cit35","doi-asserted-by":"publisher","DOI":"10.1021\/ci500190p"},{"key":"ref36\/cit36","doi-asserted-by":"publisher","DOI":"10.1039\/C6GC02744J"},{"key":"ref37\/cit37","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.8b00297"},{"key":"ref38\/cit38","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-019-0383-2"},{"key":"ref39\/cit39","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.9b01162"},{"key":"ref40\/cit40","first-page":"371","volume":"9","author":"Shafer G.","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"ref41\/cit41","unstructured":"Landrum, G. Working with Unbalanced Data, Part I. 2018. http:\/\/rdkit.blogspot.com\/2018\/11\/working-with-unbalanced-data-part-i.html (accessed 2021-05-20)."},{"key":"ref42\/cit42","doi-asserted-by":"publisher","DOI":"10.1007\/BF00058655"},{"key":"ref43\/cit43","doi-asserted-by":"publisher","DOI":"10.1177\/001316446002000104"},{"key":"ref44\/cit44","doi-asserted-by":"publisher","DOI":"10.1007\/s10822-014-9759-6"},{"key":"ref45\/cit45","doi-asserted-by":"publisher","DOI":"10.2307\/2529310"},{"key":"ref46\/cit46","first-page":"2825","volume":"12","author":"Pedregosa F.","year":"2011","journal-title":"J. Mach. Learn. Res."},{"key":"ref47\/cit47","doi-asserted-by":"crossref","unstructured":"Macskassy, S.; Provost, F. Confidence Bands for ROC Curves: Methods and an Empirical Study.  Proceedings of the First Workshop on ROC Analysis in AI; 2004.","DOI":"10.21236\/ADA453849"},{"key":"ref48\/cit48","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkw1074"},{"key":"ref49\/cit49","doi-asserted-by":"publisher","DOI":"10.1021\/ci400466r"},{"key":"ref50\/cit50","doi-asserted-by":"publisher","DOI":"10.1186\/1758-2946-5-26"},{"key":"ref51\/cit51","doi-asserted-by":"publisher","DOI":"10.1021\/ci3001277"},{"key":"ref52\/cit52","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.5b00559"},{"key":"ref53\/cit53","unstructured":"DrugMatrix. https:\/\/ntp.niehs.nih.gov\/data\/drugmatrix\/ (accessed 2021-05-20)."},{"key":"ref54\/cit54","doi-asserted-by":"publisher","DOI":"10.1021\/ci400084k"},{"key":"ref55\/cit55","unstructured":"Landrum, G. RDKit: Open-Source Cheminformatics, Version 2020.03.1. https:\/\/www.rdkit.org (accessed 2021-06-01)."},{"key":"ref56\/cit56","unstructured":"Kelley, B. DescriptaStorus. https:\/\/github.com\/bp-kelley\/descriptastorus (accessed 2021-06-01)."},{"key":"ref57\/cit57","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939785"},{"key":"ref58\/cit58","doi-asserted-by":"publisher","DOI":"10.1613\/jair.953"},{"key":"ref59\/cit59","unstructured":"Chen, C.; Liaw, A.; Breiman, L. Using Random Forest to Learn Imbalanced Data. 2004; Vol. 110, p 24."},{"key":"ref60\/cit60","first-page":"1","volume":"18","author":"Lema\u00eetre G.","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref61\/cit61","doi-asserted-by":"publisher","DOI":"10.1016\/j.xphs.2020.09.055"},{"key":"ref62\/cit62","doi-asserted-by":"publisher","DOI":"10.1016\/j.jmgm.2017.01.008"},{"key":"ref63\/cit63","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-36755-1_29"},{"key":"ref64\/cit64","unstructured":"Papadopoulos, H.; Vovk, V.; Gammerman, A. Qualified Prediction for Large Data Sets in the Case of Pattern Recognition.  ICMLA; 2002; pp 159\u2013163."},{"key":"ref65\/cit65","unstructured":"Lindsay, D.; Nouretdinov, I.; Gammerman, A. Mondrian Confidence Machine; Technical Report; 2003."},{"key":"ref66\/cit66","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44722-2_25"},{"key":"ref67\/cit67","unstructured":"KnowTox\nNotebook. https:\/\/github.com\/volkamerlab\/knowtox_manuscript_SI (accessed 2021-06-01)."},{"key":"ref68\/cit68","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-020-00422-x"},{"key":"ref69\/cit69","unstructured":"Linusson, H. nonconformist. https:\/\/github.com\/donlnz\/nonconformist (accessed 2021-06-01)."},{"key":"ref70\/cit70","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-4380-9_16"},{"key":"ref71\/cit71","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-019-0686-2"},{"key":"ref72\/cit72","first-page":"3","volume":"8","author":"Bonferroni C.","year":"1936","journal-title":"Pubblicazioni del R Istituto Superiore di Scienze Economiche e Commericiali di Firenze"},{"key":"ref73\/cit73","doi-asserted-by":"crossref","unstructured":"Perlich, C. Encyclopedia of Machine Learning; Springer: US, 2011; pp 577\u2013580.","DOI":"10.1007\/978-0-387-30164-8_446"},{"key":"ref74\/cit74","doi-asserted-by":"publisher","DOI":"10.1007\/s11280-012-0178-0"},{"key":"ref75\/cit75","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-27400-3_11"},{"key":"ref76\/cit76","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-020-00468-x"},{"key":"ref77\/cit77","doi-asserted-by":"publisher","DOI":"10.1109\/34.75512"},{"key":"ref78\/cit78","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.187"},{"key":"ref79\/cit79","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24694-7_32"},{"key":"ref80\/cit80","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-13059-5_22"},{"key":"ref81\/cit81","unstructured":"Holte, R. C.; Acker, L.; Porter, B. W. Concept Learning and the Problem of Small Disjuncts.  Proceedings of the 11th IJCAI; 1989; pp 813\u2013818."},{"key":"ref82\/cit82","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-1280-0_9"},{"key":"ref83\/cit83","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-13529-3_18"},{"key":"ref84\/cit84","volume-title":"Deep Learning for the Life Sciences: Applying Deep Learning to Genomics, Microscopy, Drug Discovery, and More","author":"Ramsundar B.","year":"2019"},{"key":"ref85\/cit85","doi-asserted-by":"publisher","DOI":"10.1039\/C7SC02664A"},{"key":"ref86\/cit86","unstructured":"Tox21\nChallenge. http:\/\/tripod.nih.gov\/tox21\/challenge\/ (accessed 2021-05-20)."},{"key":"ref87\/cit87","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkr1132"},{"key":"ref88\/cit88","unstructured":"Ramsundar, B.; Kearnes, S.; Riley, P.; Webster, D.; Konerding, D.; Pande, V. Massively Multitask Networks for Drug Discovery. 2015,  arXiv preprint arXiv:1502.02072. https:\/\/arxiv.org\/abs\/1502.02072 (accessed 2021-06-01)."},{"key":"ref89\/cit89","doi-asserted-by":"publisher","DOI":"10.1021\/ci8002649"},{"key":"ref90\/cit90","unstructured":"Drummond, C.; Holte, R. C. C4.5, Class Imbalance, and Cost Sensitivity: Why Under-sampling Beats Over-sampling.  Workshop on Learning from Imbalanced Datasets II; 2003; pp 1\u20138."},{"key":"ref91\/cit91","doi-asserted-by":"publisher","DOI":"10.1145\/312129.312220"},{"key":"ref92\/cit92","doi-asserted-by":"publisher","DOI":"10.1145\/1007730.1007735"},{"key":"ref93\/cit93","unstructured":"Wang, B. X.; Japkowicz, N. Imbalanced Data Set Learning with Synthetic Samples.  Proceedings of the IRIS Machine Learning Workshop; 2004"},{"key":"ref94\/cit94","doi-asserted-by":"publisher","DOI":"10.1007\/b106715"},{"key":"ref95\/cit95","doi-asserted-by":"publisher","DOI":"10.1021\/ci5001168"},{"key":"ref96\/cit96","doi-asserted-by":"publisher","DOI":"10.1007\/s10472-013-9378-2"},{"key":"ref97\/cit97","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.7b00159"}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.1c00160","content-type":"application\/pdf","content-version":"vor","intended-application":"unspecified"},{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.1c00160","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T04:33:31Z","timestamp":1682483611000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/acs.jcim.1c00160"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,8]]},"references-count":97,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2021,6,28]]}},"alternative-id":["10.1021\/acs.jcim.1c00160"],"URL":"https:\/\/doi.org\/10.1021\/acs.jcim.1c00160","relation":{},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,8]]}}}