{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T04:06:28Z","timestamp":1776917188517,"version":"3.51.2"},"reference-count":85,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T00:00:00Z","timestamp":1762905600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T00:00:00Z","timestamp":1762905600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100019180","name":"HORIZON EUROPE European Research Council","doi-asserted-by":"publisher","award":["ReMINDER, 101077879"],"award-info":[{"award-number":["ReMINDER, 101077879"]}],"id":[{"id":"10.13039\/100019180","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100019180","name":"HORIZON EUROPE European Research Council","doi-asserted-by":"publisher","award":["ReMINDER, 101077879"],"award-info":[{"award-number":["ReMINDER, 101077879"]}],"id":[{"id":"10.13039\/100019180","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Cheminform"],"DOI":"10.1186\/s13321-025-01108-y","type":"journal-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T14:23:22Z","timestamp":1762957402000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["How evaluation choices distort the outcome of generative drug discovery"],"prefix":"10.1186","volume":"17","author":[{"given":"R\u0131za","family":"\u00d6z\u00e7elik","sequence":"first","affiliation":[]},{"given":"Francesca","family":"Grisoni","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,12]]},"reference":[{"issue":"9","key":"1108_CR1","doi-asserted-by":"publisher","first-page":"844","DOI":"10.1001\/jama.2020.1166","volume":"323","author":"OJ Wouters","year":"2020","unstructured":"Wouters OJ, McKee M, Luyten J (2020) Estimated research and development investment needed to bring a new medicine to market, 2009\u20132018. JAMA 323(9):844\u2013853","journal-title":"JAMA"},{"key":"1108_CR2","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1016\/j.jhealeco.2016.01.012","volume":"47","author":"JA DiMasi","year":"2016","unstructured":"DiMasi JA, Grabowski HG, Hansen RW (2016) Innovation in the pharmaceutical industry: new estimates of r &d costs. J Health Econ 47:20\u201333","journal-title":"J Health Econ"},{"issue":"1","key":"1108_CR3","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1002\/(SICI)1098-1128(199601)16:1<3::AID-MED1>3.0.CO;2-6","volume":"16","author":"RS Bohacek","year":"1996","unstructured":"Bohacek RS, McMartin C, Guida WC (1996) The art and practice of structure-based drug design: a molecular modeling perspective. Med Res Rev 16(1):3\u201350","journal-title":"Med Res Rev"},{"issue":"4","key":"1108_CR4","doi-asserted-by":"publisher","first-page":"688","DOI":"10.1016\/j.cell.2020.01.021","volume":"180","author":"JM Stokes","year":"2020","unstructured":"Stokes JM, Yang K, Swanson K, Jin W, Cubillos-Ruiz A, Donghia NM, MacNair CR, French S, Carfrae LA, Bloom-Ackermann Z et al (2020) A deep learning approach to antibiotic discovery. Cell 180(4):688\u2013702","journal-title":"Cell"},{"issue":"11","key":"1108_CR5","doi-asserted-by":"publisher","first-page":"1342","DOI":"10.1038\/s41589-023-01349-8","volume":"19","author":"G Liu","year":"2023","unstructured":"Liu G, Catacutan DB, Rathod K, Swanson K, Jin W, Mohammed JC, Chiappino-Pepe A, Syed SA, Fragis M, Rachwalski K et al (2023) Deep learning-guided discovery of an antibiotic targeting acinetobacter baumannii. Nat Chem Biol 19(11):1342\u20131350","journal-title":"Nat Chem Biol"},{"issue":"7997","key":"1108_CR6","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1038\/s41586-023-06887-8","volume":"626","author":"F Wong","year":"2024","unstructured":"Wong F, Zheng EJ, Valeri JA, Donghia NM, Anahtar MN, Omori S, Li A, Cubillos-Ruiz A, Krishnan A, Jin W et al (2024) Discovery of a structural class of antibiotics with explainable deep learning. Nature 626(7997):177\u2013185","journal-title":"Nature"},{"issue":"2","key":"1108_CR7","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1038\/s42256-022-00448-w","volume":"4","author":"WJ Godinez","year":"2022","unstructured":"Godinez WJ, Ma EJ, Chao AT, Pei L, Skewes-Cox P, Canham SM, Jenkins JL, Young JM, Martin EJ, Guiguemde WA (2022) Design of potent antimalarials with generative chemistry. Nature Mach Intell 4(2):180\u2013186","journal-title":"Nature Mach Intell"},{"issue":"3","key":"1108_CR8","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1039\/D1DD00024A","volume":"1","author":"F Wan","year":"2022","unstructured":"Wan F, Kontogiorgos-Heintz D, de la Fuente-Nunez C (2022) Deep generative models for peptide design. Dig Discov 1(3):195\u2013208","journal-title":"Dig Discov"},{"issue":"1","key":"1108_CR9","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1038\/s41467-022-35692-6","volume":"14","author":"M. Moret","year":"2023","unstructured":"Moret M., Pachon\u00a0Angona I, Cotos L., Yan S, Atz K., Brunner C, Baumgartner M., Grisoni F., Schneider G. (2023) Leveraging molecular structure and bioactivity with chemical language models for de novo drug design. Nat Commun 14(1):114","journal-title":"Nat Commun"},{"issue":"1","key":"1108_CR10","doi-asserted-by":"publisher","first-page":"6891","DOI":"10.1038\/s41467-022-34692-w","volume":"13","author":"Y Li","year":"2022","unstructured":"Li Y, Zhang L, Wang Y, Zou J, Yang R, Luo X, Wu C, Yang W, Tian C, Xu H et al (2022) Generative deep learning enables the discovery of a potent and selective ripk1 inhibitor. Nat Commun 13(1):6891","journal-title":"Nat Commun"},{"issue":"1","key":"1108_CR11","doi-asserted-by":"publisher","first-page":"7946","DOI":"10.1038\/s41467-024-52060-8","volume":"15","author":"L Isigkeit","year":"2024","unstructured":"Isigkeit L, H\u00f6rmann T, Schallmayer E, Scholz K, Lillich FF, Ehrler JH, Hufnagel B, B\u00fcchner J, Marschner JA, Pabel J et al (2024) Automated design of multi-target ligands by generative deep learning. Nat Commun 15(1):7946","journal-title":"Nat Commun"},{"key":"1108_CR12","doi-asserted-by":"crossref","unstructured":"Xia Y, Wu K, Deng P, Liu R, Zhang Y, Guo H, Cui Y, Pei Q, Wu L, Xie S, et\u00a0al. (2024) \u201cTarget-aware molecule generation for drug design using a chemical language model,\u201d bioRxiv, pp 2024\u201301","DOI":"10.1101\/2024.01.08.574635"},{"key":"1108_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00894-021-04674-8","volume":"27","author":"Y Bian","year":"2021","unstructured":"Bian Y, Xie X-Q (2021) Generative chemistry: drug discovery with deep learning generative models. J Mol Model 27:1\u201318","journal-title":"J Mol Model"},{"issue":"6","key":"1108_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.drudis.2024.103992","volume":"29","author":"A Gangwal","year":"2024","unstructured":"Gangwal A, Lavecchia A (2024) Unlocking the potential of generative ai in drug discovery. Drug Discov Today 29(6):103992","journal-title":"Drug Discov Today"},{"issue":"6","key":"1108_CR15","doi-asserted-by":"publisher","first-page":"bbab344","DOI":"10.1093\/bib\/bbab344","volume":"22","author":"Y. Cheng,","year":"2021","unstructured":"Cheng, Y., Gong Y, Liu Y, Song B, Zou Q. (2021) Molecular design in drug discovery a comprehensive review of deep generative models. Brief Bioinform 22(6):bbab344","journal-title":"Brief Bioinform"},{"key":"1108_CR16","volume":"3","author":"A Volkamer","year":"2023","unstructured":"Volkamer A, Riniker S, Nittinger E, Lanini J, Grisoni F, Evertsson E, Rodr\u00edguez-P\u00e9rez R, Schneider N (2023) Machine learning for small molecule drug discovery in academia and industry. Artif Intell Life Sci 3:100056","journal-title":"Artif Intell Life Sci"},{"issue":"8","key":"1108_CR17","doi-asserted-by":"publisher","first-page":"960","DOI":"10.1038\/s41589-024-01679-1","volume":"20","author":"DB Catacutan","year":"2024","unstructured":"Catacutan DB, Alexander J, Arnold A, Stokes JM (2024) Machine learning in preclinical drug discovery. Nat Chem Biol 20(8):960\u2013973","journal-title":"Nat Chem Biol"},{"issue":"4","key":"1108_CR18","doi-asserted-by":"publisher","first-page":"875","DOI":"10.1021\/acs.jcim.6b00754","volume":"57","author":"W Yuan","year":"2017","unstructured":"Yuan W, Jiang D, Nambiar DK, Liew LP, Hay MP, Bloomstein J, Lu P, Turner B, Le Q-T, Tibshirani R et al (2017) Chemical space mimicry for drug discovery. J Chem Inf Model 57(4):875\u2013882","journal-title":"J Chem Inf Model"},{"issue":"1\u20132","key":"1108_CR19","doi-asserted-by":"publisher","first-page":"1700153","DOI":"10.1002\/minf.201700153","volume":"37","author":"D Merk","year":"2018","unstructured":"Merk D, Friedrich L, Grisoni F, Schneider G (2018) De novo design of bioactive small molecules by artificial intelligence. Mol Inf 37(1\u20132):1700153","journal-title":"Mol Inf"},{"issue":"24","key":"1108_CR20","doi-asserted-by":"publisher","first-page":"eabg3338","DOI":"10.1126\/sciadv.abg3338","volume":"7","author":"F Grisoni","year":"2021","unstructured":"Grisoni F, Huisman BJ, Button AL, Moret M, Atz K, Merk D, Schneider G (2021) Combining generative artificial intelligence and on-chip synthesis for de novo drug design. Sci Adv 7(24):eabg3338","journal-title":"Sci Adv"},{"issue":"12","key":"1108_CR21","doi-asserted-by":"publisher","first-page":"8170","DOI":"10.1021\/acs.jmedchem.3c00485","volume":"66","author":"M Ballarotto","year":"2023","unstructured":"Ballarotto M, Willems S, Stiller T, Nawa F, Marschner JA, Grisoni F, Merk D (2023) De novo design of nurr1 agonists via fragment-augmented generative deep learning in low-data regime. J Med Chem 66(12):8170\u20138177","journal-title":"J Med Chem"},{"issue":"6","key":"1108_CR22","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1038\/s42256-024-00843-5","volume":"6","author":"Y Du","year":"2024","unstructured":"Du Y, Jamasb AR, Guo J, Fu T, Harris C, Wang Y, Duan C, Li\u00f2 P, Schwaller P, Blundell TL (2024) Machine learning-aided generative molecular design. Nature Mach Intell 6(6):589\u2013604","journal-title":"Nature Mach Intell"},{"issue":"1","key":"1108_CR23","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1021\/acscentsci.7b00512","volume":"4","author":"MH Segler","year":"2018","unstructured":"Segler MH, Kogej T, Tyrchan C, Waller MP (2018) Generating focused molecule libraries for drug discovery with recurrent neural networks. ACS Cent Sci 4(1):120\u2013131","journal-title":"ACS Cent Sci"},{"issue":"2","key":"1108_CR24","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1021\/acscentsci.7b00572","volume":"4","author":"R G\u00f3mez-Bombarelli","year":"2018","unstructured":"G\u00f3mez-Bombarelli R, Wei JN, Duvenaud D, Hern\u00e1ndez-Lobato JM, S\u00e1nchez-Lengeling B, Sheberla D, Aguilera-Iparraguirre J, Hirzel TD, Adams RP, Aspuru-Guzik A (2018) Automatic chemical design using a data-driven continuous representation of molecules. ACS Cent Sci 4(2):268\u2013276","journal-title":"ACS Cent Sci"},{"issue":"11","key":"1108_CR25","doi-asserted-by":"publisher","first-page":"5343","DOI":"10.1021\/acs.jcim.0c01496","volume":"61","author":"T Sousa","year":"2021","unstructured":"Sousa T, Correia J, Pereira V, Rocha M (2021) Generative deep learning for targeted compound design. J Chem Inf Model 61(11):5343\u20135361","journal-title":"J Chem Inf Model"},{"key":"1108_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.sbi.2023.102527","volume":"79","author":"F Grisoni","year":"2023","unstructured":"Grisoni F (2023) Chemical language models for de novo drug design: Challenges and opportunities. Curr Opin Struct Biol 79:102527","journal-title":"Curr Opin Struct Biol"},{"issue":"1","key":"1108_CR27","doi-asserted-by":"publisher","first-page":"6176","DOI":"10.1038\/s41467-024-50469-9","volume":"15","author":"R \u00d6z\u00e7elik","year":"2024","unstructured":"\u00d6z\u00e7elik R, de Ruiter S, Criscuolo E, Grisoni F (2024) Chemical language modeling with structured state space sequence models. Nat Commun 15(1):6176","journal-title":"Nat Commun"},{"issue":"1\u20132","key":"1108_CR28","doi-asserted-by":"publisher","first-page":"1700111","DOI":"10.1002\/minf.201700111","volume":"37","author":"A Gupta","year":"2018","unstructured":"Gupta A, M\u00fcller AT, Huisman BJ, Fuchs JA, Schneider P, Schneider G (2018) Generative recurrent networks for de novo drug design. Mol Inf 37(1\u20132):1700111","journal-title":"Mol Inf"},{"key":"1108_CR29","doi-asserted-by":"publisher","DOI":"10.3389\/fphar.2020.565644","volume":"11","author":"D Polykovskiy","year":"2020","unstructured":"Polykovskiy D, Zhebrak A, Sanchez-Lengeling B, Golovanov S, Tatanov O, Belyaev S, Kurbanov R, Artamonov A, Aladinskiy V, Veselov M et al (2020) Molecular sets (moses): a benchmarking platform for molecular generation models. Front Pharmacol 11:565644","journal-title":"Front Pharmacol"},{"issue":"3","key":"1108_CR30","doi-asserted-by":"publisher","first-page":"1096","DOI":"10.1021\/acs.jcim.8b00839","volume":"59","author":"N Brown","year":"2019","unstructured":"Brown N, Fiscato M, Segler MH, Vaucher AC (2019) Guacamol: benchmarking models for de novo molecular design. J Chem Inf Model 59(3):1096\u20131108","journal-title":"J Chem Inf Model"},{"key":"1108_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13321-019-0341-z","volume":"11","author":"J Ar\u00fas-Pous","year":"2019","unstructured":"Ar\u00fas-Pous J, Blaschke T, Ulander S, Reymond J-L, Chen H, Engkvist O (2019) Exploring the gdb-13 chemical space using deep generative models. J Cheminform 11:1\u201314","journal-title":"J Cheminform"},{"issue":"1","key":"1108_CR32","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1021\/acs.jcim.4c02232","volume":"65","author":"D Nie","year":"2024","unstructured":"Nie D, Zhao H, Zhang O, Weng G, Zhang H, Jin J, Lin H, Huang Y, Liu L, Li D et al (2024) Durian: a comprehensive benchmark for structure-based 3d molecular generation. J Chem Inf Model 65(1):173\u2013186","journal-title":"J Chem Inf Model"},{"issue":"1","key":"1108_CR33","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1186\/s13321-024-00861-w","volume":"16","author":"M Thomas","year":"2024","unstructured":"Thomas M, O\u2019Boyle NM, Bender A, De Graaf C (2024) Molscore: a scoring, evaluation and benchmarking framework for generative models in de novo drug design. J Cheminform 16(1):64","journal-title":"J Cheminform"},{"key":"1108_CR34","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/j.ddtec.2020.09.003","volume":"32","author":"P Renz","year":"2019","unstructured":"Renz P, Van Rompaey D, Wegner JK, Hochreiter S, Klambauer G (2019) On failure modes in molecule generation and optimization. Drug Discov Today Technol 32:55\u201363","journal-title":"Drug Discov Today Technol"},{"key":"1108_CR35","doi-asserted-by":"crossref","unstructured":"Bender A, Schneider N, Segler M, Patrick Walters W, Engkvist O, Rodrigues T (2022) \u201cEvaluation guidelines for machine learning tools in the chemical sciences,\u201d. Nat Rev Chem 6(6):428\u2013422","DOI":"10.1038\/s41570-022-00391-9"},{"key":"1108_CR36","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2022.105403","volume":"145","author":"DD Martinelli","year":"2022","unstructured":"Martinelli DD (2022) Generative machine learning for de novo drug discovery: a systematic review. Comput Biol Med 145:105403","journal-title":"Comput Biol Med"},{"key":"1108_CR37","unstructured":"Lin H, Huang Y, Liu M, Li X, Ji S, Li SZ (2022)\u201cDiffBP: generative Diffusion of 3D Molecules for Target Protein Binding"},{"key":"1108_CR38","unstructured":"Cremer J, Irwin R, Tibot A, Janet JP, Olsson S, Clevert D-A (2025) \u201cFLOWR: flow Matching for Structure-Aware De Novo, Interaction- and Fragment-Based Ligand Generation"},{"key":"1108_CR39","unstructured":"\u201cAn evaluation of unconditional 3D molecular generation methods.\u201d"},{"key":"1108_CR40","unstructured":"Peng X, Guan J, Liu Q, Ma J (2023)\u201cMolDiff: addressing the Atom-Bond Inconsistency Problem in 3D Molecule Diffusion Generation"},{"key":"1108_CR41","unstructured":"\u201cDiGress: Discrete Denoising diffusion for graph generation,\u201d May 2023"},{"key":"1108_CR42","unstructured":"Liu G, Xu J, Luo T, Jiang M (2024) Graph Diffusion Transformers for Multi-Conditional Molecular Generation"},{"key":"1108_CR43","unstructured":"Shi C, Xu M, Zhu Z, Zhang W, Zhang M, Tang J (2020) GraphAF: a Flow-based Autoregressive Model for Molecular Graph Generation"},{"key":"1108_CR44","first-page":"12409","volume":"35","author":"Y Verma","year":"2022","unstructured":"Verma Y, Kaski S, Heinonen M, Garg V (2022) Modular Flows: differential Molecular Generation. Adv Neural Inf Process Syst 35:12409\u201312421","journal-title":"Adv Neural Inf Process Syst"},{"key":"1108_CR45","unstructured":"Luo Y, Yan K, Ji S (2021) GraphDF: a Discrete Flow Model for Molecular Graph Generation. In: Proceedings of the 38th International Conference on Machine Learning. pp 7192\u20137203, PMLR"},{"key":"1108_CR46","doi-asserted-by":"crossref","unstructured":"Zang C, Wang F (2020) MoFlow: an Invertible Flow Model for Generating Molecular Graphs. In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, KDD \u201920, (New York, NY, USA). pp 617\u2013626, Association for Computing Machinery","DOI":"10.1145\/3394486.3403104"},{"issue":"9","key":"1108_CR47","doi-asserted-by":"publisher","first-page":"759","DOI":"10.1038\/s42256-021-00368-1","volume":"3","author":"MA Skinnider","year":"2021","unstructured":"Skinnider MA, Stacey RG, Wishart DS, Foster LJ (2021) Chemical language models enable navigation in sparsely populated chemical space. Nature Mach Intell 3(9):759\u2013770","journal-title":"Nature Mach Intell"},{"issue":"1","key":"1108_CR48","doi-asserted-by":"publisher","first-page":"3293","DOI":"10.1038\/s41467-022-30839-x","volume":"13","author":"D Flam-Shepherd","year":"2022","unstructured":"Flam-Shepherd D, Zhu K, Aspuru-Guzik A (2022) Language models can learn complex molecular distributions. Nat Commun 13(1):3293","journal-title":"Nat Commun"},{"issue":"1","key":"1108_CR49","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1021\/ci00057a005","volume":"28","author":"D Weininger","year":"1988","unstructured":"Weininger D (1988) Smiles, a chemical language and information system. 1. introduction to methodology and encoding rules. J Chem Inf Comput Sci 28(1):31\u201336","journal-title":"J Chem Inf Comput Sci"},{"key":"1108_CR50","doi-asserted-by":"crossref","unstructured":"Krenn M, Ai Q, Barthel S, Carson N, Frei A, Frey NC, Friederich P, Gaudin T, Gayle AA, Jablonka KM et al (2022) Selfies and the future of molecular string representations. Patterns 3(10):2","DOI":"10.1016\/j.patter.2022.100588"},{"issue":"8","key":"1108_CR51","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"issue":"8","key":"1108_CR52","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Child R, Luan D, Amodei D, Sutskever I et al (2019) Language models are unsupervised multitask learners. OpenAI blog 1(8):9","journal-title":"OpenAI blog"},{"issue":"9","key":"1108_CR53","doi-asserted-by":"publisher","first-page":"2064","DOI":"10.1021\/acs.jcim.1c00600","volume":"62","author":"V Bagal","year":"2021","unstructured":"Bagal V, Aggarwal R, Vinod P, Priyakumar UD (2021) Molgpt: molecular generation using a transformer-decoder model. J Chem Inf Model 62(9):2064\u20132076","journal-title":"J Chem Inf Model"},{"key":"1108_CR54","unstructured":"Bahdanau D, Cho KH, Bengio Y(2015) Neural machine translation by jointly learning to align and translate. In: 3rd International Conference on Learning Representations, ICLR 2015"},{"key":"1108_CR55","unstructured":"Gu A, Goel K, R\u00e9 C (2022) Efficiently modeling long sequences with structured state spaces. In: The International Conference on Learning Representations (ICLR)"},{"issue":"D1","key":"1108_CR56","doi-asserted-by":"publisher","first-page":"D945","DOI":"10.1093\/nar\/gkw1074","volume":"45","author":"A Gaulton","year":"2017","unstructured":"Gaulton A, Hersey A, Nowotka M, Bento AP, Chambers J, Mendez D, Mutowo P, Atkinson F, Bellis LJ, Cibri\u00e1n-Uhalte E et al (2017) The chembl database in 2017. Nucleic Acids Res 45(D1):D945\u2013D954","journal-title":"Nucleic Acids Res"},{"key":"1108_CR57","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s13321-016-0187-6","volume":"9","author":"J Sun","year":"2017","unstructured":"Sun J, Jeliazkova N, Chupakhin V, Golib-Dzib J-F, Engkvist O, Carlsson L, Wegner J, Ceulemans H, Georgiev I, Jeliazkov V et al (2017) Excape-db: an integrated large scale dataset facilitating big data analysis in chemogenomics. J Cheminform 9:1\u20139","journal-title":"J Cheminform"},{"issue":"1","key":"1108_CR58","doi-asserted-by":"publisher","first-page":"25","DOI":"10.2174\/187152706784111551","volume":"5","author":"P. Sokoloff,","year":"2006","unstructured":"Sokoloff, P., Diaz J., Foll B.\u00a0L., Guillin O., Leriche L., Bezard, E., Gross C. (2006) The dopamine d3 receptor: a therapeutic target for the treatment of neuropsychiatric disorders. CNS Neurol Disord Drug Targets CNS Neurol Disord 5(1):25\u201343","journal-title":"CNS Neurol Disord Drug Targets CNS Neurol Disord"},{"issue":"7","key":"1108_CR59","doi-asserted-by":"publisher","first-page":"463","DOI":"10.1038\/nrc.2016.49","volume":"16","author":"XZ Zhou","year":"2016","unstructured":"Zhou XZ, Lu KP (2016) The isomerase pin1 controls numerous cancer-driving pathways and is a unique drug target. Nat Rev Cancer 16(7):463\u2013478","journal-title":"Nat Rev Cancer"},{"issue":"5","key":"1108_CR60","doi-asserted-by":"publisher","first-page":"342","DOI":"10.1038\/nrc3691","volume":"14","author":"D Feldman","year":"2014","unstructured":"Feldman D, Krishnan AV, Swami S, Giovannucci E, Feldman BJ (2014) The role of vitamin d in reducing cancer risk and progression. Nat Rev Cancer 14(5):342\u2013357","journal-title":"Nat Rev Cancer"},{"issue":"16","key":"1108_CR61","doi-asserted-by":"publisher","first-page":"8683","DOI":"10.1021\/acs.jmedchem.9b02147","volume":"63","author":"C Cai","year":"2020","unstructured":"Cai C, Wang S, Xu Y, Zhang W, Tang K, Ouyang Q, Lai L, Pei J (2020) Transfer learning for drug discovery. J Med Chem 63(16):8683\u20138694","journal-title":"J Med Chem"},{"issue":"9","key":"1108_CR62","doi-asserted-by":"publisher","first-page":"1736","DOI":"10.1021\/acs.jcim.8b00234","volume":"58","author":"K Preuer","year":"2018","unstructured":"Preuer K, Renz P, Unterthiner T, Hochreiter S, Klambauer G (2018) Fr\u00e9chet chemnet distance: a metric for generative models for molecules in drug discovery. J Chem Inf Model 58(9):1736\u20131741","journal-title":"J Chem Inf Model"},{"issue":"24","key":"1108_CR63","doi-asserted-by":"publisher","first-page":"5441","DOI":"10.1039\/C8SC00148K","volume":"9","author":"A Mayr","year":"2018","unstructured":"Mayr A, Klambauer G, Unterthiner T, Steijaert M, Wegner JK, Ceulemans H, Clevert D-A, Hochreiter S (2018) Large-scale comparison of machine learning methods for drug target prediction on chembl. Chem Sci 9(24):5441\u20135451","journal-title":"Chem Sci"},{"key":"1108_CR64","first-page":"183","volume":"6","author":"M Fr\u00e9chet","year":"1957","unstructured":"Fr\u00e9chet M (1957) Sur la distance de deux lois de probabilit\u00e9. Ann ISUP 6:183\u2013198","journal-title":"Ann ISUP"},{"key":"1108_CR65","unstructured":"Xie Y, Xu Z, Ma J, Mei Q (2023) How much space has been explored? measuring the chemical space covered by databases and machine-generated molecules. In: The Eleventh International Conference on Learning Representations"},{"issue":"5","key":"1108_CR66","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1021\/ci100050t","volume":"50","author":"D Rogers","year":"2010","unstructured":"Rogers D, Hahn M (2010) Extended-connectivity fingerprints. J Chem Inf Model 50(5):742\u2013754","journal-title":"J Chem Inf Model"},{"key":"1108_CR67","doi-asserted-by":"crossref","unstructured":"Liu F, Mailhot O, Glenn IS, Vigneron SF, Bassim V, Xu X, Fonseca-Valencia K, Smith MS, Radchenko DS, Fraser JS et al (2025) The impact of library size and scale of testing on virtual screening. Nature Chem Biol 1\u20137","DOI":"10.1101\/2024.07.08.602536"},{"issue":"5","key":"1108_CR68","doi-asserted-by":"publisher","first-page":"1199","DOI":"10.1021\/acs.jcim.2c00079","volume":"62","author":"M Moret","year":"2022","unstructured":"Moret M, Grisoni F, Katzberger P, Schneider G (2022) Perplexity-based molecule ranking and bias estimation of chemical language models. J Chem Inf Model 62(5):1199\u20131206","journal-title":"J Chem Inf Model"},{"key":"1108_CR69","unstructured":"Kingma DP, Welling M (2022) Auto-encoding variational bayes"},{"issue":"57","key":"1108_CR70","first-page":"1","volume":"22","author":"G Papamakarios","year":"2021","unstructured":"Papamakarios G, Nalisnick E, Rezende DJ, Mohamed S, Lakshminarayanan B (2021) Normalizing flows for probabilistic modeling and inference. J Mach Learn Res 22(57):1\u201364","journal-title":"J Mach Learn Res"},{"key":"1108_CR71","unstructured":"Rezende D, Mohamed S (2015) Variational inference with normalizing flows. In: International conference on machine learning. pp 1530\u20131538, PMLR"},{"key":"1108_CR72","unstructured":"Goodfellow IJ, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y(2014) Generative adversarial nets. Adv Neural Inform Process Syst. vol.\u00a027"},{"issue":"12","key":"1108_CR73","doi-asserted-by":"publisher","first-page":"8170","DOI":"10.1021\/acs.jmedchem.3c00485","volume":"66","author":"M Ballarotto","year":"2023","unstructured":"Ballarotto M, Willems S, Stiller T, Nawa F, Marschner JA, Grisoni F, Merk D (2023) De novo design of nurr1 agonists via fragment-augmented generative deep learning in low-data regime. J Med Chem 66(12):8170\u20138177","journal-title":"J Med Chem"},{"issue":"4","key":"1108_CR74","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1038\/s42256-024-00821-x","volume":"6","author":"MA Skinnider","year":"2024","unstructured":"Skinnider MA (2024) Invalid smiles are beneficial rather than detrimental to chemical language models. Nature Mach Intell 6(4):437\u2013448","journal-title":"Nature Mach Intell"},{"issue":"15","key":"1108_CR75","doi-asserted-by":"publisher","first-page":"2887","DOI":"10.1021\/jm9602928","volume":"39","author":"GW Bemis","year":"1996","unstructured":"Bemis GW, Murcko MA (1996) The properties of known drugs. 1. molecular frameworks. J Med Chem 39(15):2887\u20132893","journal-title":"J Med Chem"},{"key":"1108_CR76","unstructured":"Huang L, Yu W, Ma W, Zhong W, Feng Z, Wang H, Chen Q, Peng W, Feng X, Qin B (2023) et\u00a0al., A survey on hallucination in large language models: Principles, taxonomy, challenges, and open questions. arXiv preprint arXiv:2311.05232"},{"key":"1108_CR77","unstructured":"Holtzman A, Buys J, Du L, Forbes M, Choi Y(2019) The curious case of neural text degeneration. arXiv:1904.09751"},{"key":"1108_CR78","doi-asserted-by":"crossref","unstructured":"Fan A, Lewis M, Dauphin Y(2018) Hierarchical neural story generation. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Association for Computational Linguistics","DOI":"10.18653\/v1\/P18-1082"},{"key":"1108_CR79","unstructured":"Zhang H, Duckworth D, Ippolito D, Neelakantan A(2020) Trading off diversity and quality in natural language generation. arXiv preprint arXiv:2004.10450"},{"issue":"4","key":"1108_CR80","doi-asserted-by":"publisher","first-page":"796","DOI":"10.1039\/D4DD00019F","volume":"3","author":"E Noutahi","year":"2024","unstructured":"Noutahi E, Gabellini C, Craig M, Lim JS, Tossou P (2024) Gotta be safe: a new framework for molecular design. Dig Discov 3(4):796\u2013804","journal-title":"Dig Discov"},{"issue":"1","key":"1108_CR81","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1038\/s42004-025-01423-3","volume":"8","author":"F Mastrolorito","year":"2025","unstructured":"Mastrolorito F, Ciriaco F, Togo MV, Gambacorta N, Trisciuzzi D, Altomare CD, Amoroso N, Grisoni F, Nicolotti O (2025) fragsmiles as a chemical string notation for advanced fragment and chirality representation. Commun Chem 8(1):26","journal-title":"Commun Chem"},{"issue":"3","key":"1108_CR82","doi-asserted-by":"publisher","first-page":"748","DOI":"10.1039\/D3DD00012E","volume":"2","author":"AH Cheng","year":"2023","unstructured":"Cheng AH, Cai A, Miret S, Malkomes G, Phielipp M, Aspuru-Guzik A (2023) Group selfies: a robust fragment-based molecular string representation. Dig Discov 2(3):748\u2013758","journal-title":"Dig Discov"},{"issue":"7","key":"1108_CR83","doi-asserted-by":"publisher","first-page":"eaap7885","DOI":"10.1126\/sciadv.aap7885","volume":"4","author":"M Popova","year":"2018","unstructured":"Popova M, Isayev O, Tropsha A (2018) Deep reinforcement learning for de novo drug design. Sci Adv 4(7):eaap7885","journal-title":"Sci Adv"},{"issue":"4","key":"1108_CR84","doi-asserted-by":"publisher","DOI":"10.1002\/wcms.1651","volume":"13","author":"C Abate","year":"2023","unstructured":"Abate C, Decherchi S, Cavalli A (2023) Graph neural networks for conditional de novo drug design. Wiley Interdisc Rev Comput Mol Sci 13(4):e1651","journal-title":"Wiley Interdisc Rev Comput Mol Sci"},{"issue":"5","key":"1108_CR85","doi-asserted-by":"publisher","first-page":"868","DOI":"10.1021\/ci990307l","volume":"39","author":"SA Wildman","year":"1999","unstructured":"Wildman SA, Crippen GM (1999) Prediction of physicochemical parameters by atomic contributions. J Chem Inf Comput Sci 39(5):868\u2013873","journal-title":"J Chem Inf Comput Sci"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-025-01108-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s13321-025-01108-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s13321-025-01108-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:03:34Z","timestamp":1762963414000},"score":1,"resource":{"primary":{"URL":"https:\/\/jcheminf.biomedcentral.com\/articles\/10.1186\/s13321-025-01108-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,12]]},"references-count":85,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1108"],"URL":"https:\/\/doi.org\/10.1186\/s13321-025-01108-y","relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,12]]},"assertion":[{"value":"6 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"169"}}