{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T05:22:51Z","timestamp":1770960171545,"version":"3.50.1"},"reference-count":27,"publisher":"Oxford University Press (OUP)","issue":"6","license":[{"start":{"date-parts":[[2024,6,13]],"date-time":"2024-06-13T00:00:00Z","timestamp":1718236800000},"content-version":"vor","delay-in-days":12,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100004052","name":"King Abdullah University of Science and Technology","doi-asserted-by":"publisher","award":["URF\/1\/4352-01-01"],"award-info":[{"award-number":["URF\/1\/4352-01-01"]}],"id":[{"id":"10.13039\/501100004052","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004052","name":"King Abdullah University of Science and Technology","doi-asserted-by":"publisher","award":["FCC\/1\/1976-44-01"],"award-info":[{"award-number":["FCC\/1\/1976-44-01"]}],"id":[{"id":"10.13039\/501100004052","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004052","name":"King Abdullah University of Science and Technology","doi-asserted-by":"publisher","award":["FCC\/1\/1976-45-01"],"award-info":[{"award-number":["FCC\/1\/1976-45-01"]}],"id":[{"id":"10.13039\/501100004052","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Macrocyclic peptides hold great promise as therapeutics targeting intracellular proteins. This stems from their remarkable ability to bind flat protein surfaces with high affinity and specificity while potentially traversing the cell membrane. Research has already explored their use in developing inhibitors for intracellular proteins, such as KRAS, a well-known driver in various cancers. However, computational approaches for de novo macrocyclic peptide design remain largely unexplored.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>Here, we introduce HELM-GPT, a novel method that combines the strength of the hierarchical editing language for macromolecules (HELM) representation and generative pre-trained transformer (GPT) for de novo macrocyclic peptide design. Through reinforcement learning (RL), our experiments demonstrate that HELM-GPT has the ability to generate valid macrocyclic peptides and optimize their properties. Furthermore, we introduce a contrastive preference loss during the RL process, further enhanced the optimization performance. Finally, to co-optimize peptide permeability and KRAS binding affinity, we propose a step-by-step optimization strategy, demonstrating its effectiveness in generating molecules fulfilling both criteria. In conclusion, the HELM-GPT method can be used to identify novel macrocyclic peptides to target intracellular proteins.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>The code and data of HELM-GPT are freely available on GitHub (https:\/\/github.com\/charlesxu90\/helm-gpt).<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae364","type":"journal-article","created":{"date-parts":[[2024,6,13]],"date-time":"2024-06-13T10:27:24Z","timestamp":1718274444000},"source":"Crossref","is-referenced-by-count":11,"title":["HELM-GPT: <i>de novo<\/i> macrocyclic peptide design using generative pre-trained transformer"],"prefix":"10.1093","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2414-7851","authenticated-orcid":false,"given":"Xiaopeng","family":"Xu","sequence":"first","affiliation":[{"name":"Computer Science Program, Computer, Electrical and Mathematical Science and Engineering (CEMSE), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"},{"name":"Computational Bioscience Research Center, King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"}]},{"given":"Chencheng","family":"Xu","sequence":"additional","affiliation":[{"name":"Computer Science Program, Computer, Electrical and Mathematical Science and Engineering (CEMSE), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"},{"name":"Computational Bioscience Research Center, King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"}]},{"given":"Wenjia","family":"He","sequence":"additional","affiliation":[{"name":"Computer Science Program, Computer, Electrical and Mathematical Science and Engineering (CEMSE), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"},{"name":"Computational Bioscience Research Center, King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"}]},{"given":"Lesong","family":"Wei","sequence":"additional","affiliation":[{"name":"Computer Science Program, Computer, Electrical and Mathematical Science and Engineering (CEMSE), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"},{"name":"Computational Bioscience Research Center, King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3164-8240","authenticated-orcid":false,"given":"Haoyang","family":"Li","sequence":"additional","affiliation":[{"name":"Computer Science Program, Computer, Electrical and Mathematical Science and Engineering (CEMSE), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"},{"name":"Computational Bioscience Research Center, King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6739-6236","authenticated-orcid":false,"given":"Juexiao","family":"Zhou","sequence":"additional","affiliation":[{"name":"Computer Science Program, Computer, Electrical and Mathematical Science and Engineering (CEMSE), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"},{"name":"Computational Bioscience Research Center, King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"}]},{"given":"Ruochi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Syneron Technology , Guangzhou 510000, China"}]},{"given":"Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Syneron Technology , Guangzhou 510000, China"}]},{"given":"Yuanpeng","family":"Xiong","sequence":"additional","affiliation":[{"name":"Syneron Technology , Guangzhou 510000, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7108-3574","authenticated-orcid":false,"given":"Xin","family":"Gao","sequence":"additional","affiliation":[{"name":"Computer Science Program, Computer, Electrical and Mathematical Science and Engineering (CEMSE), King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"},{"name":"Computational Bioscience Research Center, King Abdullah University of Science and Technology (KAUST) , Thuwal 23955-6900, Makkah, Kingdom of Saudi Arabia"}]}],"member":"286","published-online":{"date-parts":[[2024,6,12]]},"reference":[{"key":"2024071814113367900_btae364-B1","doi-asserted-by":"crossref","first-page":"3520","DOI":"10.1016\/j.cell.2022.07.019","article-title":"Accurate de novo design of membrane-traversing macrocycles","volume":"185","author":"Bhardwaj","year":"2022","journal-title":"Cell"},{"key":"2024071814113367900_btae364-B2","doi-asserted-by":"crossref","first-page":"5918","DOI":"10.1021\/acs.jcim.0c00915","article-title":"Reinvent 2.0: an ai tool for de novo drug design","volume":"60","author":"Blaschke","year":"2020","journal-title":"J Chem Inf Model"},{"key":"2024071814113367900_btae364-B3","doi-asserted-by":"crossref","first-page":"234","DOI":"10.1016\/j.tips.2021.11.008","article-title":"Targeting intracellular protein\u2013protein interactions with macrocyclic peptides","volume":"43","author":"Buyanova","year":"2022","journal-title":"Trends Pharmacol Sci"},{"key":"2024071814113367900_btae364-B4","doi-asserted-by":"crossref","first-page":"406","DOI":"10.1039\/D0ME00161A","article-title":"Sequence-based peptide identification, generation, and property prediction with deep learning: a review","volume":"6","author":"Chen","year":"2021","journal-title":"Mol Syst Des Eng"},{"key":"2024071814113367900_btae364-B5","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Hejna","year":"2024"},{"key":"2024071814113367900_btae364-B6","doi-asserted-by":"crossref","first-page":"3567","DOI":"10.1039\/C8SC05372C","article-title":"A graph-based genetic algorithm and generative model\/Monte Carlo tree search for the exploration of chemical space","volume":"10","author":"Jensen","year":"2019","journal-title":"Chem Sci"},{"key":"2024071814113367900_btae364-B7","author":"Kawada","year":"2023"},{"key":"2024071814113367900_btae364-B8","first-page":"31","article-title":"Rdkit: a software suite for cheminformatics, computational chemistry, and predictive modeling","volume":"8","author":"Landrum","year":"2013","journal-title":"Greg Landrum"},{"key":"2024071814113367900_btae364-B9","doi-asserted-by":"crossref","first-page":"2240","DOI":"10.1021\/acs.jcim.2c01573","article-title":"Cycpeptmpdb: a comprehensive database of membrane permeability of cyclic peptides","volume":"63","author":"Li","year":"2023","journal-title":"J Chem Inf Model"},{"key":"2024071814113367900_btae364-B10","doi-asserted-by":"crossref","first-page":"D930","DOI":"10.1093\/nar\/gky1075","article-title":"Chembl: towards direct deposition of bioassay data","volume":"47","author":"Mendez","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2024071814113367900_btae364-B11","doi-asserted-by":"crossref","first-page":"2707","DOI":"10.1016\/j.drudis.2021.05.019","article-title":"De novo molecular design and generative models","volume":"26","author":"Meyers","year":"2021","journal-title":"Drug Discov Today"},{"key":"2024071814113367900_btae364-B12","doi-asserted-by":"crossref","first-page":"833","DOI":"10.1080\/17460441.2020.1751117","article-title":"The emerging role of computational design in peptide macrocycle drug discovery","volume":"15","author":"Mulligan","year":"2020","journal-title":"Expert Opin Drug Discov"},{"key":"2024071814113367900_btae364-B13","doi-asserted-by":"crossref","first-page":"e2012800118","DOI":"10.1073\/pnas.2012800118","article-title":"Computationally designed peptide macrocycle inhibitors of new delhi metallo-\u03b2-lactamase 1","volume":"118","author":"Mulligan","year":"2021","journal-title":"Proc Natl Acad Sci USA"},{"key":"2024071814113367900_btae364-B14","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR) Workshop","author":"Neil","year":"2018"},{"key":"2024071814113367900_btae364-B15","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s13321-017-0235-x","article-title":"Molecular de-novo design through deep reinforcement learning","volume":"9","author":"Olivecrona","year":"2017","journal-title":"J Cheminform"},{"key":"2024071814113367900_btae364-B16","doi-asserted-by":"crossref","first-page":"565644","DOI":"10.3389\/fphar.2020.565644","article-title":"Molecular sets (moses): a benchmarking platform for molecular generation models","volume":"11","author":"Polykovskiy","year":"2020","journal-title":"Front Pharmacol"},{"key":"2024071814113367900_btae364-B17","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1007\/978-1-4939-2020-4_3","article-title":"Synthesis and screening of one-bead-one-compound cyclic peptide libraries","volume":"1248","author":"Qian","year":"2015","journal-title":"Peptide Libraries Methods Protoc"},{"key":"2024071814113367900_btae364-B18","article-title":"Improving Language Understanding by Generative Pre-training","author":"Radford","year":"2018","journal-title":"OpenAI Blog"},{"key":"2024071814113367900_btae364-B19","doi-asserted-by":"crossref","first-page":"14073","DOI":"10.1021\/ja063076p","article-title":"Conformational flexibility, internal hydrogen bonding, and passive membrane permeability: successful in silico prediction of the relative permeabilities of cyclic peptides","volume":"128","author":"Rezai","year":"2006","journal-title":"J Am Chem Soc"},{"key":"2024071814113367900_btae364-B20","doi-asserted-by":"crossref","first-page":"2510","DOI":"10.1021\/ja0563455","article-title":"Testing the conformational hypothesis of passive membrane permeability using synthetic cyclic peptide diastereomers","volume":"128","author":"Rezai","year":"2006","journal-title":"J Am Chem Soc"},{"key":"2024071814113367900_btae364-B21","doi-asserted-by":"crossref","first-page":"226","DOI":"10.1016\/j.sbi.2021.11.008","article-title":"Deep generative modeling for protein design","volume":"72","author":"Strokach","year":"2022","journal-title":"Curr Opin Struct Biol"},{"key":"2024071814113367900_btae364-B22","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1021\/ci00057a005","article-title":"Smiles, a chemical language and information system. 1. introduction to methodology and encoding rules","volume":"28","author":"Weininger","year":"1988","journal-title":"J Chem Inf Comput Sci"},{"key":"2024071814113367900_btae364-B23","doi-asserted-by":"crossref","first-page":"1043","DOI":"10.1016\/j.gpb.2023.03.004","article-title":"Ab-gen: antibody library design with generative pre-trained transformer and deep reinforcement learning","volume":"21","author":"Xu","year":"2023","journal-title":"Genomics Proteomics Bioinf"},{"key":"2024071814113367900_btae364-B24","doi-asserted-by":"crossref","DOI":"10.12688\/f1000research.130936.2","article-title":"Optimization of Binding Affinities in Chemical Space with Generative Pretrained Transformer and Deep Reinforcement Learning","volume":"12","author":"Xu","year":"2024","journal-title":"F1000Research"},{"key":"2024071814113367900_btae364-B25","doi-asserted-by":"crossref","first-page":"992171","DOI":"10.3389\/fonc.2022.992171","article-title":"Utilization of macrocyclic peptides to target protein\u2013protein interactions in cancer","volume":"12","author":"Yang","year":"2022","journal-title":"Front Oncol"},{"key":"2024071814113367900_btae364-B26","doi-asserted-by":"crossref","first-page":"1431","DOI":"10.1246\/cl.180665","article-title":"Population-based de novo molecule generation, using grammatical evolution","volume":"47","author":"Yoshikawa","year":"2018","journal-title":"Chem Lett"},{"key":"2024071814113367900_btae364-B27","doi-asserted-by":"crossref","first-page":"2796","DOI":"10.1021\/ci3001925","article-title":"Helm: A Hierarchical Notation Language for Complex Biomolecule Structure Representation","volume":"52","author":"Zhang","journal-title":"J Chem Inf Model"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae364\/58212621\/btae364.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/6\/btae364\/58585524\/btae364.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/6\/btae364\/58585524\/btae364.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,18]],"date-time":"2024-07-18T15:35:35Z","timestamp":1721316935000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae364\/7691994"}},"subtitle":[],"editor":[{"given":"Pier Luigi","family":"Martelli","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":27,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,6,3]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae364","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,6]]},"published":{"date-parts":[[2024,6]]},"article-number":"btae364"}}