{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T23:49:29Z","timestamp":1775951369515,"version":"3.50.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2022,9,26]],"date-time":"2022-09-26T00:00:00Z","timestamp":1664150400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,9,26]],"date-time":"2022-09-26T00:00:00Z","timestamp":1664150400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000092","name":"U.S. Department of Health & Human Services | NIH | U.S. National Library of Medicine","doi-asserted-by":"publisher","award":["5T15LM007056-35"],"award-info":[{"award-number":["5T15LM007056-35"]}],"id":[{"id":"10.13039\/100000092","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000057","name":"U.S. Department of Health & Human Services | NIH | National Institute of General Medical Sciences","doi-asserted-by":"publisher","award":["R01GM135929"],"award-info":[{"award-number":["R01GM135929"]}],"id":[{"id":"10.13039\/100000057","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-022-00532-1","type":"journal-article","created":{"date-parts":[[2022,9,26]],"date-time":"2022-09-26T16:03:44Z","timestamp":1664208224000},"page":"840-851","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":61,"title":["Transformer-based protein generation with regularized latent space optimization"],"prefix":"10.1038","volume":"4","author":[{"given":"Egbert","family":"Castro","sequence":"first","affiliation":[]},{"given":"Abhinav","family":"Godavarthi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2501-3612","authenticated-orcid":false,"given":"Julian","family":"Rubinfien","sequence":"additional","affiliation":[]},{"given":"Kevin","family":"Givechian","sequence":"additional","affiliation":[]},{"given":"Dhananjay","family":"Bhaskar","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5823-1985","authenticated-orcid":false,"given":"Smita","family":"Krishnaswamy","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,9,26]]},"reference":[{"key":"532_CR1","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1186\/1756-0500-5-85","volume":"5","author":"A Tiessen","year":"2012","unstructured":"Tiessen, A., P\u00e9rez-Rodr\u00edguez, P. & Delaye-Arredondo, L. J. Mathematical modeling and comparison of protein size distribution in different plant, animal, fungal and microbial species reveals a negative correlation between protein size and protein number, thus providing insight into the evolution of proteomes. BMC Res. Notes 5, 85 (2012).","journal-title":"BMC Res. Notes"},{"key":"532_CR2","doi-asserted-by":"publisher","first-page":"1204","DOI":"10.1002\/pro.2897","volume":"25","author":"TN Starr","year":"2016","unstructured":"Starr, T. N. & Thornton, J. W. Epistasis in protein evolution. Protein Sci. 25, 1204\u20131218 (2016).","journal-title":"Protein Sci."},{"key":"532_CR3","doi-asserted-by":"publisher","first-page":"866","DOI":"10.1038\/nrm2805","volume":"10","author":"PA Romero","year":"2009","unstructured":"Romero, P. A. & Arnold, F. H. Exploring protein fitness landscapes by directed evolution. Nat. Rev. Mol. Cell Biol. 10, 866\u2013876 (2009).","journal-title":"Nat. Rev. Mol. Cell Biol."},{"key":"532_CR4","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1038\/s41929-019-0385-5","volume":"3","author":"K Chen","year":"2020","unstructured":"Chen, K. & Arnold, F. H. Engineering new catalytic activities in enzymes. Nat. Catal. 3, 203\u2013213 (2020).","journal-title":"Nat. Catal."},{"key":"532_CR5","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1021\/ar960017f","volume":"31","author":"FH Arnold","year":"1998","unstructured":"Arnold, F. H. Design by directed evolution. Acc. Chem. Res. 31, 125\u2013131 (1998).","journal-title":"Acc. Chem. Res."},{"key":"532_CR6","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/S0076-6879(04)83004-0","volume":"383","author":"CA Rohl","year":"2004","unstructured":"Rohl, C. A., Strauss, C. E. M., Misura, K. M. S. & Baker, D. Protein structure prediction using Rosetta. Methods Enzymol. 383, 66\u201393 (2004).","journal-title":"Methods Enzymol."},{"key":"532_CR7","doi-asserted-by":"publisher","first-page":"e2017228118","DOI":"10.1073\/pnas.2017228118","volume":"118","author":"C Norn","year":"2021","unstructured":"Norn, C. et al. Protein sequence design by conformational landscape optimization. Proc. Natl Acad. Sci. USA 118, e2017228118 (2021).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"532_CR8","unstructured":"Brookes, D. H. & Listgarten, J. Design by adaptive sampling. Preprint at https:\/\/arxiv.org\/abs\/1810.03714 (2018)."},{"key":"532_CR9","first-page":"773","volume":"97","author":"D Brookes","year":"2019","unstructured":"Brookes, D., Park, H. & Listgarten, J. Conditioning by adaptive sampling for robust design. Proceedings of the 36th International Conference on Machine Learning 97, 773\u2013782 (2019).","journal-title":"Proceedings of the 36th International Conference on Machine Learning"},{"key":"532_CR10","doi-asserted-by":"publisher","first-page":"687","DOI":"10.1038\/s41592-019-0496-6","volume":"16","author":"KK Yang","year":"2019","unstructured":"Yang, K. K., Wu, Z. & Arnold, F. H. Machine-learning-guided directed evolution for protein engineering. Nat. Methods 16, 687\u2013694 (2019).","journal-title":"Nat. Methods"},{"key":"532_CR11","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1038\/s41592-021-01100-y","volume":"18","author":"S Biswas","year":"2021","unstructured":"Biswas, S., Khimulya, G., Alley, E. C., Esvelt, K. M. & Church, G. M. Low-n protein engineering with data-efficient deep learning. Nat. Methods 18, 389\u2013396 (2021).","journal-title":"Nat. Methods"},{"key":"532_CR12","unstructured":"Linder, J. & Seelig, G. Fast differentiable DNA and protein sequence optimization for molecular design. Preprint at https:\/\/arxiv.org\/abs\/2005.11275 (2020)."},{"key":"532_CR13","unstructured":"Angermueller, C. et al. Model-based reinforcement learning for biological sequence design. In International Conference on Learning Representations (2019)."},{"key":"532_CR14","doi-asserted-by":"publisher","first-page":"1315","DOI":"10.1038\/s41592-019-0598-1","volume":"16","author":"EC Alley","year":"2019","unstructured":"Alley, E. C., Khimulya, G., Biswas, S., AlQuraishi, M. & Church, G. M. Unified rational protein engineering with sequence-based deep representation learning. Nat. Methods 16, 1315\u20131322 (2019).","journal-title":"Nat. Methods"},{"key":"532_CR15","doi-asserted-by":"publisher","first-page":"2126","DOI":"10.1093\/bioinformatics\/btz895","volume":"36","author":"G Liu","year":"2020","unstructured":"Liu, G. et al. Antibody complementarity determining region design using high-capacity machine learning. Bioinformatics 36, 2126\u20132133 (2020).","journal-title":"Bioinformatics"},{"key":"532_CR16","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","volume":"596","author":"J Jumper","year":"2021","unstructured":"Jumper, J. et al. Highly accurate protein structure prediction with AlphaFold. Nature 596, 583\u2013589 (2021).","journal-title":"Nature"},{"key":"532_CR17","doi-asserted-by":"crossref","unstructured":"Rao, R. et al. Evaluating protein transfer learning with TAPE. Adv. Neural Inf. Process. Syst. 32, 9689\u20139701 (2019).","DOI":"10.1101\/676825"},{"key":"532_CR18","doi-asserted-by":"publisher","unstructured":"Rao, R., Ovchinnikov, S., Meier, J., Rives, A. & Sercu, T. Transformer protein language models are unsupervised structure learners. Preprint at bioRxiv https:\/\/doi.org\/10.1101\/2020.12.15.422761 (2020).","DOI":"10.1101\/2020.12.15.422761"},{"key":"532_CR19","doi-asserted-by":"publisher","first-page":"e2016239118","DOI":"10.1073\/pnas.2016239118","volume":"118","author":"A Rives","year":"2021","unstructured":"Rives, A. et al. Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences. Proc. Natl Acad. Sci. USA 118, e2016239118 (2021).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"532_CR20","doi-asserted-by":"crossref","unstructured":"Vig, J. et al. BERTology meets biology: interpreting attention in protein language models. Preprint at https:\/\/arxiv.org\/abs\/2006.15222 (2020).","DOI":"10.1101\/2020.06.26.174417"},{"key":"532_CR21","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1142\/S0218488598000094","volume":"6","author":"S Hochreiter","year":"1998","unstructured":"Hochreiter, S. The vanishing gradient problem during learning recurrent neural nets and problem solutions. Int. J. Uncertain. Fuzziness Knowl.-Based Syst. 6, 107\u2013116 (1998).","journal-title":"Int. J. Uncertain. Fuzziness Knowl.-Based Syst."},{"key":"532_CR22","doi-asserted-by":"publisher","first-page":"1914","DOI":"10.1038\/s41467-022-29443-w","volume":"13","author":"NS Detlefsen","year":"2022","unstructured":"Detlefsen, N. S., Hauberg, S. & Boomsma, W. Learning meaningful representations of protein sequences. Nat. Commun. 13, 1914 (2022).","journal-title":"Nat. Commun."},{"key":"532_CR23","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1021\/acscentsci.7b00572","volume":"4","author":"R G\u00f3mez-Bombarelli","year":"2018","unstructured":"G\u00f3mez-Bombarelli, R. et al. Automatic chemical design using a data-driven continuous representation of molecules. ACS Cent. Sci. 4, 268\u2013276 (2018).","journal-title":"ACS Cent. Sci."},{"key":"532_CR24","doi-asserted-by":"crossref","unstructured":"Castro, E., Benz, A., Tong, A., Wolf, G. & Krishnaswamy, S. Uncovering the folding landscape of RNA secondary structure using deep graph embeddings. 2020 IEEE International Conference on Big Data. 4519\u20134528 (2020).","DOI":"10.1109\/BigData50022.2020.9378305"},{"key":"532_CR25","doi-asserted-by":"publisher","first-page":"397","DOI":"10.1038\/nature17995","volume":"533","author":"KS Sarkisyan","year":"2016","unstructured":"Sarkisyan, K. S. et al. Local fitness landscape of the green fluorescent protein. Nature 533, 397\u2013401 (2016).","journal-title":"Nature"},{"key":"532_CR26","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1002\/pro.3942","volume":"30","author":"CH Rodrigues","year":"2021","unstructured":"Rodrigues, C. H., Pires, D. E. & Ascher, D. B. Dynamut2: assessing changes in stability and flexibility upon single and multiple point missense mutations. Protein Sci. 30, 60\u201369 (2021).","journal-title":"Protein Sci."},{"key":"532_CR27","unstructured":"Vaswani, A. et al. Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)."},{"key":"532_CR28","unstructured":"Yoshida, Y. & Miyato, T. Spectral norm regularization for improving the generalizability of deep learning. Preprint at https:\/\/arxiv.org\/abs\/1705.10941 (2017)."},{"key":"532_CR29","doi-asserted-by":"publisher","first-page":"D412","DOI":"10.1093\/nar\/gkaa913","volume":"49","author":"J Mistry","year":"2021","unstructured":"Mistry, J. et al. Pfam: the protein families database in 2021. Nucleic Acids Res. 49, D412\u2013D419 (2021).","journal-title":"Nucleic Acids Res."},{"key":"532_CR30","doi-asserted-by":"publisher","first-page":"e16965","DOI":"10.7554\/eLife.16965","volume":"5","author":"NC Wu","year":"2016","unstructured":"Wu, N. C., Dai, L., Olson, C. A., Lloyd-Smith, J. O. & Sun, R. Adaptation in protein fitness landscapes is facilitated by indirect paths. eLife 5, e16965 (2016).","journal-title":"eLife"}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-022-00532-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-022-00532-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-022-00532-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,11]],"date-time":"2024-12-11T22:10:55Z","timestamp":1733955055000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-022-00532-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,26]]},"references-count":30,"journal-issue":{"issue":"10","published-online":{"date-parts":[[2022,10]]}},"alternative-id":["532"],"URL":"https:\/\/doi.org\/10.1038\/s42256-022-00532-1","relation":{},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,9,26]]},"assertion":[{"value":"27 November 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 August 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 September 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}