{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T06:17:52Z","timestamp":1780985872252,"version":"3.54.1"},"reference-count":100,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2022,6,22]],"date-time":"2022-06-22T00:00:00Z","timestamp":1655856000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,6,22]],"date-time":"2022-06-22T00:00:00Z","timestamp":1655856000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-022-00499-z","type":"journal-article","created":{"date-parts":[[2022,6,22]],"date-time":"2022-06-22T16:37:07Z","timestamp":1655915827000},"page":"521-532","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":182,"title":["Controllable protein design with language models"],"prefix":"10.1038","volume":"4","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4172-8201","authenticated-orcid":false,"given":"Noelia","family":"Ferruz","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8250-9462","authenticated-orcid":false,"given":"Birte","family":"H\u00f6cker","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,6,22]]},"reference":[{"key":"499_CR1","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1016\/j.cbpa.2018.07.022","volume":"47","author":"H Lechner","year":"2018","unstructured":"Lechner, H., Ferruz, N. & H\u00f6cker, B. Strategies for designing non-natural enzymes and binders. Curr. Opin. Chem. Biol. 47, 67\u201376 (2018).","journal-title":"Curr. Opin. Chem. Biol."},{"key":"499_CR2","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1016\/j.sbi.2016.03.006","volume":"39","author":"P Gainza","year":"2016","unstructured":"Gainza, P., Nisonoff, H. M. & Donald, B. R. Algorithms for protein design. Curr. Opin. Struct. Biol. 39, 16\u201326 (2016).","journal-title":"Curr. Opin. Struct. Biol."},{"key":"499_CR3","doi-asserted-by":"publisher","first-page":"681","DOI":"10.1038\/s41580-019-0163-x","volume":"20","author":"B Kuhlman","year":"2019","unstructured":"Kuhlman, B. & Bradley, P. Advances in protein structure prediction and design. Nat. Rev. Mol. Cell Biol. 20, 681\u2013697 (2019).","journal-title":"Nat. Rev. Mol. Cell Biol."},{"key":"499_CR4","doi-asserted-by":"publisher","first-page":"3898","DOI":"10.1016\/j.jmb.2020.04.013","volume":"432","author":"N Ferruz","year":"2020","unstructured":"Ferruz, N. et al. Identification and analysis of natural building blocks for evolution-guided fragment-based protein design. J. Mol. Biol. 432, 3898\u20133914 (2020).","journal-title":"J. Mol. Biol."},{"key":"499_CR5","doi-asserted-by":"publisher","first-page":"869","DOI":"10.1038\/nature01025","volume":"418","author":"E W","year":"2002","unstructured":"W, E. et al. Molecular evolution of FOXP2, a gene involved in speech and language. Nature 418, 869\u2013872 (2002).","journal-title":"Nature"},{"key":"499_CR6","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1038\/nature09014","volume":"465","author":"DL Theobald","year":"2010","unstructured":"Theobald, D. L. A formal test of the theory of universal common ancestry. Nature 465, 219\u2013222 (2010).","journal-title":"Nature"},{"key":"499_CR7","doi-asserted-by":"publisher","first-page":"2157","DOI":"10.1158\/1078-0432.CCR-14-2821","volume":"21","author":"S Arena","year":"2015","unstructured":"Arena, S. et al. Emergence of multiple EGFR extracellular mutations during cetuximab treatment in colorectal cancer. Clin. Cancer Res. 21, 2157\u20132166 (2015).","journal-title":"Clin. Cancer Res."},{"key":"499_CR8","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1016\/S0959-440X(97)80061-9","volume":"7","author":"Y Lindqvist","year":"1997","unstructured":"Lindqvist, Y. & Schneider, G. Circular permutations of natural protein sequences: structural evidence. Curr. Opin. Struct. Biol. 7, 422\u2013427 (1997).","journal-title":"Curr. Opin. Struct. Biol."},{"key":"499_CR9","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1038\/nchembio.1966","volume":"12","author":"PS Huang","year":"2016","unstructured":"Huang, P. S. et al. De novo design of a four-fold symmetric TIM-barrel protein with atomic-level accuracy. Nat. Chem. Biol. 12, 29\u201334 (2016).","journal-title":"Nat. Chem. Biol."},{"key":"499_CR10","doi-asserted-by":"publisher","first-page":"702","DOI":"10.3389\/fpsyg.2016.00702","volume":"7","author":"MR Freeman","year":"2016","unstructured":"Freeman, M. R., Blumenfeld, H. K. & Marian, V. Phonotactic constraints are activated across languages in bilinguals. Front. Psychol. 7, 702 (2016).","journal-title":"Front. Psychol."},{"key":"499_CR11","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1002\/prot.340180402","volume":"18","author":"U G\u00f6bel","year":"1994","unstructured":"G\u00f6bel, U., Sander, C., Schneider, R. & Valencia, A. Correlated mutations and residue contacts in proteins. Proteins Struct. Funct. Bioinformatics 18, 309\u2013317 (1994).","journal-title":"Proteins Struct. Funct. Bioinformatics"},{"key":"499_CR12","unstructured":"Rao, R. M. et al. MSA Transformer. In Proc. 38th International Conference on Machine Learning Vol. 139, 8844\u20138856 https:\/\/proceedings.mlr.press\/v139\/rao21a.html (MLR, 2021)."},{"key":"499_CR13","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","volume":"596","author":"J Jumper","year":"2021","unstructured":"Jumper, J. et al. Highly accurate protein structure prediction with AlphaFold. Nature 596, 583\u2013589 (2021).","journal-title":"Nature"},{"key":"499_CR14","doi-asserted-by":"crossref","unstructured":"Nguyen, K. A., Im Walde, S. S. & Vu, N. T. Distinguishing antonyms and synonyms in a pattern-based neural network. In Proc. 15th Conference of the European Chapter of the Association for Computational Linguistics, EACL 2017 Vol. 1, 76\u201385 (Association for Computational Linguistics, 2017).","DOI":"10.18653\/v1\/E17-1008"},{"key":"499_CR15","doi-asserted-by":"crossref","unstructured":"Young, T., Hazarika, D., Poria, S., and Cambria, E. Recent Trends in Deep Learning Based Natural Language Processing\" in IEEE Computational Intelligence Magazine, Vol. 13, no. 3, 55-75, (2018).","DOI":"10.1109\/MCI.2018.2840738"},{"key":"499_CR16","doi-asserted-by":"publisher","unstructured":"Zhou, G. & Su, J. Named entity recognition using an HMM-based chunk tagger. In Proc. 40th Annual Meeting of the Association for Computational Linguistics, ACL \u201902 473\u2013480 https:\/\/doi.org\/10.3115\/1073083.1073163 (Association for Computational Linguistics, 2001).","DOI":"10.3115\/1073083.1073163"},{"key":"499_CR17","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1002\/prot.10369","volume":"51","author":"R Karchin","year":"2003","unstructured":"Karchin, R., Cline, M., Mandel-Gutfreund, Y. & Karplus, K. Hidden Markov models that use predicted local structure for fold recognition: alphabets of backbone geometry. Proteins Struct. Funct. Genet. 51, 504\u2013514 (2003).","journal-title":"Proteins Struct. Funct. Genet."},{"key":"499_CR18","doi-asserted-by":"publisher","unstructured":"Yakhnenko, O., Silvescu, A. & Honavar, V. Discriminatively trained Markov model for sequence classification. In Proc. IEEE International Conference on Data Mining, ICDM 498\u2013505 https:\/\/doi.org\/10.1109\/ICDM.2005.52 (IEEE, 2005).","DOI":"10.1109\/ICDM.2005.52"},{"key":"499_CR19","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1186\/1471-2105-10-202","volume":"10","author":"AN Nguyen Ba","year":"2009","unstructured":"Nguyen Ba, A. N., Pogoutse, A., Provart, N. & Moses, A. M. NLStradamus: a simple hidden Markov model for nuclear localization signal prediction. BMC Bioinformatics 10, 202 (2009).","journal-title":"BMC Bioinformatics"},{"key":"499_CR20","doi-asserted-by":"publisher","first-page":"951","DOI":"10.1093\/bioinformatics\/bti125","volume":"21","author":"J S\u00f6ding","year":"2005","unstructured":"S\u00f6ding, J. Protein homology detection by HMM-HMM comparison. Bioinformatics 21, 951\u2013960 (2005).","journal-title":"Bioinformatics"},{"key":"499_CR21","first-page":"1137","volume":"3","author":"Y Bengio","year":"2003","unstructured":"Bengio, Y. et al. A neural probabilistic language model. J. Mach. Learn. Res. 3, 1137\u20131155 (2003).","journal-title":"J. Mach. Learn. Res."},{"key":"499_CR22","unstructured":"Mikolov, T., Chen, K., Corrado, G. & Dean, J. Efficient estimation of word representations in vector space. In Proc. 1st International Conference on Learning Representations, ICLR 2013 (ICLR, 2013)."},{"key":"499_CR23","unstructured":"Mikolov, T., Chen, K., Corrado, G. & Dean, J. Distributed representations of words and phrases and their compositionality. In Proc. 26th International Conference on Neural Information Processing Systems Vol. 2, 3111\u20133119 (ACM, 2013)."},{"key":"499_CR24","unstructured":"Mikolov, T., Yih, W.-T. & Zweig, G. Linguistic Regularities in Continuous Space Word Representations http:\/\/research.microsoft.com\/en- (Microsoft, 2013)."},{"key":"499_CR25","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1109\/MCI.2018.2840738","volume":"13","author":"T Young","year":"2017","unstructured":"Young, T., Hazarika, D., Poria, S. & Cambria, E. Recent trends in deep learning based natural language processing. IEEE Comput. Intell. Mag. 13, 55\u201375 (2017).","journal-title":"IEEE Comput. Intell. Mag."},{"key":"499_CR26","doi-asserted-by":"publisher","first-page":"2642","DOI":"10.1093\/bioinformatics\/bty178","volume":"34","author":"KK Yang","year":"2018","unstructured":"Yang, K. K., Wu, Z., Bedbrook, C. N. & Arnold, F. H. Learned protein embeddings for machine learning. Bioinformatics 34, 2642\u20132648 (2018).","journal-title":"Bioinformatics"},{"key":"499_CR27","doi-asserted-by":"publisher","first-page":"e0141287","DOI":"10.1371\/journal.pone.0141287","volume":"10","author":"E Asgari","year":"2015","unstructured":"Asgari, E. & Mofrad, M. R. K. Continuous distributed representation of biological sequences for deep proteomics and genomics. PLoS ONE 10, e0141287 (2015).","journal-title":"PLoS ONE"},{"key":"499_CR28","doi-asserted-by":"publisher","unstructured":"Collobert, R. & Weston, J. A unified architecture for natural language processing. In Proc. 25th International Conference on Machine learning, ICML \u201908 160\u2013167 https:\/\/doi.org\/10.1145\/1390156.1390177 (ACM, 2008).","DOI":"10.1145\/1390156.1390177"},{"key":"499_CR29","doi-asserted-by":"publisher","first-page":"17315","DOI":"10.3390\/ijms160817315","volume":"16","author":"S Wang","year":"2015","unstructured":"Wang, S., Weng, S., Ma, J. & Tang, Q. DeepCNF-D: predicting protein order\/disorder regions by weighted deep convolutional neural fields. Int. J. Mol. Sci. 16, 17315\u201317330 (2015).","journal-title":"Int. J. Mol. Sci."},{"key":"499_CR30","doi-asserted-by":"publisher","first-page":"i121","DOI":"10.1093\/bioinformatics\/btw255","volume":"32","author":"H Zeng","year":"2016","unstructured":"Zeng, H., Edwards, M. D., Liu, G. & Gifford, D. K. Convolutional neural network architectures for predicting DNA-protein binding. Bioinformatics 32, i121\u2013i127 (2016).","journal-title":"Bioinformatics"},{"key":"499_CR31","doi-asserted-by":"publisher","first-page":"1295","DOI":"10.1093\/bioinformatics\/btx780","volume":"34","author":"J Hou","year":"2018","unstructured":"Hou, J., Adhikari, B. & Cheng, J. DeepSF: deep convolutional neural network for mapping protein sequences to folds. Bioinformatics 34, 1295\u20131303 (2018).","journal-title":"Bioinformatics"},{"key":"499_CR32","doi-asserted-by":"crossref","unstructured":"Mikolov, T. et al. Recurrent neural network based language model. In Proc. 11th Annual Conference of the International Speech Communication Association 1048\u20131048 (ISCA, 2010).","DOI":"10.21437\/Interspeech.2010-343"},{"key":"499_CR33","doi-asserted-by":"crossref","unstructured":"Lample, G., Ballesteros, M., Subramanian, S., Kawakami, K. & Dyer, C. Neural architectures for named entity recognition. In Proc. 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies 260\u2013270 (Association for Computational Linguistics, 2016).","DOI":"10.18653\/v1\/N16-1030"},{"key":"499_CR34","unstructured":"Bahdanau, D., Cho, K. H. & Bengio, Y. Neural machine translation by jointly learning to align and translate. In Proc. 3rd International Conference on Learning Representations, ICLR 2015 (ICLR, 2015)."},{"key":"499_CR35","unstructured":"Radford, A., Jozefowicz, R. & Sutskever, I. Learning to generate reviews and discovering sentiment. Preprint at https:\/\/arxiv.org\/abs\/1704.01444 (2017)."},{"key":"499_CR36","doi-asserted-by":"publisher","unstructured":"Krause, B., Murray, I., Renals, S. & Lu, L. Multiplicative LSTM for sequence modelling. In Proc. 5th International Conference on Learning Representations, ICLR 2017 https:\/\/doi.org\/10.48550\/arxiv.1609.07959 (ICLR, 2016).","DOI":"10.48550\/arxiv.1609.07959"},{"key":"499_CR37","doi-asserted-by":"publisher","first-page":"1315","DOI":"10.1038\/s41592-019-0598-1","volume":"16","author":"EC Alley","year":"2019","unstructured":"Alley, E. C., Khimulya, G., Biswas, S., AlQuraishi, M. & Church, G. M. Unified rational protein engineering with sequence-based deep representation learning. Nat. Methods 16, 1315\u20131322 (2019).","journal-title":"Nat. Methods"},{"key":"499_CR38","doi-asserted-by":"publisher","first-page":"706","DOI":"10.1038\/s41586-019-1923-7","volume":"577","author":"AW Senior","year":"2020","unstructured":"Senior, A. W. et al. Improved protein structure prediction using potentials from deep learning. Nature 577, 706\u2013710 (2020).","journal-title":"Nature"},{"key":"499_CR39","unstructured":"Vaswani, A. et al. Transformer: attention is all you need. In Advances in Neural Information Processing Systems Vol. 2017, 5999\u20136009 (NIPS, 2017)."},{"key":"499_CR40","unstructured":"Radford, A. & Narasimhan, K. Improving language understanding by generative pre-training; https:\/\/openai.com\/blog\/language-unsupervised\/ (2018)."},{"key":"499_CR41","unstructured":"Radford, A. et al. Language Models are Unsupervised Multitask Learners (GitHub); https:\/\/github.com\/codelucas\/newspaper"},{"key":"499_CR42","unstructured":"Brown, T. B. et al. Language models are few-shot learners. Preprint at https:\/\/arxiv.org\/abs\/2005.14165 (2020)."},{"key":"499_CR43","unstructured":"Mak, A. When is technology too dangerous to release to the public? Slate https:\/\/slate.com\/technology\/2019\/02\/openai-gpt2-text-generating-algorithm-ai-dangerous.html (22 February 2019)."},{"key":"499_CR44","unstructured":"Devlin, J., Chang, M.-W., Lee, K. & Toutanova, K. BERT: pre-training of deep bidirectional transformers for language understanding. In Proc. 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies Vol. 1 4171\u20134186 (Association for Computational Linguistics, 2018)."},{"key":"499_CR45","doi-asserted-by":"crossref","unstructured":"Wang, A. & Cho, K. BERT has a mouth, and it must speak: BERT as a Markov random field language model. In Proc. Workshop on Methods for Optimizing and Evaluating Neural Language Generation 30\u201336 (ACL, 2019).","DOI":"10.18653\/v1\/W19-2304"},{"key":"499_CR46","doi-asserted-by":"crossref","unstructured":"Sun, C., Qiu, X., Xu, Y. & Huang, X. in Lecture Notes in Computer Science Vol. 11856, 194\u2013206 (Springer, 2019).","DOI":"10.1007\/978-3-030-32381-3_16"},{"key":"499_CR47","unstructured":"Wolf, T. et al. ransformers: state-of-the-art natural language processing. in Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations. 38-45 (2020)."},{"key":"499_CR48","unstructured":"Total Data Volume Worldwide 2010\u20132025 (Statista); https:\/\/www.statista.com\/statistics\/871513\/worldwide-data-created\/"},{"key":"499_CR49","doi-asserted-by":"publisher","first-page":"3636","DOI":"10.1073\/pnas.1814684116","volume":"116","author":"L Yu","year":"2019","unstructured":"Yu, L. et al. Grammar of protein domain architectures. Proc. Natl Acad. Sci. USA 116, 3636\u20133645 (2019).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"499_CR50","doi-asserted-by":"publisher","first-page":"e2016239118","DOI":"10.1073\/pnas.2016239118","volume":"118","author":"A Rives","year":"2021","unstructured":"Rives, A. et al. Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences. Proc. Natl Acad. Sci. USA 118, e2016239118 (2021).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"499_CR51","first-page":"9689","volume":"32","author":"R Rao","year":"2019","unstructured":"Rao, R. et al. Evaluating protein transfer learning with TAPE. Adv. Neural Inf. Process. Syst 32, 9689\u20139701 (2019).","journal-title":"Adv. Neural Inf. Process. Syst"},{"key":"499_CR52","doi-asserted-by":"crossref","unstructured":"Elnaggar, A. et al. ProtTrans: towards cracking the language of life\u2019s code through self-supervised learning. IEEE Transactions on Pattern Analysis and Machine Intelligence. 1-1 (2019).","DOI":"10.1109\/TPAMI.2021.3095381"},{"key":"499_CR53","doi-asserted-by":"publisher","unstructured":"Ferruz, N. & H\u00f6cker, B. Dreaming ideal protein structures. Nat. Biotechnol. https:\/\/doi.org\/10.1038\/s41587-021-01196-9 (2022).","DOI":"10.1038\/s41587-021-01196-9"},{"key":"499_CR54","unstructured":"Mordvintsev, A. DeepDream\u2014a code example for visualizing neural networks. Google Research Blog https:\/\/web.archive.org\/web\/20150708233542\/http:\/\/googleresearch.blogspot.co.uk\/2015\/07\/deepdream-code-example-for-visualizing.html"},{"key":"499_CR55","doi-asserted-by":"publisher","first-page":"547","DOI":"10.1038\/s41586-021-04184-w","volume":"600","author":"I Anishchenko","year":"2021","unstructured":"Anishchenko, I. et al. De novo protein design by deep network hallucination. Nature 600, 547\u2013552 (2021).","journal-title":"Nature"},{"key":"499_CR56","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1038\/s41586-021-04383-5","volume":"602","author":"B Huang","year":"2022","unstructured":"Huang, B. et al. A backbone-centred energy function of neural networks for protein design. Nature 602, 523\u2013528 (2022).","journal-title":"Nature"},{"key":"499_CR57","unstructured":"Castro, E. et al. Guided generative protein design using regularized transformers. Preprint at https:\/\/arxiv.org\/abs\/2201.09948 (2022)."},{"key":"499_CR58","doi-asserted-by":"crossref","unstructured":"Moffat, L., Kandathil, S. M. & Jones, D. T. Design in the DARK: learning deep generative models for de novo protein design. Preprint at https:\/\/www.biorxiv.org\/content\/10.1101\/2022.01.27.478087v1 (2022).","DOI":"10.1101\/2022.01.27.478087"},{"key":"499_CR59","doi-asserted-by":"crossref","unstructured":"Ferruz, N., Schmidt, S. & H\u00f6cker, B. A deep unsupervised language model for protein design. Preprint at https:\/\/www.biorxiv.org\/content\/10.1101\/2022.03.09.483666v1 (2022).","DOI":"10.1101\/2022.03.09.483666"},{"key":"499_CR60","doi-asserted-by":"publisher","first-page":"101983","DOI":"10.1016\/j.wpi.2020.101983","volume":"62","author":"JS Lee","year":"2020","unstructured":"Lee, J. S. & Hsiang, J. Patent claim generation by fine-tuning OpenAI GPT-2. World Pat. Inf. 62, 101983 (2020).","journal-title":"World Pat. Inf."},{"key":"499_CR61","doi-asserted-by":"crossref","unstructured":"Gligorijevi\u0107, V. et al. Function-guided protein design by deep manifold sampling. in Neural Information Processing Systems (NeurIPS, 2021).","DOI":"10.1101\/2021.12.22.473759"},{"key":"499_CR62","unstructured":"Keskar, N. S., McCann, B., Varshney, L. R., Xiong, C. & Socher, R. CTRL: a conditional transformer language model for controllable generation. Preprint at https:\/\/arxiv.org\/abs\/1909.05858 (2019)."},{"key":"499_CR63","doi-asserted-by":"crossref","unstructured":"Madani, A. et al. ProGen: language modeling for protein generation. Preprint at https:\/\/www.biorxiv.org\/content\/10.1101\/2020.03.07.982272v2 (2020).","DOI":"10.1101\/2020.03.07.982272"},{"key":"499_CR64","doi-asserted-by":"crossref","unstructured":"Madani, A. et al. Deep neural language modeling enables functional protein generation across families. Preprint at https:\/\/www.biorxiv.org\/content\/10.1101\/2021.07.18.452833v1 (2021).","DOI":"10.1101\/2021.07.18.452833"},{"key":"499_CR65","doi-asserted-by":"publisher","first-page":"e1009446","DOI":"10.1371\/journal.pcbi.1009446","volume":"17","author":"E Rembeza","year":"2021","unstructured":"Rembeza, E. & Engqvist, M. K. M. Experimental and computational investigation of enzyme functional annotations uncovers misannotation in the EC 1.1.3.15 enzyme class. PLoS Comput. Biol. 17, e1009446 (2021).","journal-title":"PLoS Comput. Biol."},{"key":"499_CR66","doi-asserted-by":"publisher","first-page":"D330","DOI":"10.1093\/nar\/gkv1324","volume":"44","author":"YC Chang","year":"2016","unstructured":"Chang, Y. C. et al. COMBREX-DB: an experiment centered database of protein function: knowledge, predictions and knowledge gaps. Nucleic Acids Res. 44, D330\u2013D335 (2016).","journal-title":"Nucleic Acids Res."},{"key":"499_CR67","doi-asserted-by":"publisher","unstructured":"Bileschi, M. L. et al. Using deep learning to annotate the protein universe. Nat. Biotechnol. https:\/\/doi.org\/10.1038\/s41587-021-01179-w (2022).","DOI":"10.1038\/s41587-021-01179-w"},{"key":"499_CR68","doi-asserted-by":"publisher","first-page":"6091","DOI":"10.1039\/C8SC02339E","volume":"9","author":"P Schwaller","year":"2018","unstructured":"Schwaller, P., Gaudin, T., L\u00e1nyi, D., Bekas, C. & Laino, T. \u2018Found in Translation\u2019: predicting outcomes of complex organic chemistry reactions using neural sequence-to-sequence models. Chem. Sci. 9, 6091\u20136098 (2018).","journal-title":"Chem. Sci."},{"key":"499_CR69","doi-asserted-by":"publisher","first-page":"3316","DOI":"10.1039\/C9SC05704H","volume":"11","author":"P Schwaller","year":"2020","unstructured":"Schwaller, P. et al. Predicting retrosynthetic pathways using transformer-based models and a hyper-graph exploration strategy. Chem. Sci. 11, 3316\u20133325 (2020).","journal-title":"Chem. Sci."},{"key":"499_CR70","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1038\/s41598-020-79682-4","volume":"11","author":"D Grechishnikova","year":"2021","unstructured":"Grechishnikova, D. Transformer neural network for protein-specific de novo drug generation as a machine translation problem. Sci. Rep. 11, 321 (2021).","journal-title":"Sci. Rep."},{"key":"499_CR71","doi-asserted-by":"publisher","first-page":"144","DOI":"10.1038\/s42256-020-00284-w","volume":"3","author":"P Schwaller","year":"2021","unstructured":"Schwaller, P. et al. Mapping the space of chemical reactions using attention-based neural networks. Nat. Mach. Intell. 3, 144\u2013152 (2021).","journal-title":"Nat. Mach. Intell."},{"key":"499_CR72","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1038\/nature11600","volume":"491","author":"N Koga","year":"2012","unstructured":"Koga, N. et al. Principles for designing ideal protein structures. Nature 491, 222\u2013227 (2012).","journal-title":"Nature"},{"key":"499_CR73","doi-asserted-by":"publisher","first-page":"573","DOI":"10.1038\/s42256-020-00236-4","volume":"2","author":"J Jim\u00e9nez-Luna","year":"2020","unstructured":"Jim\u00e9nez-Luna, J., Grisoni, F. & Schneider, G. Drug discovery with explainable artificial intelligence. Nat. Mach. Intell. 2, 573\u2013584 (2020).","journal-title":"Nat. Mach. Intell."},{"key":"499_CR74","unstructured":"Danilevsky, M. et al. A survey of the state of explainable AI for natural language processing. In Proc. 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing 447\u2013459 (Association for Computational Linguistics, 2020)."},{"key":"499_CR75","doi-asserted-by":"publisher","unstructured":"Hoover, B., Strobelt, H. & Gehrmann, S. exBERT: a visual analysis tool to explore learned representations in transformer models. In Proc 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations 187\u2013196 (Association for Computational Linguistics, 2019); https:\/\/doi.org\/10.18653\/v1\/2020.acl-demos.22","DOI":"10.18653\/v1\/2020.acl-demos.22"},{"key":"499_CR76","unstructured":"OpenAI\u2019s massive GPT-3 model is impressive, but size isn\u2019t everything. VentureBeat https:\/\/venturebeat.com\/2020\/06\/01\/ai-Junemachine-learning-openai-gpt-3-size-isnt-everything\/ (1 June 2020)."},{"key":"499_CR77","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1038\/s42256-020-0219-9","volume":"2","author":"P Dhar","year":"2020","unstructured":"Dhar, P. The carbon impact of artificial intelligence. Nat. Mach. Intell. 2, 423\u2013425 (2020).","journal-title":"Nat. Mach. Intell."},{"key":"499_CR78","unstructured":"Li, Z. et al. Train large, then compress: rethinking model size for efficient training and inference of Transformers. In Proc. 37th International Conference on Machine Learning ICML 2020 5914\u20135924 (ICML, 2020)."},{"key":"499_CR79","unstructured":"AI and Compute; https:\/\/openai.com\/blog\/ai-and-compute\/"},{"key":"499_CR80","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1145\/1364782.1364802","volume":"51","author":"DE Shaw","year":"2008","unstructured":"Shaw, D. E. et al. Anton, a special-purpose machine for molecular dynamics simulation. Commun. ACM 51, 91\u201397 (2008).","journal-title":"Commun. ACM"},{"key":"499_CR81","doi-asserted-by":"publisher","first-page":"10184","DOI":"10.1073\/pnas.1103547108","volume":"108","author":"I Buch","year":"2011","unstructured":"Buch, I., Giorgino, T. & De Fabritiis, G. Complete reconstruction of an enzyme-inhibitor binding process by molecular dynamics simulations. Proc. Natl Acad. Sci. USA 108, 10184\u201310189 (2011).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"499_CR82","doi-asserted-by":"publisher","first-page":"2200","DOI":"10.1021\/acs.jcim.5b00453","volume":"55","author":"N Ferruz","year":"2015","unstructured":"Ferruz, N., Harvey, M. J., Mestres, J. & De Fabritiis, G. Insights from fragment hit binding assays by molecular simulations. J. Chem. Inf. Model. 55, 2200\u20132205 (2015).","journal-title":"J. Chem. Inf. Model."},{"key":"499_CR83","doi-asserted-by":"publisher","first-page":"687","DOI":"10.1038\/s41592-019-0496-6","volume":"16","author":"KK Yang","year":"2019","unstructured":"Yang, K. K., Wu, Z. & Arnold, F. H. Machine-learning-guided directed evolution for protein engineering. Nat. Methods 16, 687\u2013694 (2019).","journal-title":"Nat. Methods"},{"key":"499_CR84","unstructured":"Chu, S. K. S. & Siegel, J. Predicting single-point mutational effect on protein stability. In Proc. 35th Conference on Neural Information Processing Systems (NIPS, 2021)."},{"key":"499_CR85","doi-asserted-by":"crossref","unstructured":"Hsu, C., Nisonoff, H., Fannjiang, C. & Listgarten, J. Combining evolutionary and assay-labelled data for protein fitness prediction. Preprint at https:\/\/www.biorxiv.org\/content\/10.1101\/2021.03.28.437402v1 (2021).","DOI":"10.1101\/2021.03.28.437402"},{"key":"499_CR86","doi-asserted-by":"publisher","first-page":"10900","DOI":"10.1073\/pnas.1707171114","volume":"114","author":"D Baran","year":"2017","unstructured":"Baran, D. et al. Principles for computational design of binding antibodies. Proc. Natl Acad. Sci. USA 114, 10900\u201310905 (2017).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"499_CR87","doi-asserted-by":"crossref","unstructured":"Dai, Z. et al. Transformer-XL: attentive language models beyond a fixed-length context. In Proc. 57th Annual Meeting for the Association for Computational Linguistics 2978\u20132988 (ACL, 2019).","DOI":"10.18653\/v1\/P19-1285"},{"key":"499_CR88","first-page":"7057","volume":"32","author":"G Lample","year":"2019","unstructured":"Lample, G. & Conneau, A. Cross-lingual language model pretraining. Adv. Neural Inf. Process. Syst. 32, 7057\u20137067 (2019).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"499_CR89","unstructured":"Yang, Z. et al. XLNet: Generalized autoregressive pretraining for language understanding. In Proc. 33rd International Conference on Neural Information Processing Systems Vol. 517, 5753\u20135763 (ACM, 2019)."},{"key":"499_CR90","unstructured":"Liu, Y. et al. RoBERTa: a robustly optimized BERT pretraining approach. Preprint at https:\/\/arxiv.org\/abs\/1907.11692 (2019)."},{"key":"499_CR91","unstructured":"Shoeybi, M. et al. Megatron-LM: training multi-billion parameter language models using model parallelism. Preprint at https:\/\/arxiv.org\/abs\/1909.08053 (2019)."},{"key":"499_CR92","unstructured":"Lan, Z. et al. ALBERT: a lite BERT for self-supervised learning of language representations. Preprint at https:\/\/arxiv.org\/abs\/1909.11942 (2019)."},{"key":"499_CR93","unstructured":"Sanh, V., Debut, L., Chaumond, J. & Wolf, T. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. Preprint at https:\/\/arxiv.org\/abs\/1910.01108 (2019)."},{"key":"499_CR94","unstructured":"Gao, L. et al. The Pile: an 800-GB dataset of diverse text for language modeling. Preprint at https:\/\/arxiv.org\/abs\/2101.00027 (2020)."},{"key":"499_CR95","doi-asserted-by":"publisher","unstructured":"Rasley, J., Rajbhandari, S., Ruwase, O. & He, Y. DeepSpeed: system optimizations enable training deep learning models with over 100 billion parameters. In Proc. 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining 3505\u20133506 https:\/\/doi.org\/10.1145\/3394486.3406703 (ACM, 2020).","DOI":"10.1145\/3394486.3406703"},{"key":"499_CR96","unstructured":"Clark, K., Luong, M.-T., Brain, G., Le Google Brain, Q. V. & Manning, C. D. ELECTRA: pre-training text encoders as discriminators rather than generators. Preprint at https:\/\/arxiv.org\/abs\/2003.10555 (2020)."},{"key":"499_CR97","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C. et al. Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21, 1\u201367 (2020).","journal-title":"J. Mach. Learn. Res."},{"key":"499_CR98","first-page":"1","volume":"23","author":"W Fedus","year":"2022","unstructured":"Fedus, W., Brain, G., Zoph, B. & Shazeer, N. Switch transformers: scaling to trillion parameter models with simple and efficient sparsity. J. Mach. Learn. Res. 23, 1\u201339 (2022).","journal-title":"J. Mach. Learn. Res."},{"key":"499_CR99","unstructured":"Smith, S. et al. Using DeepSpeed and Megatron to train Megatron-Turing NLG 530B, a large-scale generative language model. Preprint at https:\/\/arxiv.org\/abs\/2201.11990 (2022)."},{"key":"499_CR100","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1038\/nature19946","volume":"537","author":"PS Huang","year":"2016","unstructured":"Huang, P. S., Boyken, S. E. & Baker, D. The coming of age of de novo protein design. Nature 537, 320\u2013327 (2016).","journal-title":"Nature"}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-022-00499-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-022-00499-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-022-00499-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,27]],"date-time":"2024-09-27T14:38:13Z","timestamp":1727447893000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-022-00499-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,22]]},"references-count":100,"journal-issue":{"issue":"6","published-online":{"date-parts":[[2022,6]]}},"alternative-id":["499"],"URL":"https:\/\/doi.org\/10.1038\/s42256-022-00499-z","relation":{},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,22]]},"assertion":[{"value":"9 September 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 May 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 June 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}