{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T09:19:49Z","timestamp":1778059189562,"version":"3.51.4"},"reference-count":100,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["2019R1A6A1A10073437"],"award-info":[{"award-number":["2019R1A6A1A10073437"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["NRF-2020M3A9G7103933"],"award-info":[{"award-number":["NRF-2020M3A9G7103933"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"name":"New Faculty Startup Fund and the Creative-Pioneering Researchers Program"},{"DOI":"10.13039\/501100002551","name":"Seoul National University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002551","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006228","name":"Oak Ridge National Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006228","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-AC05-00OR22725"],"award-info":[{"award-number":["DE-AC05-00OR22725"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"name":"TensorFlow Research Cloud"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,10,1]]},"DOI":"10.1109\/tpami.2021.3095381","type":"journal-article","created":{"date-parts":[[2021,7,7]],"date-time":"2021-07-07T16:13:27Z","timestamp":1625674407000},"page":"7112-7127","source":"Crossref","is-referenced-by-count":1934,"title":["ProtTrans: Toward Understanding the Language of Life Through Self-Supervised Learning"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4998-312X","authenticated-orcid":false,"given":"Ahmed","family":"Elnaggar","sequence":"first","affiliation":[{"name":"Department of Informatics, Bioinformatics &amp; Computational Biology - i12, Technical University of Munich (TUM), Garching\/Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9601-3580","authenticated-orcid":false,"given":"Michael","family":"Heinzinger","sequence":"additional","affiliation":[{"name":"Department of Informatics, Bioinformatics &amp; Computational Biology - i12, Technical University of Munich (TUM), Garching\/Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4650-6181","authenticated-orcid":false,"given":"Christian","family":"Dallago","sequence":"additional","affiliation":[{"name":"Department of Informatics, Bioinformatics &amp; Computational Biology - i12, Technical University of Munich (TUM), Garching\/Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5115-8658","authenticated-orcid":false,"given":"Ghalia","family":"Rehawi","sequence":"additional","affiliation":[{"name":"Department of Informatics, Bioinformatics &amp; Computational Biology - i12, Technical University of Munich (TUM), Garching\/Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4976-9366","authenticated-orcid":false,"given":"Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Med AI Technology (Wu Xi) Ltd., Wu Xi, Jiang Su, China"}]},{"given":"Llion","family":"Jones","sequence":"additional","affiliation":[{"name":"Google AI, Google, Mountain View, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9196-5830","authenticated-orcid":false,"given":"Tom","family":"Gibbs","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2095-4349","authenticated-orcid":false,"given":"Tamas","family":"Feher","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, CA, USA"}]},{"given":"Christoph","family":"Angerer","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, CA, USA"}]},{"given":"Martin","family":"Steinegger","sequence":"additional","affiliation":[{"name":"School of Biological Sciences, Seoul National University, Seoul, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7770-9091","authenticated-orcid":false,"given":"Debsindhu","family":"Bhowmik","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory (ORNL), Oak Ridge, TN, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0179-8424","authenticated-orcid":false,"given":"Burkhard","family":"Rost","sequence":"additional","affiliation":[{"name":"Department of Informatics, Bioinformatics &amp; Computational Biology - i12, Technical University of Munich (TUM), Garching\/Munich, Germany"}]}],"member":"263","reference":[{"key":"ref39","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"van der maaten","year":"2008","journal-title":"J Mach Learn Res"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1207382109"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-018-04964-5"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btu739"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1002\/prot.340190108"},{"key":"ref30","article-title":"High accuracy protein structure prediction using deep learning","author":"john","year":"2020","journal-title":"Fourteenth Critical Assessment of Techniques for Protein Structure Prediction (Abstract Book)"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-564"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.0737502100"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-019-38746-w"},{"key":"ref34","doi-asserted-by":"crossref","DOI":"10.1101\/2020.03.07.982272","article-title":"ProGen: Language modeling for protein generation","author":"madani","year":"2020"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.cell.2012.04.012"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1508380112"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/S0958-1669(96)80124-8"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1002\/prot.25155"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1006\/jmbi.1993.1413"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.90.16.7558"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-019-0437-4"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gky1049"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1110\/ps.03128904"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkab354"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1016\/S0969-2126(98)00047-1"},{"key":"ref50","article-title":"ALBERT: A lite BERT for Self-supervised learning of language representations","author":"lan","year":"2020"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1285"},{"key":"ref59","year":"2020"},{"key":"ref58","article-title":"Extreme scale-out super-MUC phase 2 - lessons learned","author":"hammer","year":"2016"},{"key":"ref57","article-title":"Press release announcing Supercomputer Fugaku","author":"limited","year":"2019"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1186\/s12859-019-3220-8"},{"key":"ref55","article-title":"Large batch optimization for deep learning: Training BERT in 76 minutes","author":"you","year":"2019"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/3388440.3412467"},{"key":"ref53","article-title":"Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences","author":"rives","year":"2019","journal-title":"bioRxiv 622803"},{"key":"ref52","first-page":"9689","article-title":"Evaluating protein transfer learning with TAPE","author":"rao","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gky1134"},{"key":"ref4","first-page":"8026","article-title":"PyTorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref3","article-title":"TensorFlow: Large-scale machine learning on heterogeneous distributed systems","author":"abadi","year":"2016"},{"key":"ref6","article-title":"Horovod: Fast and easy distributed deep learning in TensorFlow","author":"sergeev","year":"2018"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1296907.1296909"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref7","article-title":"PowerAI DDL","author":"cho","year":"2017"},{"key":"ref49","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1031"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btg224"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1038\/nbt.3988"},{"key":"ref48","article-title":"Electra: Pre-training text encoders as discriminators rather than generators","author":"clark","year":"2020"},{"key":"ref47","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","author":"raffel","year":"2019"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-0134(19990301)34:4<508::AID-PROT10>3.0.CO;2-4"},{"key":"ref41","first-page":"482","article-title":"Sixty-five years of the long march in protein secondary structure prediction: The final stretch?","volume":"19","author":"yang","year":"2018","journal-title":"Brief Bioinf"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/28.1.235"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1002\/prot.25423"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btx218"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1038\/srep18962"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkw306"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkv332"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1002\/prot.24863"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bts390"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0028766"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1016\/j.str.2009.03.015"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1093\/bioadv\/vbab035"},{"key":"ref79","article-title":"Language models are few-shot learners","author":"brown","year":"2020"},{"key":"ref60","year":"2020"},{"key":"ref62","article-title":"Optimal gradient checkpoint search for arbitrary computation graphs","author":"feng","year":"2019"},{"key":"ref61","article-title":"TFLMS: Large model support in tensor-flow by graph rewriting","author":"le","year":"2019"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459469"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/28.1.304"},{"key":"ref65","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2016"},{"key":"ref66","article-title":"Attention interpretability across NLP tasks","author":"vashishth","year":"2019"},{"key":"ref67","article-title":"Transformer protein language models are unsupervised structure learners","author":"rao","year":"2020","journal-title":"bioRxiv 2020 12 15 422761v1"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-3007"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.89.22.10915"},{"key":"ref1","article-title":"Announcing supercomputer summit","author":"wells","year":"2016"},{"key":"ref95","article-title":"MSA transformer","author":"rao","year":"2021","journal-title":"bioRxiv 2021 02 12 430858"},{"key":"ref94","article-title":"Learning protein sequence embeddings using information from structure","author":"bepler","year":"2019"},{"key":"ref93","article-title":"Language modelling for biological sequences&#x2013;curated datasets and baselines","author":"armenteros","year":"2020","journal-title":"bioRxiv 2020 03 09 983585"},{"key":"ref92","article-title":"Pre-training of deep bidirectional protein sequence representations with structural information","author":"min","year":"2020"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-019-0598-1"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1016\/S0076-6879(96)66033-9"},{"key":"ref98","article-title":"Reformer: The efficient transformer","author":"kitaev","year":"2019"},{"key":"ref99","article-title":"Big bird: Transformers for longer sequences","author":"zaheer","year":"2020"},{"key":"ref96","article-title":"Generating long sequences with sparse transformers","author":"child","year":"2019"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1145\/276698.276876"},{"key":"ref10","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref11","article-title":"XLNet: Generalized autoregressive pretraining for language understanding","author":"yang","year":"2020"},{"key":"ref12","article-title":"Megatron-LM: Training multi-billion parameter language models using model parallelism","author":"shoeybi","year":"2020"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/S0021-9258(18)64177-8"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.bb.25.060196.000553"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1002\/prot.25674"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btx431"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1002\/pmic.201800227"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1914677117"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1110\/ps.03128904"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1002\/humu.23961"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0141287"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1002\/prot.25585"},{"key":"ref83","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/19.suppl.2247"},{"key":"ref89","article-title":"ZeRO: Memory optimization towards training a trillion parameter models","author":"rajbhandari","year":"2019"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1186\/s12859-019-2932-0"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-020-80786-0"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1016\/j.cels.2019.03.006"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1007\/s00439-021-02411-y"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/34\/9893033\/9477085-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9893033\/09477085.pdf?arnumber=9477085","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,4]],"date-time":"2023-04-04T22:21:58Z","timestamp":1680646918000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9477085\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,1]]},"references-count":100,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2021.3095381","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2020.07.12.199554","asserted-by":"object"}]},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,1]]}}}