{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T02:45:51Z","timestamp":1779158751230,"version":"3.51.4"},"reference-count":98,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1109\/tai.2025.3631454","type":"journal-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T18:44:38Z","timestamp":1762973078000},"page":"2478-2495","source":"Crossref","is-referenced-by-count":2,"title":["A Comprehensive Review of Transformer-Based Language Models for Protein Sequence Analysis and Design"],"prefix":"10.1109","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0697-6368","authenticated-orcid":false,"given":"Nimisha","family":"Ghosh","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, Shiv Nadar University, Chennai, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1898-5285","authenticated-orcid":false,"given":"Daniele","family":"Santoni","sequence":"additional","affiliation":[{"name":"Institute for System Analysis and Computer Science &#x201C;Antonio Ruberti&#x201D;, National Research Council of Italy, Rome, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1152-8073","authenticated-orcid":false,"given":"Debaleena","family":"Nawn","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Adamas University, Kolkata, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6643-1694","authenticated-orcid":false,"given":"Eleonora","family":"Ottaviani","sequence":"additional","affiliation":[{"name":"Institute for System Analysis and Computer Science &#x201C;Antonio Ruberti&#x201D;, National Research Council of Italy, Rome, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0544-5407","authenticated-orcid":false,"given":"Giovanni","family":"Felici","sequence":"additional","affiliation":[{"name":"Institute for System Analysis and Computer Science &#x201C;Antonio Ruberti&#x201D;, National Research Council of Italy, Rome, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/S0021-9258(18)72300-4"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1042\/bj0450563"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/0022-5193(66)90021-X"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/BF00204658"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1093\/protein\/6.5.485"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1089\/cmb.2005.12.64"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IEMBS.2006.260336"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bti806"},{"issue":"4","key":"ref9","first-page":"158","article-title":"Distributed representations of words and phrases and their compositionality","volume":"26","author":"Mikolov","year":"2013","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1406.1078"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.csbj.2025.03.024"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.1101\/676825","article-title":"Evaluating protein transfer learning with tape","author":"Rao","year":"2019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1923-7"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-021-03819-2"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3095381"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btac020"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2016239118"},{"key":"ref21","first-page":"29","article-title":"Language models enable zero-shot prediction of the effects of mutations on protein function","volume":"34","author":"Meier","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1126\/science.ade2574"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1101\/2020.03.07.982272","article-title":"ProGen: Language modeling for protein generation","author":"Madani","year":"2020"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.cels.2023.10.002"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-32007-7"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1126\/science.ads0018"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.gpb.2022.11.014"},{"key":"ref28","first-page":"8844","article-title":"MSA transformer","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","volume":"139","author":"Rao","year":"2021"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1126\/science.abj8754"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1038\/s41587-022-01432-w"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TCBB.2023.3247634"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1002\/pmic.202300011"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.7554\/eLife.82819"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1093\/bioadv\/vbad001"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.sbi.2025.103027"},{"key":"ref36","article-title":"A comprehensive review of protein language models","author":"Wang","year":"2025"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"issue":"6","key":"ref38","first-page":"245","article-title":"Improving language understanding by generative pre-training","volume":"23","author":"Radford","year":"2018","journal-title":"OpenAI"},{"key":"ref39","article-title":"Ontoprotein: Protein pretraining with gene ontology embedding","author":"Zhang","year":"2022"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM55620.2022.9995108"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TNB.2023.3278033"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1093\/gigascience\/giae093"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CIBCB55180.2022.9863026"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3569192.3569196"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jproteome.2c00667"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btaf124"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btac469"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1093\/glycob\/cwad033"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkac278"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1186\/s12859-022-05000-6"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.3389\/fgene.2022.885627"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.3390\/ijms232012385"},{"key":"ref53","article-title":"Well-read students learn better: On the importance of pre-training compact models","author":"Iulia","year":"2019"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1186\/s12859-022-05031-z"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbac401"},{"key":"ref56","article-title":"Roberta: A robustly optimized Bert pretraining approach","author":"Liu","year":"2019"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btac474"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-024-71783-8"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1186\/s12859-019-3220-8"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2024.107918"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2024.109598"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/s00439-025-02727-z"},{"key":"ref63","article-title":"PetriBert: Augmenting Bert with tridimensional encoding for inverse protein folding and design","author":"Dumortier","year":"2022","journal-title":"bioRxiv"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbac499"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.3389\/fgene.2022.859188"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbae359"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/s12539-023-00549-z"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbab200"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-024-64211-4"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijbiomac.2024.131311"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1093\/nargab\/lqae150"},{"key":"ref72","article-title":"xtrimopglm: Unified 100b-scale pre-trained transformer for deciphering the language of protein","author":"Chen","year":"2023","journal-title":"bioRxiv"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.26"},{"key":"ref74","article-title":"CFP-Gen: Combinatorial functional protein generation via diffusion language models","author":"Yin","year":"2025"},{"key":"ref75","article-title":"Diffusion language models are versatile protein learners","volume-title":"Proc. 41st Int. Conf. Mach. Learn. (ICML)","author":"Wang","year":"2024"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00639-z"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2025.3539710"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/s40747-023-01237-7"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1002\/pro.4862"},{"key":"ref80","first-page":"279","article-title":"BERTology meets biology: Interpreting attention in protein language models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Vig","year":"2021"},{"issue":"4","key":"ref81","first-page":"349","article-title":"Translating embeddings for modeling multi-relational data","volume":"26","author":"Bordes","year":"2013","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btx431"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d18-2012"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1038\/s41929-018-0159-5"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1126\/science.aba3304"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-021-00310-5"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkab1061"},{"key":"ref88","article-title":"Protein complex prediction with alphafold-multimer","author":"Evans","year":"2021","journal-title":"bioRxiv"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-024-07487-w"},{"issue":"6","key":"ref90","first-page":"1709","article-title":"Bidirectional hierarchical protein multi-modal representation learning","volume":"21","author":"Liu","year":"2025"},{"key":"ref91","article-title":"Longformer: The long-document transformer","author":"Beltagy","year":"2020"},{"key":"ref92","article-title":"Rethinking Attention with Performers","author":"Choromanski","year":"2022"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1189"},{"key":"ref94","article-title":"Linformer: Self-Attention with Linear Complexity","author":"Wang","year":"2020"},{"key":"ref95","article-title":"Long-context protein language model","author":"Wang","year":"2024"},{"key":"ref96","article-title":"Uni-Mol2: Exploring molecular pretraining model at scale","author":"Ji","year":"2024"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.3389\/fbinf.2023.1178926"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkae1082"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/9078688\/11503071\/11242134.pdf?arnumber=11242134","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T19:57:11Z","timestamp":1777665431000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11242134\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":98,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tai.2025.3631454","relation":{},"ISSN":["2691-4581"],"issn-type":[{"value":"2691-4581","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5]]}}}