{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T04:12:11Z","timestamp":1779336731589,"version":"3.51.4"},"reference-count":38,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T00:00:00Z","timestamp":1777075200000},"content-version":"vor","delay-in-days":1,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006469","name":"Macao Science and Technology Development Fund","doi-asserted-by":"publisher","award":["0030\/2024\/RIA1"],"award-info":[{"award-number":["0030\/2024\/RIA1"]}],"id":[{"id":"10.13039\/501100006469","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Macao Polytechnic University","award":["RP\/FCA-15\/2023"],"award-info":[{"award-number":["RP\/FCA-15\/2023"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,5,3]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>T cell receptor (TCR) and peptide interactions (TPI) are one of the most important parts of T cell immunity. Experimental identification of TPI is time-consuming and labor-intensive; therefore, it is necessary to develop computational prediction method that exploit existing data to predict TPI.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We use huge TCR and peptide sequences to pre-train two language models (\u223c152M parameters), respectively, and integrate them into a sequence-based only prediction framework (i.e. RoBERTcr) with supervised fine-tuning (SFT). Visualization of amino acids embedding from pre-trained language model (PLM) shows biochemical clusters based on different properties, and our PLMs outperform existing protein language models (i.e. ESM and ProtTrans) under the same condition. RoBERTcr achieved higher performance than other state-of-the-art methods based on structures or sequences without dataset bias. The visualization of attention from our framework implies valuable spatial information that residues in TCR contacting peptides are the key to their interaction.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability<\/jats:title>\n                    <jats:p>RoBERTcr is free available at https:\/\/fca_icdb.mpu.edu.mo\/robertcr\/ and https:\/\/doi.org\/10.5281\/zenodo.18043054.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btag200","type":"journal-article","created":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T11:30:59Z","timestamp":1776771059000},"source":"Crossref","is-referenced-by-count":0,"title":["Supervised fine-tuning enhances unsupervised learning from 45 million amino acids in TCR and peptide sequences"],"prefix":"10.1093","volume":"42","author":[{"given":"Kewei","family":"Zhou","sequence":"first","affiliation":[{"name":"Faculty of Applied Sciences, Macao Polytechnic University Centre for Artificial Intelligence Driven Drug Discovery, , Rua de Lu\u00eds Gonzaga Gomes , Macao, 999078,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kai","family":"Xu","sequence":"additional","affiliation":[{"name":"Faculty of Applied Sciences, Macao Polytechnic University Centre for Artificial Intelligence Driven Drug Discovery, , Rua de Lu\u00eds Gonzaga Gomes , Macao, 999078,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaolong","family":"Lin","sequence":"additional","affiliation":[{"name":"Faculty of Applied Sciences, Macao Polytechnic University Centre for Artificial Intelligence Driven Drug Discovery, , Rua de Lu\u00eds Gonzaga Gomes , Macao, 999078,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Silong","family":"Zhai","sequence":"additional","affiliation":[{"name":"Faculty of Applied Sciences, Macao Polytechnic University Centre for Artificial Intelligence Driven Drug Discovery, , Rua de Lu\u00eds Gonzaga Gomes , Macao, 999078,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9284-3667","authenticated-orcid":false,"given":"Huanxiang","family":"Liu","sequence":"additional","affiliation":[{"name":"Faculty of Applied Sciences, Macao Polytechnic University Centre for Artificial Intelligence Driven Drug Discovery, , Rua de Lu\u00eds Gonzaga Gomes , Macao, 999078,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8974-0173","authenticated-orcid":false,"given":"Xiaojun","family":"Yao","sequence":"additional","affiliation":[{"name":"Faculty of Applied Sciences, Macao Polytechnic University Centre for Artificial Intelligence Driven Drug Discovery, , Rua de Lu\u00eds Gonzaga Gomes , Macao, 999078,","place":["China"]}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2026,4,24]]},"reference":[{"key":"2026052023575270500_btag200-B1","doi-asserted-by":"crossref","first-page":"eadf3700","DOI":"10.1126\/sciadv.adf3700","article-title":"TCR-engineered T cell therapy in solid tumors, state of the art and perspectives","volume":"9","author":"Baulu","year":"2023","journal-title":"Sci Adv"},{"key":"2026052023575270500_btag200-B2","doi-asserted-by":"crossref","first-page":"103099","DOI":"10.1016\/j.artmed.2025.103099","article-title":"TDMFS, tucker decomposition multimodal fusion model for pan-cancer survival prediction","volume":"162","author":"Chen","year":"2025","journal-title":"Artif Intell Med"},{"key":"2026052023575270500_btag200-B4","doi-asserted-by":"crossref","first-page":"300","DOI":"10.1038\/s42256-022-00459-7","article-title":"A transformer-based model to predict peptide-HLA class I binding and optimize mutated peptides for vaccine design","volume":"4","author":"Chu","year":"2022","journal-title":"Nat Mach Intell"},{"key":"2026052023575270500_btag200-B5","doi-asserted-by":"crossref","first-page":"639","DOI":"10.1038\/s41586-025-09697-2","article-title":"Enzyme specificity prediction using cross-attention graph neural networks","volume":"647","author":"Cui","year":"2025","journal-title":"Nature"},{"key":"2026052023575270500_btag200-B6","doi-asserted-by":"crossref","first-page":"7112","DOI":"10.1109\/TPAMI.2021.3095381","article-title":"ProtTrans, toward understanding the language of life through self-supervised learning","volume":"44","author":"Elnaggar","year":"2022","journal-title":"IEEE Trans Pattern Anal"},{"key":"2026052023575270500_btag200-B7","doi-asserted-by":"crossref","first-page":"3150","DOI":"10.1093\/bioinformatics\/bts565","article-title":"CD-HIT, accelerated for clustering the next-generation sequencing data","volume":"28","author":"Fu","year":"2012","journal-title":"Bioinformatics"},{"key":"2026052023575270500_btag200-B8","doi-asserted-by":"crossref","first-page":"472","DOI":"10.1038\/s41591-022-01700-x","article-title":"Ancestral SARS-CoV-2-specific T cells cross-recognize the omicron variant","volume":"28","author":"Gao","year":"2022","journal-title":"Nat Med"},{"key":"2026052023575270500_btag200-B9","doi-asserted-by":"crossref","first-page":"9881","DOI":"10.1039\/D4SC08141B","article-title":"TRAP, a contrastive learning-enhanced framework for robust TCR-pMHC binding prediction with improved generalizability","volume":"16","author":"Ge","year":"2025","journal-title":"Chem Sci"},{"key":"2026052023575270500_btag200-B10","doi-asserted-by":"crossref","first-page":"2231","DOI":"10.1016\/j.immuni.2023.09.005","article-title":"CD8+ T cells in the cancer-immunity cycle","volume":"56","author":"Giles","year":"2023","journal-title":"Immunity"},{"key":"2026052023575270500_btag200-B11","doi-asserted-by":"crossref","first-page":"1017","DOI":"10.1038\/s41592-022-01578-0","article-title":"VDJdb in the pandemic era, a compendium of T cell receptors specific for SARS-CoV-2","volume":"19","author":"Goncharov","year":"2022","journal-title":"Nat Methods"},{"key":"2026052023575270500_btag200-B12","doi-asserted-by":"crossref","first-page":"bbaf728","DOI":"10.1093\/bib\/bbaf728","article-title":"BiChemoCLAM, a weakly supervised multimodal framework for chemotherapy response prediction","volume":"27","author":"Gui","year":"2026","journal-title":"Brief Bioinform"},{"key":"2026052023575270500_btag200-B13","doi-asserted-by":"crossref","first-page":"4236","DOI":"10.1038\/s41467-025-59422-w","article-title":"A protein language model for exploring viral fitness landscapes","volume":"16","author":"Ito","year":"2025","journal-title":"Nat Commun"},{"key":"2026052023575270500_btag200-B14","doi-asserted-by":"crossref","first-page":"488","DOI":"10.1038\/s41586-022-04460-3","article-title":"T cell responses to SARS-CoV-2 spike cross-recognize omicron","volume":"603","author":"Keeton","year":"2022","journal-title":"Nature"},{"key":"2026052023575270500_btag200-B15","doi-asserted-by":"crossref","first-page":"2017","DOI":"10.1038\/s41467-025-57148-3","article-title":"Rapid and accurate prediction of protein homo-oligomer symmetry using Seq2Symm","volume":"16","author":"Kshirsagar","year":"2025","journal-title":"Nat Commun"},{"key":"2026052023575270500_btag200-B16","first-page":"1123","article-title":"Evolutionary-scale prediction of atomic-level protein structure with a language model","volume":"379","author":"Lin","year":"2023","journal-title":"Science (1979)"},{"key":"2026052023575270500_btag200-B17","doi-asserted-by":"crossref","first-page":"1052","DOI":"10.1002\/eji.201948085","article-title":"TCR-induced alteration of primary MHC peptide anchor residue","volume":"49","author":"Madura","year":"2019","journal-title":"Eur J Immunol"},{"key":"2026052023575270500_btag200-B18","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1038\/s43586-024-00292-9","article-title":"Adaptive immune receptor repertoire analysis","volume":"4","author":"Mhanna","year":"2024","journal-title":"Nat Rev Method Prime"},{"key":"2026052023575270500_btag200-B19","doi-asserted-by":"crossref","first-page":"1060","DOI":"10.1038\/s42003-021-02610-3","article-title":"NetTCR-2.0 enables accurate prediction of TCR-peptide binding by using paired TCR\u03b1 and \u03b2 sequence data","volume":"4","author":"Montemurro","year":"2021","journal-title":"Commun Biol"},{"key":"2026052023575270500_btag200-B20","doi-asserted-by":"crossref","first-page":"bbaa318","DOI":"10.1093\/bib\/bbaa318","article-title":"Current challenges for unseen-epitope TCR interaction prediction and a new perspective derived from image classification","volume":"22","author":"Moris","year":"2021","journal-title":"Brief Bioinform"},{"key":"2026052023575270500_btag200-B21","doi-asserted-by":"crossref","first-page":"e16530","DOI":"10.1002\/advs.202516530","article-title":"Accurate identification of protein binding sites for all drug modalities using ALLSites","volume":"13","author":"Mou","year":"2026","journal-title":"Adv Sci"},{"key":"2026052023575270500_btag200-B22","doi-asserted-by":"crossref","first-page":"2599","DOI":"10.1084\/jem.20141764","article-title":"The molecular bases of \u03b4\/\u03b1\u03b2 T cell-mediated antigen recognition","volume":"211","author":"Pellicci","year":"2014","journal-title":"J Exp Med"},{"key":"2026052023575270500_btag200-B24","doi-asserted-by":"crossref","first-page":"btad284","DOI":"10.1093\/bioinformatics\/btad284","article-title":"epiTCR, a highly sensitive predictor for TCR-peptide binding","volume":"39","author":"Pham","year":"2023","journal-title":"Bioinformatics"},{"key":"2026052023575270500_btag200-B25","doi-asserted-by":"crossref","first-page":"751","DOI":"10.1038\/s41577-022-00707-2","article-title":"A guide to antigen processing and presentation","volume":"22","author":"Pishesha","year":"2022","journal-title":"Nat Rev Immunol"},{"key":"2026052023575270500_btag200-B26","doi-asserted-by":"crossref","first-page":"5171","DOI":"10.1038\/s41467-025-60461-6","article-title":"Identifying T cell antigen at the atomic level with graph convolutional network","volume":"16","author":"Que","year":"2025","journal-title":"Nat Commun"},{"key":"2026052023575270500_btag200-B27","doi-asserted-by":"crossref","first-page":"e20221839","DOI":"10.1084\/jem.20221839","article-title":"Nutrients, signal 4 in T cell immunity","volume":"221","author":"Raynor","year":"2024","journal-title":"J Exp Med"},{"key":"2026052023575270500_btag200-B28","doi-asserted-by":"crossref","first-page":"e2016239118","DOI":"10.1073\/pnas.2016239118","article-title":"Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences","volume":"118","author":"Rives","year":"2021","journal-title":"Proc Natl Acad Sci USA"},{"key":"2026052023575270500_btag200-B29","doi-asserted-by":"crossref","first-page":"412","DOI":"10.1038\/s41392-021-00823-w","article-title":"T cell receptor (TCR) signaling in health and disease","volume":"6","author":"Shah","year":"2021","journal-title":"Signal Transduct. Target Ther"},{"key":"2026052023575270500_btag200-B30","doi-asserted-by":"crossref","first-page":"672","DOI":"10.1016\/j.cell.2018.06.017","article-title":"Isolation of a structural mechanism for uncoupling T cell receptor signaling from peptide-MHC binding","volume":"174","author":"Sibener","year":"2018","journal-title":"Cell"},{"key":"2026052023575270500_btag200-B31","doi-asserted-by":"crossref","first-page":"107882","DOI":"10.1016\/j.celrep.2020.107882","article-title":"High frequency of shared clonotypes in human T cell receptor repertoires","volume":"32","author":"Soto","year":"2020","journal-title":"Cell Rep"},{"key":"2026052023575270500_btag200-B32","doi-asserted-by":"crossref","first-page":"1803","DOI":"10.3389\/fimmu.2020.01803","article-title":"Prediction of specific TCR-peptide binding from large dictionaries of TCR-peptide pairs","volume":"11","author":"Springer","year":"2020","journal-title":"Front Immunol"},{"key":"2026052023575270500_btag200-B33","doi-asserted-by":"crossref","first-page":"bbaf434","DOI":"10.1093\/bib\/bbaf434","article-title":"ESM2_AMP, an interpretable framework for protein-protein interactions prediction and biological mechanism discovery","volume":"26","author":"Sun","year":"2025","journal-title":"Brief Bioinform"},{"key":"2026052023575270500_btag200-B34","doi-asserted-by":"crossref","first-page":"847","DOI":"10.1016\/j.cell.2022.01.015","article-title":"SARS-CoV-2 vaccination induces immunological T cell memory able to cross-recognize variants from alpha to omicron","volume":"185","author":"Tarke","year":"2022","journal-title":"Cell"},{"key":"2026052023575270500_btag200-B35","first-page":"D609","article-title":"UniProt, the universal protein knowledgebase in 2025","volume":"53","author":"UniProt","year":"2024","journal-title":"Nucleic Acids Res"},{"key":"2026052023575270500_btag200-B36","first-page":"E4792","article-title":"How an alloreactive T-cell receptor achieves peptide and MHC specificity","volume":"114","author":"Wang","year":"2017","journal-title":"Proc Natl Acad Sci USA"},{"key":"2026052023575270500_btag200-B37","doi-asserted-by":"crossref","first-page":"bbab335","DOI":"10.1093\/bib\/bbab335","article-title":"DLpTCR, an ensemble deep learning framework for predicting immunogenic peptide recognized by T cell receptor","volume":"22","author":"Xu","year":"2021","journal-title":"Brief Bioinform"},{"key":"2026052023575270500_btag200-B38","doi-asserted-by":"crossref","first-page":"e1013836","DOI":"10.1371\/journal.pcbi.1013836","article-title":"DSCA-HLAII, a dual-stream cross-attention model for predicting peptide-HLA class II interaction and presentation","volume":"22","author":"Yan","year":"2026","journal-title":"PLoS Comput Biol"},{"key":"2026052023575270500_btag200-B40","first-page":"gkaf876","article-title":"TCRdb 2.0, an updated T-cell receptor sequence database","volume":"1","author":"Yue","year":"2025","journal-title":"Nucleic Acids Res"},{"key":"2026052023575270500_btag200-B42","first-page":"1218","article-title":"A robustly optimized BERT pre-training approach with post-training","volume":"1","author":"Zhuang","year":"2021","journal-title":"Proc 20th Chin Natl Conf Comput Linguist"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btag200\/68156646\/btag200.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/42\/5\/btag200\/68156646\/btag200.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/42\/5\/btag200\/68156646\/btag200.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T03:58:30Z","timestamp":1779335910000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btag200\/8662188"}},"subtitle":[],"editor":[{"given":"Ercument","family":"Cicek","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2026,4,24]]},"references-count":38,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2026,5,3]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btag200","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2026,5]]},"published":{"date-parts":[[2026,4,24]]},"article-number":"btag200"}}