{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T00:07:20Z","timestamp":1775606840977,"version":"3.50.1"},"reference-count":59,"publisher":"Oxford University Press (OUP)","issue":"12","license":[{"start":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T00:00:00Z","timestamp":1731542400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100013105","name":"Shanghai Rising-Star Program","doi-asserted-by":"publisher","award":["23QD1400600"],"award-info":[{"award-number":["23QD1400600"]}],"id":[{"id":"10.13039\/501100013105","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["82204278"],"award-info":[{"award-number":["82204278"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["T2225002"],"award-info":[{"award-number":["T2225002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFC3400504"],"award-info":[{"award-number":["2022YFC3400504"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,11,28]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Assigning accurate property labels to proteins, like functional terms and catalytic activity, is challenging, especially for proteins without homologs and \u201ctail labels\u201d with few known examples. Previous methods mainly focused on protein sequence features, overlooking the semantic meaning of protein labels.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We introduce functional annotation of proteins using multimodal models (FAPM), a contrastive multimodal model that links natural language with protein sequence language. This model combines a pretrained protein sequence model with a pretrained large language model to generate labels, such as Gene Ontology (GO) functional terms and catalytic activity predictions, in natural language. Our results show that FAPM excels in understanding protein properties, outperforming models based solely on protein sequences or structures. It achieves state-of-the-art performance on public benchmarks and in-house experimentally annotated phage proteins, which often have few known homologs. Additionally, FAPM\u2019s flexibility allows it to incorporate extra text prompts, like taxonomy information, enhancing both its predictive performance and explainability. This novel approach offers a promising alternative to current methods that rely on multiple sequence alignment for protein annotation.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>The online demo is at: https:\/\/huggingface.co\/spaces\/wenkai\/FAPM_demo.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae680","type":"journal-article","created":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T07:20:36Z","timestamp":1731396036000},"source":"Crossref","is-referenced-by-count":11,"title":["FAPM: functional annotation of proteins using multimodal models beyond structural modeling"],"prefix":"10.1093","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-6869-7931","authenticated-orcid":false,"given":"Wenkai","family":"Xiang","sequence":"first","affiliation":[{"name":"Drug Discovery and Design Center, State Key Laboratory of Drug Research, Shanghai Institute of Materia Medica, Chinese Academy of Sciences , Shanghai 201203,","place":["China"]},{"name":"Lingang Laboratory , Shanghai 200031,","place":["China"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5041-9385","authenticated-orcid":false,"given":"Zhaoping","family":"Xiong","sequence":"additional","affiliation":[{"name":"ProtonUnfold Technology Co., Ltd , Suzhou 215000,","place":["China"]}]},{"given":"Huan","family":"Chen","sequence":"additional","affiliation":[{"name":"BioBank, The First Affiliated Hospital of Xi\u2019an Jiaotong University , Xi\u2019an 710061,","place":["China"]}]},{"given":"Jiacheng","family":"Xiong","sequence":"additional","affiliation":[{"name":"Drug Discovery and Design Center, State Key Laboratory of Drug Research, Shanghai Institute of Materia Medica, Chinese Academy of Sciences , Shanghai 201203,","place":["China"]},{"name":"University of Chinese Academy of Sciences , Beijing 100049,","place":["China"]}]},{"given":"Wei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Drug Discovery and Design Center, State Key Laboratory of Drug Research, Shanghai Institute of Materia Medica, Chinese Academy of Sciences , Shanghai 201203,","place":["China"]},{"name":"University of Chinese Academy of Sciences , Beijing 100049,","place":["China"]}]},{"given":"Zunyun","family":"Fu","sequence":"additional","affiliation":[{"name":"Drug Discovery and Design Center, State Key Laboratory of Drug Research, Shanghai Institute of Materia Medica, Chinese Academy of Sciences , Shanghai 201203,","place":["China"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3323-3092","authenticated-orcid":false,"given":"Mingyue","family":"Zheng","sequence":"additional","affiliation":[{"name":"Drug Discovery and Design Center, State Key Laboratory of Drug Research, Shanghai Institute of Materia Medica, Chinese Academy of Sciences , Shanghai 201203,","place":["China"]},{"name":"Lingang Laboratory , Shanghai 200031,","place":["China"]},{"name":"University of Chinese Academy of Sciences , Beijing 100049,","place":["China"]}]},{"given":"Bing","family":"Liu","sequence":"additional","affiliation":[{"name":"BioBank, The First Affiliated Hospital of Xi\u2019an Jiaotong University , Xi\u2019an 710061,","place":["China"]}]},{"given":"Qian","family":"Shi","sequence":"additional","affiliation":[{"name":"Lingang Laboratory , Shanghai 200031,","place":["China"]}]}],"member":"286","published-online":{"date-parts":[[2024,11,14]]},"reference":[{"key":"2024121022510614500_btae680-B1","first-page":"1514","volume-title":"Nat Methods","author":"Ahdritz","year":"2024"},{"key":"2024121022510614500_btae680-B2","doi-asserted-by":"crossref","first-page":"403","DOI":"10.1016\/S0022-2836(05)80360-2","article-title":"Basic local alignment search tool","volume":"215","author":"Altschul","year":"1990","journal-title":"J Mol Biol"},{"key":"2024121022510614500_btae680-B3","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1038\/75556","article-title":"Gene Ontology: tool for the unification of biology","volume":"25","author":"Ashburner","year":"2000","journal-title":"Nat Genet"},{"key":"2024121022510614500_btae680-B4","doi-asserted-by":"crossref","first-page":"2241","DOI":"10.1093\/nar\/19.suppl.2241","article-title":"PROSITE: a dictionary of sites and patterns in proteins","volume":"19","author":"Bairoch","year":"1991","journal-title":"Nucleic Acids Res"},{"key":"2024121022510614500_btae680-B5","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1016\/j.cell.2009.09.028","article-title":"Cellular and molecular mechanisms of pain","volume":"139","author":"Basbaum","year":"2009","journal-title":"Cell"},{"key":"2024121022510614500_btae680-B6","doi-asserted-by":"crossref","first-page":"D344","DOI":"10.1093\/nar\/gkaa977","article-title":"The InterPro protein families and domains database: 20 years on","volume":"49","author":"Blum","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2024121022510614500_btae680-B7","doi-asserted-by":"crossref","first-page":"i318","DOI":"10.1093\/bioinformatics\/btad208","article-title":"Combining protein sequences and structures with transformers and equivariant graph neural networks to predict protein function","volume":"39","author":"Boadu","year":"2023","journal-title":"Bioinformatics"},{"key":"2024121022510614500_btae680-B8","doi-asserted-by":"crossref","first-page":"365","DOI":"10.1093\/nar\/gkg095","article-title":"The SWISS-PROT protein knowledgebase and its supplement TrEMBL in 2003","volume":"31","author":"Boeckmann","year":"2003","journal-title":"Nucleic Acids Res"},{"key":"2024121022510614500_btae680-B9","doi-asserted-by":"crossref","first-page":"2102","DOI":"10.1093\/bioinformatics\/btac020","article-title":"ProteinBERT: a universal deep-learning model of protein sequence and function","volume":"38","author":"Brandes","year":"2022","journal-title":"Bioinformatics"},{"key":"2024121022510614500_btae680-B10","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1038\/nmeth.3176","article-title":"Fast and sensitive protein alignment using DIAMOND","volume":"12","author":"Buchfink","year":"2015","journal-title":"Nat Methods"},{"key":"2024121022510614500_btae680-B11","first-page":"627","volume-title":"Methods Mol Biol","author":"Burley","year":"2017"},{"key":"2024121022510614500_btae680-B12","first-page":"257","author":"Chiang","year":"2019"},{"key":"2024121022510614500_btae680-B13","doi-asserted-by":"crossref","first-page":"i53","DOI":"10.1093\/bioinformatics\/btt228","article-title":"Information-theoretic evaluation of predicted ontological annotations","volume":"29","author":"Clark","year":"2013","journal-title":"Bioinformatics"},{"key":"2024121022510614500_btae680-B14","doi-asserted-by":"crossref","first-page":"e4497","DOI":"10.1002\/pro.4497","article-title":"BepiPred-3.0: improved B-cell epitope prediction using protein language models","volume":"31","author":"Clifford","year":"2022","journal-title":"Protein Sci"},{"key":"2024121022510614500_btae680-B15","doi-asserted-by":"crossref","first-page":"D523","DOI":"10.1093\/nar\/gkac1052","article-title":"UniProt: the universal protein knowledgebase in 2023","volume":"51","author":"Consortium","year":"2023","journal-title":"Nucleic Acids Res"},{"key":"2024121022510614500_btae680-B16","author":"Devlin"},{"key":"2024121022510614500_btae680-B17","doi-asserted-by":"crossref","first-page":"755","DOI":"10.1093\/bioinformatics\/14.9.755","article-title":"Profile hidden Markov models","volume":"14","author":"Eddy","year":"1998","journal-title":"Bioinformatics"},{"key":"2024121022510614500_btae680-B18","author":"Fang"},{"key":"2024121022510614500_btae680-B19","doi-asserted-by":"crossref","first-page":"3168","DOI":"10.1038\/s41467-021-23303-9","article-title":"Structure-based protein function prediction using graph convolutional networks","volume":"12","author":"Gligorijevi\u0107","year":"2021","journal-title":"Nat Commun"},{"key":"2024121022510614500_btae680-B20","doi-asserted-by":"crossref","first-page":"1419","DOI":"10.1074\/jbc.M107889200","article-title":"Crystal structures of the semireduced and inhibitor-bound forms of cyclic nucleotide phosphodiesterase from Arabidopsis thaliana 210","volume":"277","author":"Hofmann","year":"2002","journal-title":"J Biol Chem"},{"key":"2024121022510614500_btae680-B21","doi-asserted-by":"crossref","first-page":"6601","DOI":"10.1038\/s41467-024-50955-0","article-title":"Accurate prediction of protein function using statistics-informed graph networks","volume":"15","author":"Jang","year":"2024","journal-title":"Nat Commun"},{"key":"2024121022510614500_btae680-B22","author":"Jiang"},{"key":"2024121022510614500_btae680-B23","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1186\/s13059-016-1037-6","article-title":"An expanded evaluation of protein function prediction methods shows an improvement in accuracy","volume":"17","author":"Jiang","year":"2016","journal-title":"Genome Biol"},{"key":"2024121022510614500_btae680-B24","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","article-title":"Highly accurate protein structure prediction with AlphaFold","volume":"596","author":"Jumper","year":"2021","journal-title":"Nature"},{"key":"2024121022510614500_btae680-B25","doi-asserted-by":"crossref","first-page":"1709","DOI":"10.3390\/biom12111709","article-title":"GOProFormer: a multi-modal transformer method for gene ontology protein function prediction","volume":"12","author":"Kabir","year":"2022","journal-title":"Biomolecules"},{"key":"2024121022510614500_btae680-B26","author":"Kipf"},{"key":"2024121022510614500_btae680-B27","doi-asserted-by":"crossref","first-page":"724","DOI":"10.1016\/j.tim.2019.03.008","article-title":"Bacillus subtilis","volume":"27","author":"Kov\u00e1cs","year":"2019","journal-title":"Trends Microbiol"},{"key":"2024121022510614500_btae680-B28","doi-asserted-by":"crossref","first-page":"1600","DOI":"10.1111\/j.1742-4658.2007.05707.x","article-title":"Solution structure of the catalytic domain of RICH protein from goldfish","volume":"274","author":"Kozlov","year":"2007","journal-title":"FEBS J"},{"key":"2024121022510614500_btae680-B29","first-page":"25","author":"Krizhevsky","year":"2012"},{"key":"2024121022510614500_btae680-B30","doi-asserted-by":"crossref","first-page":"1501","DOI":"10.1006\/jmbi.1994.1104","article-title":"Hidden Markov models in computational biology: applications to protein modeling","volume":"235","author":"Krogh","year":"1994","journal-title":"J Mol Biol"},{"key":"2024121022510614500_btae680-B31","author":"Kulmanov"},{"key":"2024121022510614500_btae680-B32","doi-asserted-by":"crossref","first-page":"422","DOI":"10.1093\/bioinformatics\/btz595","article-title":"DeepGOPlus: improved protein function prediction from sequence","volume":"36","author":"Kulmanov","year":"2020","journal-title":"Bioinformatics"},{"key":"2024121022510614500_btae680-B33","doi-asserted-by":"crossref","first-page":"i238","DOI":"10.1093\/bioinformatics\/btac256","article-title":"DeepGOZero: improving protein function prediction from sequence and zero-shot learning based on ontology axioms","volume":"38","author":"Kulmanov","year":"2022","journal-title":"Bioinformatics"},{"key":"2024121022510614500_btae680-B34","doi-asserted-by":"crossref","first-page":"660","DOI":"10.1093\/bioinformatics\/btx624","article-title":"DeepGO: predicting protein functions from sequence and interactions using a deep ontology-aware classifier","volume":"34","author":"Kulmanov","year":"2018","journal-title":"Bioinformatics"},{"key":"2024121022510614500_btae680-B35","author":"Kulmanov","year":"2019"},{"key":"2024121022510614500_btae680-B36","first-page":"9694","article-title":"Align before fuse: vision and language representation learning with momentum distillation","volume":"34","author":"Li","year":"2021","journal-title":"Adv Neural Inf Process Syst"},{"key":"2024121022510614500_btae680-B37","author":"Lin"},{"key":"2024121022510614500_btae680-B38","author":"Lin"},{"key":"2024121022510614500_btae680-B39","doi-asserted-by":"crossref","first-page":"7478","DOI":"10.1109\/TNNLS.2022.3227717","article-title":"A survey of visual transformers","volume":"35","author":"Liu","year":"2024","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"2024121022510614500_btae680-B40","author":"McInnes"},{"key":"2024121022510614500_btae680-B41","doi-asserted-by":"crossref","first-page":"142","DOI":"10.1128\/CMR.11.1.142","article-title":"Diarrheagenic Escherichia coli","volume":"11","author":"Nataro","year":"1998","journal-title":"Clin Microbiol Rev"},{"key":"2024121022510614500_btae680-B42","doi-asserted-by":"crossref","first-page":"3486","DOI":"10.1093\/emboj\/cdg359","article-title":"Diversity of protein\u2013protein interactions","volume":"22","author":"Nooren","year":"2003","journal-title":"EMBO J"},{"key":"2024121022510614500_btae680-B43","doi-asserted-by":"crossref","first-page":"e1010961","DOI":"10.1371\/journal.pcbi.1010961","article-title":"ProInfer: an interpretable protein inference tool leveraging on biological networks","volume":"19","author":"Peng","year":"2023","journal-title":"PLoS Comput Biol"},{"key":"2024121022510614500_btae680-B44","doi-asserted-by":"crossref","first-page":"6074","DOI":"10.1109\/JBHI.2023.3316750","article-title":"Large ai models in health informatics: applications, challenges, and the future","volume":"27","author":"Qiu","year":"2023","journal-title":"IEEE J Biomed Health Inform"},{"key":"2024121022510614500_btae680-B45","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1038\/nmeth.2340","article-title":"A large-scale evaluation of computational protein function prediction","volume":"10","author":"Radivojac","year":"2013","journal-title":"Nat Methods"},{"key":"2024121022510614500_btae680-B46","doi-asserted-by":"crossref","first-page":"1697","DOI":"10.1261\/rna.046797.114","article-title":"Structure and mechanism of E. coli RNA 2\u2032,3\u2032-cyclic phosphodiesterase","volume":"20","author":"Remus","year":"2014","journal-title":"RNA"},{"key":"2024121022510614500_btae680-B47","doi-asserted-by":"crossref","first-page":"e80942","DOI":"10.7554\/eLife.80942","article-title":"ProteInfer, deep neural networks for protein functional inference","volume":"12","author":"Sanderson","year":"2023","journal-title":"Elife"},{"key":"2024121022510614500_btae680-B48","article-title":"Convolutional LSTM network: a machine learning approach for precipitation nowcasting","volume":"28","author":"Shi","year":"2015","journal-title":"Adv Neural Inf Process Syst"},{"key":"2024121022510614500_btae680-B49","doi-asserted-by":"crossref","first-page":"D607","DOI":"10.1093\/nar\/gky1131","article-title":"STRING v11: protein\u2013protein association networks with increased coverage, supporting functional discovery in genome-wide experimental datasets","volume":"47","author":"Szklarczyk","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2024121022510614500_btae680-B50","author":"Taylor","year":"2022"},{"key":"2024121022510614500_btae680-B51","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1038\/s42256-022-00457-9","article-title":"Learning functional properties of proteins with language models","volume":"4","author":"Unsal","year":"2022","journal-title":"Nat Mach Intell"},{"key":"2024121022510614500_btae680-B52","doi-asserted-by":"crossref","first-page":"D439","DOI":"10.1093\/nar\/gkab1061","article-title":"AlphaFold protein structure database: massively expanding the structural coverage of protein-sequence space with high-accuracy models","volume":"50","author":"Varadi","year":"2022","journal-title":"Nucleic Acids Res"},{"key":"2024121022510614500_btae680-B53","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv Neural Inf Process Syst"},{"key":"2024121022510614500_btae680-B54","doi-asserted-by":"crossref","first-page":"349","DOI":"10.1016\/j.gpb.2023.04.001","article-title":"NetGO 3.0: protein language model improves large-scale functional annotations","volume":"21","author":"Wang","year":"2023","journal-title":"Genomics Proteomics Bioinformatics"},{"key":"2024121022510614500_btae680-B55","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1016\/j.tim.2012.11.003","article-title":"Phage\u2013bacteria infection networks","volume":"21","author":"Weitz","year":"2013","journal-title":"Trends Microbiol"},{"key":"2024121022510614500_btae680-B56","doi-asserted-by":"crossref","first-page":"i262","DOI":"10.1093\/bioinformatics\/btab270","article-title":"DeepGraphGO: graph neural network for large-scale, multispecies protein function prediction","volume":"37","author":"You","year":"2021","journal-title":"Bioinformatics"},{"key":"2024121022510614500_btae680-B57","doi-asserted-by":"crossref","first-page":"bbad117","DOI":"10.1093\/bib\/bbad117","article-title":"Fast and accurate protein function prediction from sequence through pretrained language model and homology-based label diffusion","volume":"24","author":"Yuan","year":"2023","journal-title":"Brief Bioinform"},{"key":"2024121022510614500_btae680-B58","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3676955","article-title":"A survey of controllable text generation using transformer-based pre-trained language models","volume":"56","author":"Zhang","year":"2024","journal-title":"ACM Comput Surv"},{"key":"2024121022510614500_btae680-B59","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1186\/s13059-024-03166-1","article-title":"AnnoPRO: a strategy for protein function annotation based on multi-scale protein representation and a hybrid deep learning of dual-path encoding","volume":"25","author":"Zheng","year":"2024","journal-title":"Genome Biol"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae680\/60675512\/btae680.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/12\/btae680\/60972751\/btae680.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/12\/btae680\/60972751\/btae680.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T17:51:50Z","timestamp":1733853110000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae680\/7900294"}},"subtitle":[],"editor":[{"given":"Jonathan","family":"Wren","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2024,11,14]]},"references-count":59,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2024,11,28]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae680","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2024.05.07.593067","asserted-by":"object"}]},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,12]]},"published":{"date-parts":[[2024,11,14]]},"article-number":"btae680"}}