{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T02:33:23Z","timestamp":1774406003902,"version":"3.50.1"},"reference-count":33,"publisher":"Oxford University Press (OUP)","issue":"8","license":[{"start":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T00:00:00Z","timestamp":1753660800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,8,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Proteins are of great significance in living organisms. However, understanding their functions encounters numerous challenges, such as insufficient integration of multimodal information, a large number of training parameters, limited flexibility of classification-based methods, and the lack of systematic evaluation metrics for protein question answering systems. To tackle these issues, we propose the Prot2Chat framework.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We modified ProteinMPNN to encode protein sequence and structural information in a unified way. We used a large language model (LLM) to encode questions into vectors and developed a protein-text adapter to compress protein information into virtual tokens based on these vectors, achieving the early fusion of text and protein information. Finally, the same LLM reads the virtual tokens and the questions to generate answers. To optimize training efficiency, we froze the encoder and employed low-rank adaptation (LoRA) techniques for the LLM. Experiments on two datasets show that both automated metrics and expert evaluations demonstrate the superior performance of our model, and zero-shot prediction results highlight its generalization ability. We have developed an easy-to-use web interactive platform and a rapid installation option, allowing users to swiftly engage with Prot2Chat.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>The models and codes are available at https:\/\/github.com\/wangzc1233\/Prot2Chat.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaf396","type":"journal-article","created":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T16:18:57Z","timestamp":1753719537000},"source":"Crossref","is-referenced-by-count":4,"title":["Prot2Chat: protein large language model with early fusion of text, sequence, and structure"],"prefix":"10.1093","volume":"41","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8011-3228","authenticated-orcid":false,"given":"Zhicong","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Soochow University , Suzhou 215006,","place":["China"]}]},{"given":"Zicheng","family":"Ma","sequence":"additional","affiliation":[{"name":"Changping Laboratory , Beijing 102206,","place":["China"]}]},{"given":"Ziqiang","family":"Cao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Soochow University , Suzhou 215006,","place":["China"]}]},{"given":"Changlong","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Soochow University , Suzhou 215006,","place":["China"]}]},{"given":"Jun","family":"Zhang","sequence":"additional","affiliation":[{"name":"Changping Laboratory , Beijing 102206,","place":["China"]}]},{"given":"Yi Qin","family":"Gao","sequence":"additional","affiliation":[{"name":"Changping Laboratory , Beijing 102206,","place":["China"]}]}],"member":"286","published-online":{"date-parts":[[2025,7,28]]},"reference":[{"key":"2025081213183149600_btaf396-B1","first-page":"10757","author":"Abdine","year":"2024"},{"key":"2025081213183149600_btaf396-B2","doi-asserted-by":"crossref","first-page":"493","DOI":"10.1038\/s41586-024-07487-w","article-title":"Accurate structure prediction of biomolecular interactions with alphafold 3","volume":"630","author":"Abramson","year":"2024","journal-title":"Nature"},{"key":"2025081213183149600_btaf396-B3","doi-asserted-by":"crossref","DOI":"10.1093\/bioinformatics\/btaf170","article-title":"Protnote: a multimodal method for protein-function annotation","volume":"41","author":"Char","year":"2025","journal-title":"Bioinformatics"},{"key":"2025081213183149600_btaf396-B4","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1126\/science.add2187","article-title":"Robust deep learning\u2013based protein sequence design using proteinMPNN","volume":"378","author":"Dauparas","year":"2022","journal-title":"Science"},{"key":"2025081213183149600_btaf396-B5","first-page":"8469","author":"Driess","year":"2023"},{"key":"2025081213183149600_btaf396-B6","author":"Dubey","year":"2024"},{"key":"2025081213183149600_btaf396-B7","first-page":"1943","author":"Fang","year":"2024"},{"key":"2025081213183149600_btaf396-B8","author":"Fang","year":"2024"},{"key":"2025081213183149600_btaf396-B9","doi-asserted-by":"crossref","first-page":"3168","DOI":"10.1038\/s41467-021-23303-9","article-title":"Structure-based protein function prediction using graph convolutional networks","volume":"12","author":"Gligorijevi\u0107","year":"2021","journal-title":"Nat Commun"},{"key":"2025081213183149600_btaf396-B10","author":"Hu","year":"2022"},{"key":"2025081213183149600_btaf396-B11","doi-asserted-by":"crossref","first-page":"422","DOI":"10.1093\/bioinformatics\/btz595","article-title":"Deepgoplus: improved protein function prediction from sequence","volume":"36","author":"Kulmanov","year":"2020","journal-title":"Bioinformatics"},{"key":"2025081213183149600_btaf396-B12","first-page":"19730","author":"Li","year":"2023"},{"key":"2025081213183149600_btaf396-B13","first-page":"74","author":"Lin","year":"2004"},{"key":"2025081213183149600_btaf396-B15","doi-asserted-by":"crossref","first-page":"1123","DOI":"10.1126\/science.ade2574","article-title":"Evolutionary-scale prediction of atomic-level protein structure with a language model","volume":"379","author":"Lin","year":"2023","journal-title":"Science"},{"key":"2025081213183149600_btaf396-B16","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2024.3505955","author":"Luo","year":"2024","journal-title":"IEEE J Biomed Health Inform"},{"key":"2025081213183149600_btaf396-B17","author":"Lyu","year":"2023"},{"key":"2025081213183149600_btaf396-B18","first-page":"16990","author":"Notin","year":"2022"},{"key":"2025081213183149600_btaf396-B19","first-page":"311","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics","author":"Papineni","year":"2002"},{"key":"2025081213183149600_btaf396-B20","first-page":"155","author":"Qin","year":"2025"},{"key":"2025081213183149600_btaf396-B21","author":"Qiu","year":"2024"},{"key":"2025081213183149600_btaf396-B22","doi-asserted-by":"crossref","first-page":"btae756","DOI":"10.1093\/bioinformatics\/btae756","article-title":"Funcfetch: an LLM-assisted workflow enables mining thousands of enzyme\u2013substrate interactions from published manuscripts","volume":"41","author":"Smith","year":"2024","journal-title":"Bioinformatics"},{"key":"2025081213183149600_btaf396-B23","author":"Su","year":"2024."},{"key":"2025081213183149600_btaf396-B24","author":"Taylor","year":"2022"},{"key":"2025081213183149600_btaf396-B25","doi-asserted-by":"crossref","first-page":"2699","DOI":"10.1093\/nar\/gky092","article-title":"UniProt: the universal protein knowledgebase","volume":"46","author":"The UniProt Consortium","year":"2018","journal-title":"Nucleic Acids Res"},{"key":"2025081213183149600_btaf396-B26","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1038\/s41587-023-01773-0","volume":"42","author":"van Kempen","year":"2024","journal-title":"Nat Biotechnol"},{"key":"2025081213183149600_btaf396-B27","first-page":"6000","author":"Vaswani","year":"2017"},{"key":"2025081213183149600_btaf396-B28","first-page":"1076","author":"Wang","year":"2025"},{"key":"2025081213183149600_btaf396-B29","first-page":"1114","author":"Wang","year":"2024"},{"key":"2025081213183149600_btaf396-B30","doi-asserted-by":"crossref","first-page":"832","DOI":"10.1038\/s41586-023-06832-9","article-title":"Predicting multiple conformations via sequence clustering and alphafold2","volume":"625","author":"Wayment-Steele","year":"2024","journal-title":"Nature"},{"key":"2025081213183149600_btaf396-B31","doi-asserted-by":"crossref","first-page":"btae680","DOI":"10.1093\/bioinformatics\/btae680","article-title":"FAPM: functional annotation of proteins using multi-modal models beyond structural modeling","volume":"40","author":"Xiang","year":"2024","journal-title":"Bioinformatics"},{"key":"2025081213183149600_btaf396-B32","first-page":"543","author":"Zhang","year":"2023"},{"key":"2025081213183149600_btaf396-B33","author":"Zhou","year":"2023"},{"key":"2025081213183149600_btaf396-B34","author":"Zhou","year":"2025"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaf396\/63866323\/btaf396.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/8\/btaf396\/63866323\/btaf396.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/8\/btaf396\/63866323\/btaf396.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,12]],"date-time":"2025-08-12T17:18:39Z","timestamp":1755019119000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btaf396\/8215464"}},"subtitle":[],"editor":[{"given":"Arne","family":"Elofsson","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2025,7,28]]},"references-count":33,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaf396","relation":{},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2025,8]]},"published":{"date-parts":[[2025,7,28]]},"article-number":"btaf396"}}