{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T01:21:27Z","timestamp":1768267287348,"version":"3.49.0"},"reference-count":17,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1109\/bibm62325.2024.10821894","type":"proceedings-article","created":{"date-parts":[[2025,1,10]],"date-time":"2025-01-10T20:12:45Z","timestamp":1736539965000},"page":"2390-2395","source":"Crossref","is-referenced-by-count":9,"title":["A Fine-tuning Dataset and Benchmark for Large Language Models for Protein Understanding"],"prefix":"10.1109","author":[{"given":"Yiqing","family":"Shen","sequence":"first","affiliation":[{"name":"Toursun Synbio,Shanghai,China"}]},{"given":"Zan","family":"Chen","sequence":"additional","affiliation":[{"name":"Toursun Synbio,Shanghai,China"}]},{"given":"Michail","family":"Mamalakis","sequence":"additional","affiliation":[{"name":"University of Cambridge,Department of Computer Science and Technology,Cambridge,UK"}]},{"given":"Luhan","family":"He","sequence":"additional","affiliation":[{"name":"Toursun Synbio,Shanghai,China"}]},{"given":"Haiyang","family":"Xia","sequence":"additional","affiliation":[{"name":"Toursun Synbio,Shanghai,China"}]},{"given":"Tianbin","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai AI Laboratory,Shanghai,China"}]},{"given":"Yanzhou","family":"Su","sequence":"additional","affiliation":[{"name":"Shanghai AI Laboratory,Shanghai,China"}]},{"given":"Junjun","family":"He","sequence":"additional","affiliation":[{"name":"Shanghai AI Laboratory,Shanghai,China"}]},{"given":"Yu Guang","family":"Wang","sequence":"additional","affiliation":[{"name":"Toursun Synbio,Shanghai,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"crossref","DOI":"10.1101\/622803","article-title":"Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences","volume-title":"PNAS","author":"Rives","year":"2019"},{"key":"ref2","article-title":"GPT-4 Technical Report","year":"2024"},{"key":"ref3","first-page":"38 749","article-title":"ProtST: Multi-modality learning of protein sequences and biomedical texts","volume-title":"ICML","volume":"202","author":"Xu","year":"2023"},{"key":"ref4","article-title":"Protein representation learning via knowledge enhanced primary structure reasoning","volume-title":"ICLR","author":"Zhou","year":"2023"},{"key":"ref5","article-title":"Chain of thought prompting elicits reasoning in large language models","volume-title":"NeurIPS","author":"Wei","year":"2022"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btac598"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkab1112"},{"key":"ref8","article-title":"Retrieval-augmented generation for large language models: A survey","author":"Gao","year":"2024"},{"key":"ref9","article-title":"The Falcon Series of Open Language Models","author":"Almazrouei","year":"2023"},{"key":"ref10","article-title":"Qwen technical report","author":"Bai","year":"2023"},{"key":"ref11","article-title":"Moonshot: Towards controllable video generation and editing with multimodal conditions","author":"Zhang","year":"2024"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.2307\/j.ctv1tjrvk1.47"},{"key":"ref13","article-title":"Baichuan 2: Open large-scale language models","year":"2023"},{"key":"ref14","article-title":"Llama 2: Open Foundation and Fine-Tuned Chat Models","author":"Touvron","year":"2023"},{"key":"ref15","article-title":"Internlm2 technical report","author":"Cai","year":"2024"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.26"},{"key":"ref17","article-title":"Yi: Open Foundation Models by 01.AI","author":"Young","year":"2024"}],"event":{"name":"2024 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","location":"Lisbon, Portugal","start":{"date-parts":[[2024,12,3]]},"end":{"date-parts":[[2024,12,6]]}},"container-title":["2024 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10821710\/10821711\/10821894.pdf?arnumber=10821894","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,11]],"date-time":"2025-01-11T10:11:17Z","timestamp":1736590277000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10821894\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":17,"URL":"https:\/\/doi.org\/10.1109\/bibm62325.2024.10821894","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]}}}