{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T05:06:50Z","timestamp":1773551210898,"version":"3.50.1"},"reference-count":83,"publisher":"Oxford University Press (OUP)","issue":"10","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Deep learning has deeply influenced protein science, enabling breakthroughs in predicting protein properties, higher-order structures, and molecular interactions.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>This article introduces DeepProtein, a comprehensive and user-friendly deep learning library tailored for protein-related tasks. It enables researchers to seamlessly address protein data with cutting-edge deep learning models. To assess model performance, we establish a benchmark that evaluates different deep learning architectures across multiple protein-related tasks, including protein function prediction, subcellular localization prediction, protein\u2013protein interaction prediction, and protein structure prediction. Furthermore, we introduce DeepProt-T5, a series of fine-tuned Prot-T5-based models that achieve state-of-the-art performance on four benchmark tasks, while demonstrating competitive results on six of others. Comprehensive documentation and tutorials are available which could ensure accessibility and support reproducibility.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>Built upon the widely used drug discovery library DeepPurpose, DeepProtein is publicly available at https:\/\/github.com\/jiaqingxie\/DeepProtein.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaf165","type":"journal-article","created":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T08:15:10Z","timestamp":1747383310000},"source":"Crossref","is-referenced-by-count":4,"title":["DeepProtein: deep learning library and benchmark for protein sequence learning"],"prefix":"10.1093","volume":"41","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7634-4457","authenticated-orcid":false,"given":"Jiaqing","family":"Xie","sequence":"first","affiliation":[{"name":"Computer Science Department, ETH Zurich , Z\u00fcrich 8006,","place":["Switzerland"]}]},{"given":"Yuqiang","family":"Li","sequence":"additional","affiliation":[{"name":"AI for Science, Shanghai Artificial Intelligence Laboratory, Shanghai 200030, The People\u2019s Republic of China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5574-2541","authenticated-orcid":false,"given":"Tianfan","family":"Fu","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology at Nanjing University, School of Computer Science, Nanjing University , Nanjing, Jiangsu 210023,","place":["China"]}]}],"member":"286","published-online":{"date-parts":[[2025,5,19]]},"reference":[{"key":"2025102511212740000_btaf165-B1","doi-asserted-by":"crossref","first-page":"3387","DOI":"10.1093\/bioinformatics\/btx431","article-title":"Deeploc: prediction of protein subcellular localization using deep learning","volume":"33","author":"Almagro Armenteros","year":"2017","journal-title":"Bioinformatics"},{"key":"2025102511212740000_btaf165-B2","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1093\/nar\/28.1.45","article-title":"The Swiss-Prot protein sequence database and its supplement trembl in 2000","volume":"28","author":"Bairoch","year":"2000","journal-title":"Nucleic Acids Res"},{"key":"2025102511212740000_btaf165-B43","first-page":"1","volume-title":"J Cheminform","author":"Bento"},{"key":"2025102511212740000_btaf165-B3","doi-asserted-by":"crossref","first-page":"2102","DOI":"10.1093\/bioinformatics\/btac020","article-title":"Proteinbert: a universal deep-learning model of protein sequence and function","volume":"38","author":"Brandes","year":"2022","journal-title":"Bioinformatics"},{"key":"2025102511212740000_btaf165-B4","author":"Chen","year":"2019"},{"key":"2025102511212740000_btaf165-B5","author":"Chen","year":"2020"},{"key":"2025102511212740000_btaf165-B6","doi-asserted-by":"crossref","first-page":"0126","DOI":"10.1038\/s41597-024-02909-w","article-title":"Uncertainty quantification and interpretability for clinical trial approval prediction","volume":"4","author":"Chen","year":"2024","journal-title":"Health Data Sci"},{"key":"2025102511212740000_btaf165-B7","author":"Chen","year":"2024"},{"key":"2025102511212740000_btaf165-B8","author":"Chen","year":"2024"},{"key":"2025102511212740000_btaf165-B9","first-page":"6802832","article-title":"Profold: protein fold classification with additional structural features and a novel ensemble classifier","volume":"2016","author":"Chen","year":"2016","journal-title":"BioMed Res Int"},{"key":"2025102511212740000_btaf165-B10","doi-asserted-by":"crossref","first-page":"332","DOI":"10.1038\/s41598-020-79704-1","article-title":"Data-driven detection of subtype-specific differentially expressed genes","volume":"11","author":"Chen","year":"2021","journal-title":"Sci Rep"},{"key":"2025102511212740000_btaf165-B11","first-page":"1724","author":"Cho","year":"2014"},{"key":"2025102511212740000_btaf165-B12","doi-asserted-by":"crossref","first-page":"D204","DOI":"10.1093\/nar\/gku989","article-title":"Uniprot: a hub for protein information","volume":"43","author":"Consortium","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2025102511212740000_btaf165-B13","author":"Dallago","year":"2021"},{"key":"2025102511212740000_btaf165-B14","first-page":"4171","author":"Devlin","year":"2019"},{"key":"2025102511212740000_btaf165-B15","author":"Dimitra","year":"2020"},{"key":"2025102511212740000_btaf165-B16","first-page":"28265","volume-title":"Sci Rep","author":"Du","year":"2024"},{"key":"2025102511212740000_btaf165-B17","doi-asserted-by":"crossref","first-page":"D1140","DOI":"10.1093\/nar\/gkt1043","article-title":"SAbDab: the structural antibody database","volume":"42","author":"Dunbar","year":"2014","journal-title":"Nucleic Acids Res"},{"key":"2025102511212740000_btaf165-B18","first-page":"2224","article-title":"Convolutional networks on graphs for learning molecular fingerprints","volume":"28","author":"Duvenaud","year":"2015;","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025102511212740000_btaf165-B19","first-page":"7112","volume-title":"IEEE Trans Pattern Anal Mach","author":"Elnaggar","year":"2021"},{"key":"2025102511212740000_btaf165-B20","doi-asserted-by":"crossref","first-page":"Ddn3","DOI":"10.1093\/bioinformatics\/btae376","article-title":"Determining significant rewiring of biological network structure with differential dependency networks","volume":"40","author":"Fu","year":"2024","journal-title":"Bioinformatics"},{"key":"2025102511212740000_btaf165-B21","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1038\/s41592-019-0666-6","article-title":"Deciphering interaction fingerprints from protein molecular surfaces using geometric deep learning","volume":"17","author":"Gainza","year":"2020","journal-title":"Nat Methods"},{"key":"2025102511212740000_btaf165-B22","first-page":"21342","author":"Gao","year":"2022"},{"key":"2025102511212740000_btaf165-B23","doi-asserted-by":"crossref","first-page":"3168","DOI":"10.1038\/s41467-021-23303-9","article-title":"Structure-based protein function prediction using graph convolutional networks","volume":"12","author":"Gligorijevi\u0107","year":"2021","journal-title":"Nat Commun"},{"key":"2025102511212740000_btaf165-B24","doi-asserted-by":"crossref","first-page":"116","DOI":"10.1016\/j.cels.2017.11.003","article-title":"Quantitative missense variant effect prediction using large-scale mutagenesis data","volume":"6","author":"Gray","year":"2018","journal-title":"Cell Syst"},{"key":"2025102511212740000_btaf165-B25","doi-asserted-by":"crossref","first-page":"btad410","DOI":"10.1093\/bioinformatics\/btad410","article-title":"Hierarchical graph transformer with contrastive learning for protein function prediction","volume":"39","author":"Gu","year":"2023","journal-title":"Bioinformatics"},{"key":"2025102511212740000_btaf165-B26","doi-asserted-by":"crossref","first-page":"3025","DOI":"10.1093\/nar\/gkn159","article-title":"Using support vector machine combined with auto covariance to predict protein\u2013protein interactions from protein sequences","volume":"36","author":"Guo","year":"2008","journal-title":"Nucleic Acids Res"},{"key":"2025102511212740000_btaf165-B27","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1093\/nar\/28.1.235","article-title":"The protein data bank","volume":"28","author":"Helen","year":"2000","journal-title":"Nucleic Acids Res"},{"key":"2025102511212740000_btaf165-B28","first-page":"473","article-title":"LSTM can solve hard long time lag problems","volume":"9","author":"Hochreiter","year":"1996","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025102511212740000_btaf165-B29","doi-asserted-by":"crossref","first-page":"1295","DOI":"10.1093\/bioinformatics\/btx780","article-title":"Deepsf: deep convolutional neural network for mapping protein sequences to folds","volume":"34","author":"Hou","year":"2018","journal-title":"Bioinformatics"},{"key":"2025102511212740000_btaf165-B30","doi-asserted-by":"crossref","first-page":"1033","DOI":"10.1038\/s41589-022-01131-2","article-title":"Artificial intelligence foundation for therapeutic science","volume":"18","author":"Huang","year":"2022","journal-title":"Nat Chem Biol"},{"key":"2025102511212740000_btaf165-B31","author":"Huang","year":"2021"},{"key":"2025102511212740000_btaf165-B32","doi-asserted-by":"crossref","first-page":"5545","DOI":"10.1093\/bioinformatics\/btaa1005","article-title":"Deeppurpose: a deep learning library for drug\u2013target interaction prediction","volume":"36","author":"Huang","year":"2021","journal-title":"Bioinformatics"},{"key":"2025102511212740000_btaf165-B33","doi-asserted-by":"crossref","first-page":"2600","DOI":"10.1093\/bioinformatics\/bts489","article-title":"Skempi: a structural kinetic and energetic database of mutant protein interactions and its use in empirical models","volume":"28","author":"Iain","year":"2012","journal-title":"Bioinformatics"},{"key":"2025102511212740000_btaf165-B34","first-page":"1263","author":"Gilmer","year":"2017"},{"key":"2025102511212740000_btaf165-B35","doi-asserted-by":"crossref","first-page":"W24","DOI":"10.1093\/nar\/gkx346","article-title":"Bepipred-2.0: improving sequence-based B-cell epitope prediction using conformational epitopes","volume":"45","author":"Jespersen","year":"2017","journal-title":"Nucleic Acids Res"},{"key":"2025102511212740000_btaf165-B36","author":"Jing","year":"2021"},{"key":"2025102511212740000_btaf165-B37","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","article-title":"Highly accurate protein structure prediction with alphafold","volume":"596","author":"Jumper","year":"2021","journal-title":"Nature"},{"key":"2025102511212740000_btaf165-B38","doi-asserted-by":"crossref","first-page":"2605","DOI":"10.1093\/bioinformatics\/bty166","article-title":"Deepsol: a deep learning framework for sequence-based protein solubility prediction","volume":"34","author":"Khurana","year":"2018","journal-title":"Bioinformatics"},{"key":"2025102511212740000_btaf165-B39","author":"Kingma"},{"key":"2025102511212740000_btaf165-B40","author":"Kipf","year":"2016"},{"key":"2025102511212740000_btaf165-B41","doi-asserted-by":"crossref","first-page":"520","DOI":"10.1002\/prot.25674","article-title":"Netsurfp-2.0: improved prediction of protein structural features by integrated deep learning","volume":"87","author":"Klausen","year":"2019","journal-title":"Proteins Struct Funct Bioinf"},{"key":"2025102511212740000_btaf165-B42","author":"Labrak"},{"key":"2025102511212740000_btaf165-B44","doi-asserted-by":"crossref","first-page":"1034","DOI":"10.1038\/s41587-019-0203-2","article-title":"Large dataset enables prediction of repair after crispr\u2013cas9 editing in primary t cells","volume":"37","author":"Leenay","year":"2019","journal-title":"Nat Biotechnol"},{"key":"2025102511212740000_btaf165-B45","doi-asserted-by":"crossref","first-page":"2944","DOI":"10.1093\/bioinformatics\/bty305","article-title":"Parapred: antibody paratope prediction using convolutional and recurrent neural networks","volume":"34","author":"Liberis","year":"2018","journal-title":"Bioinformatics"},{"key":"2025102511212740000_btaf165-B46","doi-asserted-by":"crossref","first-page":"1123","DOI":"10.1126\/science.ade2574","article-title":"Evolutionary-scale prediction of atomic-level protein structure with a language model","volume":"379","author":"Lin","year":"2023","journal-title":"Science"},{"key":"2025102511212740000_btaf165-B47","doi-asserted-by":"crossref","first-page":"13664","DOI":"10.1039\/D1SC04444C","article-title":"Structure-based de novo drug design using 3d deep generative models","volume":"12","author":"Li","year":"2021","journal-title":"Chem Sci"},{"key":"2025102511212740000_btaf165-B48","first-page":"27233","volume-title":"ACS omega","author":"Li","year":"2021"},{"key":"2025102511212740000_btaf165-B49","first-page":"534","author":"Lu","year":"2022"},{"key":"2025102511212740000_btaf165-B50","author":"Lu","year":"2018"},{"key":"2025102511212740000_btaf165-B51","author":"Lu","year":"2024"},{"key":"2025102511212740000_btaf165-B52","doi-asserted-by":"crossref","first-page":"4992","DOI":"10.1021\/pr100618t","article-title":"Large-scale prediction of human protein\u2013protein interactions from amino acid sequence based on latent topic features","volume":"9","author":"Pan","year":"2010","journal-title":"J Proteome Res"},{"key":"2025102511212740000_btaf165-B53","first-page":"2825","article-title":"Scikit-learn: machine learning in python","volume":"12","author":"Pedregosa","year":"2011","journal-title":"J Mach Learn Res"},{"key":"2025102511212740000_btaf165-B54","author":"Pei"},{"key":"2025102511212740000_btaf165-B55","author":"Pei"},{"key":"2025102511212740000_btaf165-B56","doi-asserted-by":"crossref","first-page":"387","DOI":"10.1002\/path.2440","article-title":"The human protein atlas\u2014a tool for pathology","volume":"216","author":"Pont\u00e9n","year":"2008","journal-title":"J Pathol"},{"key":"2025102511212740000_btaf165-B57","first-page":"9689","article-title":"Evaluating protein transfer learning with tape","volume":"32","author":"Rao","year":"2019","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025102511212740000_btaf165-B58","doi-asserted-by":"crossref","first-page":"4025","DOI":"10.1073\/pnas.1810576116","article-title":"Five computational developability guidelines for therapeutic antibody profiling","volume":"116","author":"Raybould","year":"2019","journal-title":"Proc Natl Acad Sci USA"},{"key":"2025102511212740000_btaf165-B59","doi-asserted-by":"crossref","DOI":"10.1073\/pnas.2016239118","article-title":"Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences","volume":"118","author":"Rives","year":"2021","journal-title":"Proc Natl Acad Sci USA"},{"key":"2025102511212740000_btaf165-B60","doi-asserted-by":"crossref","first-page":"168","DOI":"10.1126\/science.aan0693","article-title":"Global analysis of protein folding using massively parallel design, synthesis, and testing","volume":"357","author":"Rocklin","year":"2017","journal-title":"Science"},{"key":"2025102511212740000_btaf165-B61","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1038\/nature17995","article-title":"Local fitness landscape of the green fluorescent protein","volume":"533","author":"Sarkisyan","year":"2016","journal-title":"Nature"},{"key":"2025102511212740000_btaf165-B62","author":"Sevgen","year":"2023"},{"key":"2025102511212740000_btaf165-B63","author":"Shanehsazzadeh","year":"2020"},{"key":"2025102511212740000_btaf165-B64","first-page":"6000","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025102511212740000_btaf165-B65","author":"Velickovic"},{"key":"2025102511212740000_btaf165-B66","doi-asserted-by":"crossref","first-page":"D339","DOI":"10.1093\/nar\/gky1006","article-title":"The immune epitope database (IEDB): 2018 update","volume":"47","author":"Vita","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2025102511212740000_btaf165-B67","author":"Wang"},{"key":"2025102511212740000_btaf165-B68","doi-asserted-by":"crossref","DOI":"10.2337\/db24-1596-P","article-title":"1596-p: personalized glycemic response to carbohydrates and associated physiological signatures in multiomics","volume":"73","author":"Wu","year":"2024","journal-title":"Diabetes"},{"key":"2025102511212740000_btaf165-B69","doi-asserted-by":"crossref","first-page":"vbac037","DOI":"10.1093\/bioadv\/vbac037","article-title":"Cot: an efficient and accurate method for detecting marker genes among many subtypes","volume":"2","author":"Wu","year":"2022","journal-title":"Bioinform Adv"},{"key":"2025102511212740000_btaf165-B70","doi-asserted-by":"crossref","first-page":"vbac076","DOI":"10.1093\/bioadv\/vbac076","article-title":"Cosbin: cosine score-based iterative normalization of biologically diverse samples","volume":"2","author":"Wu","year":"2022","journal-title":"Bioinform Adv"},{"key":"2025102511212740000_btaf165-B71","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1093\/bib\/bbac603","article-title":"Leveraging scaffold information to predict protein\u2013ligand binding affinity with an empirical graph neural network","volume":"24","author":"Xia","year":"2023","journal-title":"Brief Bioinform"},{"key":"2025102511212740000_btaf165-B72","doi-asserted-by":"crossref","first-page":"8749","DOI":"10.1021\/acs.jmedchem.9b00959","article-title":"Pushing the boundaries of molecular representation for drug discovery with the graph attention mechanism","volume":"63","author":"Xiong","year":"2020","journal-title":"J Med Chem"},{"key":"2025102511212740000_btaf165-B73","author":"Xu"},{"key":"2025102511212740000_btaf165-B74","first-page":"35156","article-title":"Peer: a comprehensive and multi-task benchmark for protein sequence understanding","volume":"35","author":"Xu","year":"2022","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025102511212740000_btaf165-B75","first-page":"438","author":"Yi","year":"2018"},{"key":"2025102511212740000_btaf165-B76","first-page":"28877","article-title":"Do transformers really perform badly for graph representation?","volume":"34","author":"Ying","year":"2021","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025102511212740000_btaf165-B77","author":"Yu","year":"2024"},{"key":"2025102511212740000_btaf165-B78","doi-asserted-by":"crossref","first-page":"bbab564","DOI":"10.1093\/bib\/bbab564","article-title":"Alphafold2-aware protein\u2013DNA binding site prediction using graph transformer","volume":"23","author":"Yuan","year":"2022","journal-title":"Brief Bioinform"},{"key":"2025102511212740000_btaf165-B79","author":"Yue","year":"2024"},{"key":"2025102511212740000_btaf165-B80","first-page":"11983","article-title":"Graph transformer networks","volume":"32","author":"Yun","year":"2019","journal-title":"Adv Neural Inf Process Syst"},{"key":"2025102511212740000_btaf165-B81","author":"Zhang","year":"2021"},{"key":"2025102511212740000_btaf165-B82","author":"Zhang","year":"2024"},{"key":"2025102511212740000_btaf165-B83","author":"Zhang"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaf165\/63235927\/btaf165.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/10\/btaf165\/63235927\/btaf165.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/10\/btaf165\/63235927\/btaf165.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T15:21:39Z","timestamp":1761405699000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btaf165\/8137840"}},"subtitle":[],"editor":[{"given":"Zhiyong","family":"Lu","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":83,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2025,10,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaf165","relation":{},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2025,10]]},"published":{"date-parts":[[2025,5,19]]},"article-number":"btaf165"}}