{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T11:58:44Z","timestamp":1779191924353,"version":"3.51.4"},"reference-count":58,"publisher":"Oxford University Press (OUP)","issue":"6","license":[{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/journals\/pages\/open_access\/funder_policies\/chorus\/standard_publication_model"}],"funder":[{"name":"Guangzhou S&T Research Plan","award":["202007030010"],"award-info":[{"award-number":["202007030010"]}]},{"name":"Introducing Innovative and Entrepreneurial Teams","award":["2016ZT06D211"],"award-info":[{"award-number":["2016ZT06D211"]}]},{"name":"Guangdong Key Field R&D Plan","award":["2018B010109006"],"award-info":[{"award-number":["2018B010109006"]}]},{"name":"Guangdong Key Field R&D Plan","award":["2019B020228001"],"award-info":[{"award-number":["2019B020228001"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62041209"],"award-info":[{"award-number":["62041209"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61772566"],"award-info":[{"award-number":["61772566"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020YFB0204803"],"award-info":[{"award-number":["2020YFB0204803"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,11,19]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>More than one-third of the proteins contain metal ions in the Protein Data Bank. Correct identification of metal ion-binding residues is important for understanding protein functions and designing novel drugs. Due to the small size and high versatility of metal ions, it remains challenging to computationally predict their binding sites from protein sequence. Existing sequence-based methods are of low accuracy due to the lack of structural information, and time-consuming owing to the usage of multi-sequence alignment. Here, we propose LMetalSite, an alignment-free sequence-based predictor for binding sites of the four most frequently seen metal ions in BioLiP (Zn2+, Ca2+, Mg2+ and Mn2+). LMetalSite leverages the pretrained language model to rapidly generate informative sequence representations and employs transformer to capture long-range dependencies. Multi-task learning is adopted to compensate for the scarcity of training data and capture the intrinsic similarities between different metal ions. LMetalSite was shown to surpass state-of-the-art structure-based methods by more than 19.7, 14.4, 36.8 and 12.6% in area under the precision recall on the four independent tests, respectively. Further analyses indicated that the self-attention modules are effective to learn the structural contexts of residues from protein sequence. We provide the data sets, source codes and trained models of LMetalSite at https:\/\/github.com\/biomed-AI\/LMetalSite.<\/jats:p>","DOI":"10.1093\/bib\/bbac444","type":"journal-article","created":{"date-parts":[[2022,9,20]],"date-time":"2022-09-20T11:09:12Z","timestamp":1663672152000},"source":"Crossref","is-referenced-by-count":61,"title":["Alignment-free metal ion-binding site prediction from protein sequence through pretrained language model and multi-task learning"],"prefix":"10.1093","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6098-9103","authenticated-orcid":false,"given":"Qianmu","family":"Yuan","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering at Sun Yat-sen University , Guangzhou 510000, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sheng","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering at Sun Yat-sen University , Guangzhou 510000, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Peng Cheng National Laboratory at Shenzhen , Guangzhou 510000, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huiying","family":"Zhao","sequence":"additional","affiliation":[{"name":"Sun Yat-sen Memorial Hospital at Sun Yat-sen University , Guangzhou 510000, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuedong","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Sun Yat-sen University, Guangzhou 510000, China, and Key Laboratory of Machine Intelligence and Advanced Computing of MOE, Sun Yat-sen University , Guangzhou 510000, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2022,10,23]]},"reference":[{"key":"2022112111202353900_ref1","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1093\/nar\/28.1.235","article-title":"The Protein Data Bank","volume":"28","author":"Berman","year":"2000","journal-title":"Nucleic Acids Res"},{"key":"2022112111202353900_ref2","doi-asserted-by":"crossref","first-page":"D459","DOI":"10.1093\/nar\/gkx989","article-title":"MetalPDB in 2018: a database of metal sites in biological macromolecular structures","volume":"46","author":"Putignano","year":"2018","journal-title":"Nucleic Acids Res"},{"key":"2022112111202353900_ref3","doi-asserted-by":"crossref","first-page":"582","DOI":"10.1016\/0958-1669(91)90084-I","article-title":"Metal-binding sites in proteins","volume":"2","author":"Tainer","year":"1991","journal-title":"Curr Opin Biotechnol"},{"key":"2022112111202353900_ref4","doi-asserted-by":"crossref","first-page":"1471","DOI":"10.1021\/ar900015x","article-title":"Metalloproteomes: a bioinformatic approach","volume":"42","author":"Andreini","year":"2009","journal-title":"Acc Chem Res"},{"key":"2022112111202353900_ref5","doi-asserted-by":"crossref","first-page":"1205","DOI":"10.1007\/s00775-008-0404-5","article-title":"Metal ions in biological catalysis: from enzyme databases to general principles","volume":"13","author":"Andreini","year":"2008","journal-title":"J Biol Inorg Chem"},{"key":"2022112111202353900_ref6","doi-asserted-by":"crossref","first-page":"405","DOI":"10.1146\/annurev.bb.19.060190.002201","article-title":"Zinc finger domains: hypotheses and current knowledge","volume":"19","author":"Berg","year":"1990","journal-title":"Annu Rev Biophys Biophys Chem"},{"key":"2022112111202353900_ref7","doi-asserted-by":"crossref","first-page":"2588","DOI":"10.1093\/bioinformatics\/btt447","article-title":"Protein\u2013ligand binding site recognition using complementary binding-specific substructure comparison and sequence profile alignment","volume":"29","author":"Yang","year":"2013","journal-title":"Bioinformatics"},{"key":"2022112111202353900_ref8","doi-asserted-by":"crossref","first-page":"11014","DOI":"10.1021\/bi0508136","article-title":"Metal binding sites in proteins: identification and characterization by paramagnetic NMR relaxation","volume":"44","author":"Jensen","year":"2005","journal-title":"Biochemistry"},{"key":"2022112111202353900_ref9","first-page":"231","article-title":"Mn2+ as a probe of divalent metal ion binding and function in enzymes and other proteins","volume":"37","author":"Reed","year":"2000","journal-title":"Met Ions Biol Syst"},{"key":"2022112111202353900_ref10","doi-asserted-by":"crossref","first-page":"2287","DOI":"10.1021\/acs.jcim.6b00407","article-title":"MIB: metal ion-binding site prediction and docking server","volume":"56","author":"Lin","year":"2016","journal-title":"J Chem Inf Model"},{"key":"2022112111202353900_ref11","doi-asserted-by":"crossref","first-page":"3018","DOI":"10.1093\/bioinformatics\/btaa110","article-title":"Protein\u2013ligand binding residue prediction enhancement through hybrid deep heterogeneous learning of sequence and structure data","volume":"36","author":"Xia","year":"2020","journal-title":"Bioinformatics"},{"key":"2022112111202353900_ref12","doi-asserted-by":"crossref","first-page":"e51","DOI":"10.1093\/nar\/gkab044","article-title":"GraphBind: protein structural context embedded rules learned by hierarchical graph neural networks for recognizing nucleic-acid-binding residues","volume":"49","author":"Xia","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2022112111202353900_ref13","doi-asserted-by":"crossref","first-page":"3260","DOI":"10.1093\/bioinformatics\/btw396","article-title":"Recognizing metal and acid radical ion-binding sites by integrating ab initio modeling with template-based transferals","volume":"32","author":"Hu","year":"2016","journal-title":"Bioinformatics"},{"key":"2022112111202353900_ref14","doi-asserted-by":"crossref","first-page":"7606","DOI":"10.1093\/nar\/gkt544","article-title":"Novel approach for selecting the best predictor for identifying the binding sites in DNA binding proteins","volume":"41","author":"Nagarajan","year":"2013","journal-title":"Nucleic Acids Res"},{"key":"2022112111202353900_ref15","doi-asserted-by":"crossref","first-page":"994","DOI":"10.1109\/TCBB.2013.104","article-title":"Designing template-free predictor for targeting protein-ligand binding sites with classifier ensemble and spatial clustering","volume":"10","author":"Yu","year":"2013","journal-title":"IEEE\/ACM Trans Comput Biol Bioinform"},{"key":"2022112111202353900_ref16","doi-asserted-by":"crossref","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","article-title":"Gapped BLAST and PSI-BLAST: a new generation of protein database search programs","volume":"25","author":"Altschul","year":"1997","journal-title":"Nucleic Acids Res"},{"key":"2022112111202353900_ref17","doi-asserted-by":"crossref","DOI":"10.1073\/pnas.2016239118","article-title":"Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences","volume":"118","author":"Rives","year":"2021","journal-title":"Proc Natl Acad Sci"},{"key":"2022112111202353900_ref18","doi-asserted-by":"crossref","first-page":"7112","DOI":"10.1109\/TPAMI.2021.3095381","article-title":"ProtTrans: towards cracking the language of lifes code through self-supervised deep learning and high performance computing","volume":"44","author":"Elnaggar","year":"2021","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2022112111202353900_ref19","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1038\/s42256-022-00457-9","article-title":"Learning functional properties of proteins with language models","volume":"4","author":"Unsal","year":"2022","journal-title":"Nat Mach Intell"},{"key":"2022112111202353900_ref20","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1093\/nsr\/nwx105","article-title":"An overview of multi-task learning","volume":"5","author":"Zhang","year":"2018","journal-title":"Natl Sci Rev"},{"key":"2022112111202353900_ref21","first-page":"1","article-title":"DeepDist: real-value inter-residue distance prediction with deep residual convolutional network","volume":"22","author":"Wu","year":"2021","journal-title":"BMC Bioinform"},{"key":"2022112111202353900_ref22","doi-asserted-by":"crossref","DOI":"10.1016\/j.jbi.2020.103376","article-title":"Compositional framework for multitask learning in the identification of cleavage sites of HIV-1 protease","volume":"102","author":"Singh","year":"2020","journal-title":"J Biomed Inform"},{"key":"2022112111202353900_ref23","volume-title":"To improve the predictions of binding residues with DNA, RNA, carbohydrate, and peptide via multi-task deep neural networks, IEEE\/ACM Trans Comput Biol Bioinform","author":"Sun","year":"2021"},{"key":"2022112111202353900_ref24","article-title":"DeepDISOBind: accurate prediction of RNA-, DNA-and protein-binding intrinsically disordered residues with deep multi-task learning","volume":"23","author":"Zhang","year":"2022","journal-title":"Brief Bioinform"},{"key":"2022112111202353900_ref25","doi-asserted-by":"crossref","first-page":"D1096","DOI":"10.1093\/nar\/gks966","article-title":"BioLiP: a semi-manually curated database for biologically relevant ligand\u2013protein interactions","volume":"41","author":"Yang","year":"2012","journal-title":"Nucleic Acids Res"},{"key":"2022112111202353900_ref26","first-page":"5998","volume-title":"In: Advances in Neural Information Processing Systems","author":"Vaswani","year":"2017"},{"key":"2022112111202353900_ref27","doi-asserted-by":"crossref","first-page":"47","DOI":"10.1021\/acs.jcim.9b00949","article-title":"Predicting retrosynthetic reactions using self-corrected transformer neural networks","volume":"60","author":"Zheng","year":"2019","journal-title":"J Chem Inf Model"},{"key":"2022112111202353900_ref28","doi-asserted-by":"crossref","first-page":"3150","DOI":"10.1093\/bioinformatics\/bts565","article-title":"CD-HIT: accelerated for clustering the next-generation sequencing data","volume":"28","author":"Fu","year":"2012","journal-title":"Bioinformatics"},{"key":"2022112111202353900_ref29","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J Mach Learn Res"},{"key":"2022112111202353900_ref30","doi-asserted-by":"crossref","first-page":"603","DOI":"10.1038\/s41592-019-0437-4","article-title":"Protein-level assembly increases protein sequence recovery from metagenomic samples manyfold","volume":"16","author":"Steinegger","year":"2019","journal-title":"Nat Methods"},{"key":"2022112111202353900_ref31","doi-asserted-by":"crossref","first-page":"1282","DOI":"10.1093\/bioinformatics\/btm098","article-title":"UniRef: comprehensive and non-redundant UniProt reference clusters","volume":"23","author":"Suzek","year":"2007","journal-title":"Bioinformatics"},{"key":"2022112111202353900_ref32","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Kenton","year":"2019","journal-title":"In: Proceedings of NAACL-HLT"},{"key":"2022112111202353900_ref33","doi-asserted-by":"crossref","first-page":"173","DOI":"10.1038\/nmeth.1818","article-title":"HHblits: lightning-fast iterative protein sequence searching by HMM-HMM alignment","volume":"9","author":"Remmert","year":"2012","journal-title":"Nat Methods"},{"key":"2022112111202353900_ref34","doi-asserted-by":"crossref","first-page":"D170","DOI":"10.1093\/nar\/gkw1081","article-title":"Uniclust databases of clustered and deeply annotated protein sequences and alignments","volume":"45","author":"Mirdita","year":"2017","journal-title":"Nucleic Acids Res"},{"key":"2022112111202353900_ref35","doi-asserted-by":"crossref","first-page":"2577","DOI":"10.1002\/bip.360221211","article-title":"Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features","volume":"22","author":"Kabsch","year":"1983","journal-title":"Biopolymers: Original Research on Biomolecules"},{"key":"2022112111202353900_ref36","first-page":"770","volume-title":"In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"He","year":"2016"},{"key":"2022112111202353900_ref37","first-page":"21","article-title":"Layer normalization","volume":"1050","author":"Ba","year":"2016","journal-title":"Stat"},{"key":"2022112111202353900_ref38","first-page":"15820","article-title":"Generative models for graph-based protein design","volume":"32","author":"Ingraham","year":"2019","journal-title":"Adv Neural Inf Process Syst"},{"key":"2022112111202353900_ref39","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s40537-019-0197-0","article-title":"A survey on image data augmentation for deep learning","volume":"6","author":"Shorten","year":"2019","journal-title":"Journal of big data"},{"key":"2022112111202353900_ref40","volume-title":"In: 3rd International Conference on Learning Representations (Poster)","author":"Kingma","year":"2015"},{"key":"2022112111202353900_ref41","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume":"32","author":"Paszke","year":"2019","journal-title":"Adv Neural Inf Process Syst"},{"key":"2022112111202353900_ref42","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1093\/bioinformatics\/btab643","article-title":"Structure-aware protein\u2013protein interaction site prediction using deep graph convolutional network","volume":"38","author":"Yuan","year":"2022","journal-title":"Bioinformatics"},{"key":"2022112111202353900_ref43","doi-asserted-by":"crossref","DOI":"10.1093\/bib\/bbab564","article-title":"AlphaFold2-aware protein\u2013DNA binding site prediction using graph transformer","volume":"23","author":"Yuan","year":"2022","journal-title":"Brief Bioinform"},{"key":"2022112111202353900_ref44","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0118432","article-title":"The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets","volume":"10","author":"Saito","year":"2015","journal-title":"PLoS One"},{"key":"2022112111202353900_ref45","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1016\/j.ab.2018.11.009","article-title":"MIonSite: ligand-specific prediction of metal ion-binding sites via enhanced AdaBoost algorithm with protein sequence information","volume":"566","author":"Qiao","year":"2019","journal-title":"Anal Biochem"},{"key":"2022112111202353900_ref46","doi-asserted-by":"crossref","DOI":"10.1093\/bib\/bbac178","article-title":"GASS-Metal: identifying metal-binding sites on protein structures using genetic algorithms","volume":"23","author":"Paiva","year":"2022","journal-title":"Brief Bioinform"},{"key":"2022112111202353900_ref47","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1214\/aoms\/1177730491","article-title":"On a test of whether one of two random variables is stochastically larger than the other","volume":"18","author":"Mann","year":"1947","journal-title":"Ann Math Stat"},{"key":"2022112111202353900_ref48","first-page":"613","article-title":"Tests for departure from normality. Empirical results for the distributions of b 2 and\u221a b","volume":"60","author":"D'agostino","year":"1973","journal-title":"Biometrika"},{"key":"2022112111202353900_ref49","first-page":"1126","volume-title":"Model-agnostic meta-learning for fast adaptation of deep networks","author":"Finn","year":"2017"},{"key":"2022112111202353900_ref50","doi-asserted-by":"crossref","first-page":"1627","DOI":"10.1021\/acs.jcim.0c01416","article-title":"Meta learning for low-resource molecular optimization","volume":"61","author":"Wang","year":"2021","journal-title":"J Chem Inf Model"},{"key":"2022112111202353900_ref51","first-page":"1","article-title":"Highly accurate protein structure prediction with AlphaFold","author":"Jumper","year":"2021","journal-title":"Nature"},{"key":"2022112111202353900_ref52","doi-asserted-by":"crossref","first-page":"871","DOI":"10.1126\/science.abj8754","article-title":"Accurate prediction of protein structures and interactions using a three-track neural network","volume":"373","author":"Baek","year":"2021","journal-title":"Science"},{"key":"2022112111202353900_ref53","doi-asserted-by":"crossref","first-page":"4498","DOI":"10.1172\/JCI91553","article-title":"JAK2-binding long noncoding RNA promotes breast cancer brain metastasis","volume":"127","author":"Wang","year":"2017","journal-title":"J Clin Invest"},{"key":"2022112111202353900_ref54","doi-asserted-by":"crossref","first-page":"302","DOI":"10.1016\/j.ajhg.2015.05.021","article-title":"THOC2 mutations implicate mRNA-export pathway in X-linked intellectual disability","volume":"97","author":"Kumar","year":"2015","journal-title":"Am J Hum Genet"},{"key":"2022112111202353900_ref55","doi-asserted-by":"crossref","first-page":"5858","DOI":"10.1021\/jm100574m","article-title":"Understanding and predicting druggability. A high-throughput method for detection of drug binding sites","volume":"53","author":"Schmidtke","year":"2010","journal-title":"J Med Chem"},{"key":"2022112111202353900_ref56","doi-asserted-by":"crossref","first-page":"3240","DOI":"10.1021\/acs.jcim.0c01494","article-title":"De novo molecule design through the molecular generative model conditioned by 3D information of protein binding sites","volume":"61","author":"Xu","year":"2021","journal-title":"J Chem Inf Model"},{"key":"2022112111202353900_ref57","doi-asserted-by":"crossref","first-page":"134","DOI":"10.1038\/s42256-020-0152-y","article-title":"Predicting drug\u2013protein interaction using quasi-visual question answering system","volume":"2","author":"Zheng","year":"2020","journal-title":"Nat Mach Intell"},{"key":"2022112111202353900_ref58","doi-asserted-by":"crossref","first-page":"1308","DOI":"10.1021\/acs.jcim.2c00060","article-title":"Structure-aware multimodal deep learning for drug\u2013protein interaction prediction","volume":"62","author":"Wang","year":"2022","journal-title":"J Chem Inf Model"}],"container-title":["Briefings in Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/23\/6\/bbac444\/47144554\/bbac444.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bib\/article-pdf\/23\/6\/bbac444\/47144554\/bbac444.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T02:27:37Z","timestamp":1701052057000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bib\/article\/doi\/10.1093\/bib\/bbac444\/6770088"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,23]]},"references-count":58,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2022,11,19]]}},"URL":"https:\/\/doi.org\/10.1093\/bib\/bbac444","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2022.05.20.492769","asserted-by":"object"}]},"ISSN":["1467-5463","1477-4054"],"issn-type":[{"value":"1467-5463","type":"print"},{"value":"1477-4054","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2022,11]]},"published":{"date-parts":[[2022,10,23]]},"article-number":"bbac444"}}