{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T00:24:21Z","timestamp":1767831861913,"version":"3.49.0"},"reference-count":23,"publisher":"Oxford University Press (OUP)","issue":"7","license":[{"start":{"date-parts":[[2023,6,29]],"date-time":"2023-06-29T00:00:00Z","timestamp":1687996800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100000925","name":"National Health and Medical Research Council","doi-asserted-by":"publisher","award":["GNT1174405"],"award-info":[{"award-number":["GNT1174405"]}],"id":[{"id":"10.13039\/501100000925","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Victorian Government\u2019s Operational Infrastructure Support Program"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,7,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>With the development of sequencing techniques, the discovery of new proteins significantly exceeds the human capacity and resources for experimentally characterizing protein functions. Localization, EC numbers, and GO terms with the structure-based Cutoff Scanning Matrix (LEGO-CSM) is a comprehensive web-based resource that fills this gap by leveraging the well-established and robust graph-based signatures to supervised learning models using both protein sequence and structure information to accurately model protein function in terms of Subcellular Localization, Enzyme Commission (EC) numbers, and Gene Ontology (GO) terms.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We show our models perform as well as or better than alternative approaches, achieving area under the receiver operating characteristic curve of up to 0.93 for subcellular localization, up to 0.93 for EC, and up to 0.81 for GO terms on independent blind tests.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>LEGO-CSM\u2019s web server is freely available at https:\/\/biosig.lab.uq.edu.au\/lego_csm. In addition, all datasets used to train and test LEGO-CSM\u2019s models can be downloaded at https:\/\/biosig.lab.uq.edu.au\/lego_csm\/data.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btad402","type":"journal-article","created":{"date-parts":[[2023,6,29]],"date-time":"2023-06-29T14:28:19Z","timestamp":1688048899000},"source":"Crossref","is-referenced-by-count":2,"title":["LEGO-CSM: a tool for functional characterization of proteins"],"prefix":"10.1093","volume":"39","author":[{"given":"Thanh Binh","family":"Nguyen","sequence":"first","affiliation":[{"name":"School of Chemistry and Molecular Biosciences, University of Queensland , Brisbane City, QLD 4072, Australia"},{"name":"Systems and Computational Biology, Bio21 Institute, University of Melbourne , Parkville, VIC 3052, Australia"},{"name":"Computational Biology and Clinical Informatics, Baker Heart and Diabetes Institute , Melbourne, VIC 3004, Australia"}]},{"given":"Alex G C","family":"de S\u00e1","sequence":"additional","affiliation":[{"name":"School of Chemistry and Molecular Biosciences, University of Queensland , Brisbane City, QLD 4072, Australia"},{"name":"Systems and Computational Biology, Bio21 Institute, University of Melbourne , Parkville, VIC 3052, Australia"},{"name":"Computational Biology and Clinical Informatics, Baker Heart and Diabetes Institute , Melbourne, VIC 3004, Australia"},{"name":"Baker Department of Cardiometabolic Health, University of Melbourne , Parkville, VIC 3010, Australia"}]},{"given":"Carlos H M","family":"Rodrigues","sequence":"additional","affiliation":[{"name":"School of Chemistry and Molecular Biosciences, University of Queensland , Brisbane City, QLD 4072, Australia"},{"name":"Systems and Computational Biology, Bio21 Institute, University of Melbourne , Parkville, VIC 3052, Australia"},{"name":"Computational Biology and Clinical Informatics, Baker Heart and Diabetes Institute , Melbourne, VIC 3004, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3004-2119","authenticated-orcid":false,"given":"Douglas E V","family":"Pires","sequence":"additional","affiliation":[{"name":"Systems and Computational Biology, Bio21 Institute, University of Melbourne , Parkville, VIC 3052, Australia"},{"name":"Computational Biology and Clinical Informatics, Baker Heart and Diabetes Institute , Melbourne, VIC 3004, Australia"},{"name":"School of Computing and Information Systems, University of Melbourne , Parkville, VIC 3052, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2948-2413","authenticated-orcid":false,"given":"David B","family":"Ascher","sequence":"additional","affiliation":[{"name":"School of Chemistry and Molecular Biosciences, University of Queensland , Brisbane City, QLD 4072, Australia"},{"name":"Systems and Computational Biology, Bio21 Institute, University of Melbourne , Parkville, VIC 3052, Australia"},{"name":"Computational Biology and Clinical Informatics, Baker Heart and Diabetes Institute , Melbourne, VIC 3004, Australia"},{"name":"Baker Department of Cardiometabolic Health, University of Melbourne , Parkville, VIC 3010, Australia"},{"name":"School of Computing and Information Systems, University of Melbourne , Parkville, VIC 3052, Australia"}]}],"member":"286","published-online":{"date-parts":[[2023,6,29]]},"reference":[{"key":"2023070819154481000_btad402-B1","author":"Akdel","year":"2021"},{"key":"2023070819154481000_btad402-B2","doi-asserted-by":"crossref","first-page":"3387","DOI":"10.1093\/bioinformatics\/btx431","article-title":"DeepLoc: prediction of protein subcellular localization using deep learning","volume":"33","author":"Almagro Armenteros","year":"2017","journal-title":"Bioinformatics"},{"key":"2023070819154481000_btad402-B3","doi-asserted-by":"crossref","first-page":"871","DOI":"10.1126\/science.abj8754","article-title":"Accurate prediction of protein structures and interactions using a three-track neural network","volume":"373","author":"Baek","year":"2021","journal-title":"Science"},{"key":"2023070819154481000_btad402-B4","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1038\/nrg2918","article-title":"Network medicine: a network-based approach to human disease","volume":"12","author":"Barabasi","year":"2011","journal-title":"Nat Rev Genet"},{"key":"2023070819154481000_btad402-B5","doi-asserted-by":"crossref","DOI":"10.3390\/ijms20112845","article-title":"Prediction of enzyme function based on three parallel deep CNN and amino acid mutation","volume":"20","author":"Gao","year":"2019","journal-title":"Int J Mol Sci"},{"key":"2023070819154481000_btad402-B6","doi-asserted-by":"crossref","first-page":"3168","DOI":"10.1038\/s41467-021-23303-9","article-title":"Structure-based protein function prediction using graph convolutional networks","volume":"12","author":"Gligorijevic","year":"2021","journal-title":"Nat Commun"},{"key":"2023070819154481000_btad402-B7","doi-asserted-by":"crossref","first-page":"367","DOI":"10.1016\/j.compbiolchem.2004.09.006","article-title":"Comparing two K-category assignments by a K-category correlation coefficient","volume":"28","author":"Gorodkin","year":"2004","journal-title":"Comput Biol Chem"},{"key":"2023070819154481000_btad402-B8","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1186\/s13059-016-1037-6","article-title":"An expanded evaluation of protein function prediction methods shows an improvement in accuracy","volume":"17","author":"Jiang","year":"2016","journal-title":"Genome Biol"},{"key":"2023070819154481000_btad402-B9","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","article-title":"Highly accurate protein structure prediction with AlphaFold","volume":"596","author":"Jumper","year":"2021","journal-title":"Nature"},{"key":"2023070819154481000_btad402-B10","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1016\/j.ymeth.2014.11.017","article-title":"Prediction of drug gene associations via ontological profile similarity with application to drug repositioning","volume":"74","author":"Kissa","year":"2015","journal-title":"Methods"},{"key":"2023070819154481000_btad402-B11","doi-asserted-by":"crossref","first-page":"422","DOI":"10.1093\/bioinformatics\/btz595","article-title":"DeepGOPlus: improved protein function prediction from sequence","volume":"36","author":"Kulmanov","year":"2020","journal-title":"Bioinformatics"},{"key":"2023070819154481000_btad402-B12","doi-asserted-by":"crossref","first-page":"1551","DOI":"10.1038\/nprot.2013.092","article-title":"Large-scale gene function analysis with the PANTHER classification system","volume":"8","author":"Mi","year":"2013","journal-title":"Nat Protoc"},{"key":"2023070819154481000_btad402-B13","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1093\/bioinformatics\/btt691","article-title":"mCSM: predicting the effects of mutations in proteins using graph-based signatures","volume":"30","author":"Pires","year":"2014","journal-title":"Bioinformatics"},{"key":"2023070819154481000_btad402-B14","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1038\/nmeth.2340","article-title":"A large-scale evaluation of computational protein function prediction","volume":"10","author":"Radivojac","year":"2013","journal-title":"Nat Methods"},{"key":"2023070819154481000_btad402-B15","doi-asserted-by":"crossref","first-page":"13996","DOI":"10.1073\/pnas.1821905116","article-title":"Deep learning enables high-quality and high-throughput prediction of enzyme commission numbers","volume":"116","author":"Ryu","year":"2019","journal-title":"Proc Natl Acad Sci USA"},{"key":"2023070819154481000_btad402-B16","doi-asserted-by":"crossref","first-page":"D480","DOI":"10.1093\/nar\/gkaa1100","article-title":"UniProt: the universal protein knowledgebase in 2021","volume":"49","author":"UniProt","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2023070819154481000_btad402-B17","doi-asserted-by":"crossref","first-page":"536","DOI":"10.1016\/j.omtn.2021.08.016","article-title":"TSMDA: target and symptom-based computational model for miRNA-disease-association prediction","volume":"26","author":"Uthayopas","year":"2021","journal-title":"Mol Ther Nucleic Acids"},{"key":"2023070819154481000_btad402-B18","doi-asserted-by":"crossref","first-page":"459","DOI":"10.3389\/fgene.2019.00459","article-title":"Gradient boosting decision Tree-Based method for predicting interactions between target genes and drugs","volume":"10","author":"Xuan","year":"2019","journal-title":"Front Genet"},{"key":"2023070819154481000_btad402-B19","doi-asserted-by":"crossref","first-page":"W469","DOI":"10.1093\/nar\/gkab398","article-title":"NetGO 2.0: improving large-scale protein function prediction with massive sequence, text, domain, family and network information","volume":"49","author":"Yao","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2023070819154481000_btad402-B20","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1093\/bib\/bbv033","article-title":"Integrative approaches for predicting microRNA function and prioritizing disease-related microRNA using biological interaction networks","volume":"17","author":"Zeng","year":"2016","journal-title":"Brief Bioinform"},{"key":"2023070819154481000_btad402-B21","doi-asserted-by":"crossref","first-page":"396","DOI":"10.1109\/TCBB.2017.2701379","article-title":"Integrating multiple heterogeneous networks for novel LncRNA-disease association inference","volume":"16","author":"Zhang","year":"2019","journal-title":"IEEE\/ACM Trans Comput Biol Bioinform"},{"key":"2023070819154481000_btad402-B22","doi-asserted-by":"crossref","first-page":"W358","DOI":"10.1093\/nar\/gkn276","article-title":"GOEAST: a web-based software toolkit for gene ontology enrichment analysis","volume":"36","author":"Zheng","year":"2008","journal-title":"Nucleic Acids Res"},{"key":"2023070819154481000_btad402-B23","doi-asserted-by":"crossref","first-page":"244","DOI":"10.1186\/s13059-019-1835-8","article-title":"The CAFA challenge reports improved protein function prediction and new functional annotations for hundreds of genes through experimental screens","volume":"20","author":"Zhou","year":"2019","journal-title":"Genome Biol"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btad402\/50739846\/btad402.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/7\/btad402\/50843002\/btad402.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/7\/btad402\/50843002\/btad402.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,8]],"date-time":"2023-07-08T19:16:10Z","timestamp":1688843770000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btad402\/7210257"}},"subtitle":[],"editor":[{"given":"Alfonso","family":"Valencia","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2023,6,29]]},"references-count":23,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2023,7,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btad402","relation":{},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2023,7,1]]},"published":{"date-parts":[[2023,6,29]]},"article-number":"btad402"}}