{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,11]],"date-time":"2026-05-11T23:17:15Z","timestamp":1778541435286,"version":"3.51.4"},"reference-count":56,"publisher":"Oxford University Press (OUP)","issue":"9","license":[{"start":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T00:00:00Z","timestamp":1757376000000},"content-version":"vor","delay-in-days":8,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100009708","name":"Novo Nordisk Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100009708","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100021373","name":"Swiss Institute of Bioinformatics","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100021373","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100009708","name":"Novo Nordisk Foundation","doi-asserted-by":"publisher","award":["NNF14CC0001"],"award-info":[{"award-number":["NNF14CC0001"]}],"id":[{"id":"10.13039\/501100009708","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100009708","name":"Novo Nordisk Foundation","doi-asserted-by":"publisher","award":["NNF20SA0035590"],"award-info":[{"award-number":["NNF20SA0035590"]}],"id":[{"id":"10.13039\/501100009708","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Representation learning has revolutionized sequence-based prediction of protein function and subcellular localization. Protein networks are an important source of information complementary to sequences, but the use of protein networks has proven to be challenging in the context of machine learning, especially in a cross-species setting.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We leveraged the STRING database of protein networks and orthology relations for 1322 eukaryotes to generate network-based cross-species protein embeddings. We did this by first creating species-specific network embeddings and subsequently aligning them based on orthology relations to facilitate direct cross-species comparisons. We show that these aligned network embeddings ensure consistency across species without sacrificing quality compared to species-specific network embeddings. We also show that the aligned network embeddings are complementary to sequence embedding techniques, despite the use of sequence-based orthology relations in the alignment process. Finally, we validated the embeddings by using them for two well-established tasks: subcellular localization prediction and protein function prediction. Training logistic regression classifiers on aligned network embeddings and sequence embeddings improved the accuracy over using sequence alone, reaching performance numbers close to state-of-the-art deep-learning methods.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>The source code and scripts for generating the network-based cross-species protein embeddings are available at https:\/\/github.com\/deweihu96\/SPACE. Precomputed network embeddings and sequence embeddings for all eukaryotic proteins are included in STRING version 12.0 (https:\/\/string-db.org\/cgi\/download).<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaf496","type":"journal-article","created":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T11:58:13Z","timestamp":1757073493000},"source":"Crossref","is-referenced-by-count":6,"title":["SPACE: STRING proteins as complementary embeddings"],"prefix":"10.1093","volume":"41","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-5823-1498","authenticated-orcid":false,"given":"Dewei","family":"Hu","sequence":"first","affiliation":[{"name":"Novo Nordisk Foundation Center for Protein Research, Department of Cellular and Molecular Medicine, Faculty of Health and Medical Sciences, University of Copenhagen , Copenhagen 2200,","place":["Denmark"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Damian","family":"Szklarczyk","sequence":"additional","affiliation":[{"name":"Department of Molecular Life Sciences, University of Zurich , Zurich 8057,","place":["Switzerland"]},{"name":"SIB Swiss Institute of Bioinformatics, Amphip\u00f4le, Quartier UNIL-Sorge , Lausanne 1015,","place":["Switzerland"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7734-9102","authenticated-orcid":false,"given":"Christian","family":"von Mering","sequence":"additional","affiliation":[{"name":"Department of Molecular Life Sciences, University of Zurich , Zurich 8057,","place":["Switzerland"]},{"name":"SIB Swiss Institute of Bioinformatics, Amphip\u00f4le, Quartier UNIL-Sorge , Lausanne 1015,","place":["Switzerland"]}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lars Juhl","family":"Jensen","sequence":"additional","affiliation":[{"name":"Novo Nordisk Foundation Center for Protein Research, Department of Cellular and Molecular Medicine, Faculty of Health and Medical Sciences, University of Copenhagen , Copenhagen 2200,","place":["Denmark"]},{"name":"ZS Discovery, ZS Associates , Kongens Lyngby 2800,","place":["Denmark"]}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2025,9,9]]},"reference":[{"key":"2025092219523514700_btaf496-B1","doi-asserted-by":"crossref","first-page":"iyad031","DOI":"10.1093\/genetics\/iyad031","article-title":"The gene ontology knowledgebase in 2023","volume":"224","author":"Aleksander","year":"2023","journal-title":"Genetics"},{"key":"2025092219523514700_btaf496-B2","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1038\/75556","article-title":"Gene ontology: tool for the unification of biology","volume":"25","author":"Ashburner","year":"2000","journal-title":"Nat Genet"},{"key":"2025092219523514700_btaf496-B3","doi-asserted-by":"crossref","first-page":"100741","DOI":"10.1016\/j.websem.2022.100741","article-title":"Towards the web of embeddings: integrating multiple knowledge graph embedding spaces with FedCoder","volume":"75","author":"Baumgartner","year":"2023","journal-title":"J Web Semant"},{"key":"2025092219523514700_btaf496-B4","doi-asserted-by":"crossref","first-page":"326","DOI":"10.1186\/s12859-022-04873-x","article-title":"TMbed: transmembrane proteins predicted through language model embeddings","volume":"23","author":"Bernhofer","year":"2022","journal-title":"BMC Bioinformatics"},{"key":"2025092219523514700_btaf496-B5","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1002\/prot.25832","article-title":"Machine learning techniques for protein function prediction","volume":"88","author":"Bonetta","year":"2020","journal-title":"Proteins: Struct Funct Bioinform"},{"key":"2025092219523514700_btaf496-B6","doi-asserted-by":"crossref","first-page":"2102","DOI":"10.1093\/bioinformatics\/btac020","article-title":"ProteinBERT: a universal deep-learning model of protein sequence and function","volume":"38","author":"Brandes","year":"2022","journal-title":"Bioinformatics"},{"key":"2025092219523514700_btaf496-B7","doi-asserted-by":"crossref","first-page":"605","DOI":"10.1016\/j.bbamcr.2008.10.016","article-title":"Sorting of lysosomal proteins","volume":"1793","author":"Braulke","year":"2009","journal-title":"Biochim Biophys Acta"},{"key":"2025092219523514700_btaf496-B8","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1186\/s12864-019-6413-7","article-title":"The advantages of the Matthews correlation coefficient (MCC) over F1 score and accuracy in binary classification evaluation","volume":"21","author":"Chicco","year":"2020","journal-title":"BMC Genomics"},{"key":"2025092219523514700_btaf496-B9","first-page":"273","author":"Chu","year":"2019"},{"key":"2025092219523514700_btaf496-B10","doi-asserted-by":"crossref","first-page":"e1000807","DOI":"10.1371\/journal.pcbi.1000807","article-title":"Protein\u2013protein interactions essentials: key concepts to building and analyzing interactome networks","volume":"6","author":"De Las Rivas","year":"2010","journal-title":"PLoS Comput Biol"},{"key":"2025092219523514700_btaf496-B11","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1038\/nrm2378","article-title":"Exiting the Golgi complex","volume":"9","author":"De Matteis","year":"2008","journal-title":"Nat Rev Mol Cell Biol"},{"key":"2025092219523514700_btaf496-B12","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1016\/S1672-0229(04)02027-3","article-title":"Predicting protein subcellular localization: past, present, and future","volume":"2","author":"D\u00f6nnes","year":"2004","journal-title":"Genom Proteom Bioinform"},{"key":"2025092219523514700_btaf496-B13","first-page":"479","author":"Du","year":"2019"},{"key":"2025092219523514700_btaf496-B14","first-page":"392","article-title":"Subcellular localization of proteins","volume":"3","author":"Dubey","year":"2011","journal-title":"Arch Appl Sci Res"},{"key":"2025092219523514700_btaf496-B15","doi-asserted-by":"crossref","first-page":"7112","DOI":"10.1109\/TPAMI.2021.3095381","article-title":"ProtTrans: toward understanding the language of life through self-supervised learning","volume":"44","author":"Elnaggar","year":"2022","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2025092219523514700_btaf496-B16","doi-asserted-by":"crossref","first-page":"e51","DOI":"10.1093\/nar\/gkz132","article-title":"Functional protein representations from biological networks enable diverse cross-species inference","volume":"47","author":"Fan","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2025092219523514700_btaf496-B17","author":"Grover","year":"2016"},{"key":"2025092219523514700_btaf496-B18","doi-asserted-by":"crossref","first-page":"527","DOI":"10.1146\/annurev-pharmtox-040323-040828","article-title":"Artificial intelligence for drug discovery: are we there yet?","volume":"64","author":"Hasselgren","year":"2024","journal-title":"Annu Rev Pharmacol Toxicol"},{"key":"2025092219523514700_btaf496-B19","first-page":"117","author":"Heimann","year":"2018"},{"key":"2025092219523514700_btaf496-B20","doi-asserted-by":"crossref","first-page":"lqac043","DOI":"10.1093\/nargab\/lqac043","article-title":"Contrastive learning on protein embeddings enlightens midnight zone","volume":"4","author":"Heinzinger","year":"2022","journal-title":"NAR Genom Bioinform"},{"key":"2025092219523514700_btaf496-B21","doi-asserted-by":"crossref","first-page":"D389","DOI":"10.1093\/nar\/gkac1022","article-title":"eggnog 6.0: enabling comparative genomics across 12 535 organisms","volume":"51","author":"Hern\u00e1ndez-Plaza","year":"2023","journal-title":"Nucleic Acids Res"},{"key":"2025092219523514700_btaf496-B22","author":"Joulin","year":"2018"},{"key":"2025092219523514700_btaf496-B23","author":"Kalinowski","year":"2020"},{"key":"2025092219523514700_btaf496-B24","doi-asserted-by":"crossref","first-page":"D545","DOI":"10.1093\/nar\/gkaa970","article-title":"KEGG: integrating viruses and cellular organisms","volume":"49","author":"Kanehisa","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2025092219523514700_btaf496-B25","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3633518","article-title":"A survey on graph representation learning methods","volume":"15","author":"Khoshraftar","year":"2024","journal-title":"ACM Trans Intell Syst Technol"},{"key":"2025092219523514700_btaf496-B26","author":"Kipf","year":"2016"},{"key":"2025092219523514700_btaf496-B27","doi-asserted-by":"crossref","first-page":"13863","DOI":"10.1021\/acscatal.3c02743","article-title":"Machine learning-guided protein engineering","volume":"13","author":"Kouba","year":"2023","journal-title":"ACS Catal"},{"key":"2025092219523514700_btaf496-B28","doi-asserted-by":"crossref","first-page":"242","DOI":"10.1109\/NAFOSTED.2017.8108071","volume-title":"2017 4th NAFOSTED Conference on Information and Computer Science","author":"Le","year":"2017"},{"key":"2025092219523514700_btaf496-B29","doi-asserted-by":"crossref","first-page":"btad529","DOI":"10.1093\/bioinformatics\/btad529","article-title":"Joint embedding of biological networks for cross-species functional alignment","volume":"39","author":"Li","year":"2023","journal-title":"Bioinformatics"},{"key":"2025092219523514700_btaf496-B30","doi-asserted-by":"crossref","first-page":"1123","DOI":"10.1126\/science.ade2574","article-title":"Evolutionary-scale prediction of atomic-level protein structure with a language model","volume":"379","author":"Lin","year":"2023","journal-title":"Science"},{"key":"2025092219523514700_btaf496-B31","doi-asserted-by":"crossref","first-page":"btad047","DOI":"10.1093\/bioinformatics\/btad047","article-title":"Accurately modeling biased random walks on weighted networks using node2vec","volume":"39","author":"Liu","year":"2023","journal-title":"Bioinformatics"},{"key":"2025092219523514700_btaf496-B32","doi-asserted-by":"crossref","first-page":"e1011773","DOI":"10.1371\/journal.pcbi.1011773","article-title":"Joint representation of molecular networks from multiple species improves gene classification","volume":"20","author":"Mancuso","year":"2024","journal-title":"PLoS Comput Biol"},{"key":"2025092219523514700_btaf496-B33","author":"Martins","year":"2023"},{"key":"2025092219523514700_btaf496-B35","author":"Mikolov","year":"2013"},{"key":"2025092219523514700_btaf496-B36","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1016\/S0962-8924(97)01197-5","article-title":"Localization of proteins to the Golgi apparatus","volume":"8","author":"Munro","year":"1998","journal-title":"Trends Cell Biol"},{"key":"2025092219523514700_btaf496-B37","doi-asserted-by":"crossref","first-page":"666","DOI":"10.1109\/TCBB.2021.3080386","article-title":"Identifying protein subcellular locations with embeddings-based node2loc","volume":"19","author":"Pan","year":"2022","journal-title":"IEEE\/ACM Trans Comput Biol Bioinform"},{"key":"2025092219523514700_btaf496-B38","doi-asserted-by":"crossref","first-page":"184","DOI":"10.18653\/v1\/P19-1018","volume-title":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","author":"Patra","year":"2019"},{"key":"2025092219523514700_btaf496-B39","first-page":"701","author":"Perozzi","year":"2014"},{"key":"2025092219523514700_btaf496-B40","doi-asserted-by":"crossref","first-page":"16933","DOI":"10.1038\/s41598-022-21366-2","article-title":"Improving protein succinylation sites prediction using embeddings from protein language model","volume":"12","author":"Pokharel","year":"2022","journal-title":"Sci Rep"},{"key":"2025092219523514700_btaf496-B41","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J Mach Learn Res"},{"key":"2025092219523514700_btaf496-B42","doi-asserted-by":"crossref","first-page":"e2016239118","DOI":"10.1073\/pnas.2016239118","article-title":"Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences","volume":"118","author":"Rives","year":"2021","journal-title":"Proc Natl Acad Sci USA"},{"key":"2025092219523514700_btaf496-B43","doi-asserted-by":"crossref","first-page":"2327","DOI":"10.1002\/1873-3468.12307","article-title":"Protein function in precision medicine: deep understanding with machine learning","volume":"590","author":"Rost","year":"2016","journal-title":"FEBS Lett"},{"key":"2025092219523514700_btaf496-B44","doi-asserted-by":"crossref","first-page":"1541","DOI":"10.1016\/j.bbamcr.2006.09.005","article-title":"Proteomics of the peroxisome","volume":"1763","author":"Saleem","year":"2006","journal-title":"Biochim Biophys Acta"},{"key":"2025092219523514700_btaf496-B45","doi-asserted-by":"crossref","first-page":"926","DOI":"10.1093\/bioinformatics\/btu739","article-title":"Uniref clusters: a comprehensive and scalable alternative for improving sequence similarity searches","volume":"31","author":"Suzek","year":"2015","journal-title":"Bioinformatics"},{"key":"2025092219523514700_btaf496-B46","doi-asserted-by":"crossref","first-page":"D638","DOI":"10.1093\/nar\/gkac1000","article-title":"The string database in 2023: protein\u2013protein association networks and functional enrichment analyses for any sequenced genome of interest","volume":"51","author":"Szklarczyk","year":"2023","journal-title":"Nucleic Acids Res"},{"key":"2025092219523514700_btaf496-B47","doi-asserted-by":"crossref","first-page":"W228","DOI":"10.1093\/nar\/gkac278","article-title":"Deeploc 2.0: multi-label subcellular localization prediction using protein language models","volume":"50","author":"Thumuluri","year":"2022","journal-title":"Nucleic Acids Res"},{"key":"2025092219523514700_btaf496-B48","doi-asserted-by":"crossref","first-page":"463","DOI":"10.1038\/s41573-019-0024-5","article-title":"Applications of machine learning in drug discovery and development","volume":"18","author":"Vamathevan","year":"2019","journal-title":"Nat Rev Drug Discov"},{"key":"2025092219523514700_btaf496-B49","doi-asserted-by":"crossref","first-page":"5550","DOI":"10.1038\/s41467-022-32940-7","article-title":"Protein condensation diseases: therapeutic opportunities","volume":"13","author":"Vendruscolo","year":"2022","journal-title":"Nat Commun"},{"key":"2025092219523514700_btaf496-B50","doi-asserted-by":"crossref","first-page":"bbac142","DOI":"10.1093\/bib\/bbac142","article-title":"An analysis of protein language model embeddings for fold prediction","volume":"23","author":"Villegas-Morcillo","year":"2022","journal-title":"Brief Bioinform"},{"key":"2025092219523514700_btaf496-B51","doi-asserted-by":"crossref","first-page":"349","DOI":"10.1016\/j.gpb.2023.04.001","article-title":"Netgo 3.0: protein language model improves large-scale functional annotations","volume":"21","author":"Wang","year":"2023","journal-title":"Genom Proteom Bioinform"},{"key":"2025092219523514700_btaf496-B52","doi-asserted-by":"crossref","first-page":"331","DOI":"10.2174\/1574893609666140212000304","article-title":"Review of protein subcellular localization prediction","volume":"9","author":"Wang","year":"2014","journal-title":"CBIO"},{"key":"2025092219523514700_btaf496-B54","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1109\/TETCI.2019.2952908","article-title":"Random walks: a review of algorithms and applications","volume":"4","author":"Xia","year":"2020","journal-title":"IEEE Trans Emerg Top Comput Intell"},{"key":"2025092219523514700_btaf496-B55","doi-asserted-by":"crossref","first-page":"W469","DOI":"10.1093\/nar\/gkab398","article-title":"Netgo 2.0: improving large-scale protein function prediction with massive sequence, text, domain, family and network information","volume":"49","author":"Yao","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2025092219523514700_btaf496-B56","doi-asserted-by":"crossref","first-page":"W379","DOI":"10.1093\/nar\/gkz388","article-title":"Netgo: improving large-scale protein function prediction with massive network information","volume":"47","author":"You","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2025092219523514700_btaf496-B57","author":"Yuan","year":"2024"},{"key":"2025092219523514700_btaf496-B58","doi-asserted-by":"crossref","first-page":"57","DOI":"10.1016\/j.aiopen.2021.01.001","article-title":"Graph neural networks: a review of methods and applications","volume":"1","author":"Zhou","year":"2020","journal-title":"AI Open"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaf496\/64232074\/btaf496.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/9\/btaf496\/64232074\/btaf496.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/9\/btaf496\/64232074\/btaf496.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T23:52:53Z","timestamp":1758585173000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btaf496\/8250101"}},"subtitle":[],"editor":[{"given":"Lenore","family":"Cowen","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2025,9,1]]},"references-count":56,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2025,9,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaf496","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2024.11.25.625140","asserted-by":"object"}]},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2025,9]]},"published":{"date-parts":[[2025,9,1]]},"article-number":"btaf496"}}