{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T22:45:33Z","timestamp":1776120333417,"version":"3.50.1"},"reference-count":26,"publisher":"Oxford University Press (OUP)","issue":"Supplement_1","license":[{"start":{"date-parts":[[2023,6,30]],"date-time":"2023-06-30T00:00:00Z","timestamp":1688083200000},"content-version":"vor","delay-in-days":29,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100002341","name":"Academy of Finland","doi-asserted-by":"publisher","award":["314445"],"award-info":[{"award-number":["314445"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002341","name":"Academy of Finland","doi-asserted-by":"publisher","award":["328401"],"award-info":[{"award-number":["328401"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002341","name":"Academy of Finland","doi-asserted-by":"publisher","award":["338836"],"award-info":[{"award-number":["338836"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002341","name":"Academy of Finland","doi-asserted-by":"publisher","award":["314672"],"award-info":[{"award-number":["314672"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Sigrid Juselius Foundation"},{"DOI":"10.13039\/501100004012","name":"Jane and Aatos Erkko Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004012","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Institute of Health","award":["1R01GM132649"],"award-info":[{"award-number":["1R01GM132649"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,30]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Signal peptides (SPs) are short amino acid segments present at the N-terminus of newly synthesized proteins that facilitate protein translocation into the lumen of the endoplasmic reticulum, after which they are cleaved off. Specific regions of SPs influence the efficiency of protein translocation, and small changes in their primary structure can abolish protein secretion altogether. The lack of conserved motifs across SPs, sensitivity to mutations, and variability in the length of the peptides make SP prediction a challenging task that has been extensively pursued over the years.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>We introduce TSignal, a deep transformer-based neural network architecture that utilizes BERT language models and dot-product attention techniques. TSignal predicts the presence of SPs and the cleavage site between the SP and the translocated mature protein. We use common benchmark datasets and show competitive accuracy in terms of SP presence prediction and state-of-the-art accuracy in terms of cleavage site prediction for most of the SP types and organism groups. We further illustrate that our fully data-driven trained model identifies useful biological information on heterogeneous test sequences.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>TSignal is available at: https:\/\/github.com\/Dumitrescu-Alexandru\/TSignal.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btad228","type":"journal-article","created":{"date-parts":[[2023,5,25]],"date-time":"2023-05-25T11:29:12Z","timestamp":1685014152000},"page":"i347-i356","source":"Crossref","is-referenced-by-count":18,"title":["TSignal: a transformer model for signal peptide prediction"],"prefix":"10.1093","volume":"39","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0909-9484","authenticated-orcid":false,"given":"Alexandru","family":"Dumitrescu","sequence":"first","affiliation":[{"name":"Department of Computer Science, Aalto University , Espoo 02150, Finland"},{"name":"Institute of Biotechnology, HiLIFE, University of Helsinki , Helsinki 00014, Finland"}]},{"given":"Emmi","family":"Jokinen","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Aalto University , Espoo 02150, Finland"}]},{"given":"Anja","family":"Paatero","sequence":"additional","affiliation":[{"name":"Institute of Biotechnology, HiLIFE, University of Helsinki , Helsinki 00014, Finland"}]},{"given":"Juho","family":"Kellosalo","sequence":"additional","affiliation":[{"name":"Institute of Biotechnology, HiLIFE, University of Helsinki , Helsinki 00014, Finland"}]},{"given":"Ville O","family":"Paavilainen","sequence":"additional","affiliation":[{"name":"Institute of Biotechnology, HiLIFE, University of Helsinki , Helsinki 00014, Finland"}]},{"given":"Harri","family":"L\u00e4hdesm\u00e4ki","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Aalto University , Espoo 02150, Finland"}]}],"member":"286","published-online":{"date-parts":[[2023,6,30]]},"reference":[{"key":"2023063008143146000_btad228-B1","doi-asserted-by":"crossref","first-page":"420","DOI":"10.1038\/s41587-019-0036-z","article-title":"SignalP 5.0 improves signal peptide predictions using deep neural networks","volume":"37","author":"Almagro Armenteros","year":"2019","journal-title":"Nat Biotechnol"},{"key":"2023063008143146000_btad228-B2","doi-asserted-by":"crossref","first-page":"2811","DOI":"10.1093\/bioinformatics\/btq530","article-title":"Combined prediction of Tat and Sec signal peptides with hidden Markov models","volume":"26","author":"Bagos","year":"2010","journal-title":"Bioinformatics"},{"key":"2023063008143146000_btad228-B3","doi-asserted-by":"crossref","first-page":"D283","DOI":"10.1093\/nar\/gku1119","article-title":"Expediting topology data gathering for the topdb database","volume":"43","author":"Dobson","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2023063008143146000_btad228-B4","doi-asserted-by":"crossref","first-page":"7112","DOI":"10.1109\/TPAMI.2021.3095381","article-title":"ProtTrans: Toward understanding the language of life through self-supervised learning","volume":"44","author":"Elnaggar","year":"2022","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2023063008143146000_btad228-B5","doi-asserted-by":"crossref","first-page":"2172","DOI":"10.1093\/bioinformatics\/btn422","article-title":"High-performance signal peptide prediction based on sequence alignment techniques","volume":"24","author":"Frank","year":"2008","journal-title":"Bioinformatics"},{"key":"2023063008143146000_btad228-B6","first-page":"249","volume-title":"Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics","author":"Glorot","year":"2010"},{"key":"2023063008143146000_btad228-B7","first-page":"1321","volume-title":"Proceedings of the 34th International Conference on Machine Learning","author":"Guo","year":"2017"},{"key":"2023063008143146000_btad228-B8","author":"Izmailov","year":"2018"},{"key":"2023063008143146000_btad228-B9","doi-asserted-by":"crossref","first-page":"1652","DOI":"10.1110\/ps.0303703","article-title":"Prediction of lipoprotein signal peptides in gram-negative bacteria","volume":"12","author":"Juncker","year":"2003","journal-title":"Protein Sci"},{"key":"2023063008143146000_btad228-B10","doi-asserted-by":"crossref","first-page":"1027","DOI":"10.1016\/j.jmb.2004.03.016","article-title":"A combined transmembrane topology and signal peptide prediction method","volume":"338","author":"K\u00e4ll","year":"2004","journal-title":"J Mol Biol"},{"key":"2023063008143146000_btad228-B11","doi-asserted-by":"crossref","first-page":"828","DOI":"10.2337\/db11-0878","article-title":"Impaired cleavage of preproinsulin signal peptide linked to autosomal-dominant diabetes","volume":"61","author":"Liu","year":"2012","journal-title":"Diabetes"},{"key":"2023063008143146000_btad228-B12","doi-asserted-by":"crossref","first-page":"422","DOI":"10.1016\/j.ejcb.2018.06.003","article-title":"A comprehensive review of signal peptides: structure, roles, and applications","volume":"97","author":"Owji","year":"2018","journal-title":"Eur J Cell Biol"},{"key":"2023063008143146000_btad228-B13","doi-asserted-by":"crossref","first-page":"2559","DOI":"10.1093\/hmg\/11.21.2559","article-title":"Mutation of the signal peptide region of the bicistronic gene dspp affects translocation to the endoplasmic reticulum and results in defective dentine biomineralization","volume":"11","author":"Rajpar","year":"2002","journal-title":"Hum Mol Genet"},{"key":"2023063008143146000_btad228-B14","doi-asserted-by":"crossref","first-page":"e1000213","DOI":"10.1371\/journal.pcbi.1000213","article-title":"Transmembrane topology and signal peptide prediction using dynamic Bayesian networks","volume":"4","author":"Reynolds","year":"2008","journal-title":"PLoS Comput Biol"},{"key":"2023063008143146000_btad228-B15","doi-asserted-by":"crossref","first-page":"1690","DOI":"10.1093\/bioinformatics\/btx818","article-title":"Deepsig: deep learning improves signal peptide detection in proteins","volume":"34","author":"Savojardo","year":"2018","journal-title":"Bioinformatics"},{"key":"2023063008143146000_btad228-B16","doi-asserted-by":"crossref","first-page":"D344","DOI":"10.1093\/nar\/gks1067","article-title":"New and continuing developments at PROSITE","volume":"41","author":"Sigrist","year":"2013","journal-title":"Nucleic Acids Res"},{"key":"2023063008143146000_btad228-B17","author":"Simonyan","year":"2014"},{"key":"2023063008143146000_btad228-B18","doi-asserted-by":"crossref","first-page":"11591","DOI":"10.1074\/jbc.275.16.11591","article-title":"The twin arginine consensus motif of tat signal peptides is involved in sec-independent protein targeting in Escherichia coli","volume":"275","author":"Stanley","year":"2000","journal-title":"J Biol Chem"},{"key":"2023063008143146000_btad228-B19","doi-asserted-by":"crossref","first-page":"1023","DOI":"10.1038\/s41587-021-01156-3","article-title":"SignalP 6.0 predicts all five types of signal peptides using protein language models","volume":"40","author":"Teufel","year":"2022","journal-title":"Nat Biotechnol"},{"key":"2023063008143146000_btad228-B20","doi-asserted-by":"crossref","first-page":"2255","DOI":"10.1073\/pnas.79.7.2255","article-title":"Post-translational modification and processing of Escherichia coli prolipoprotein in vitro","volume":"79","author":"Tokunaga","year":"1982","journal-title":"Proc Natl Acad Sci USA"},{"key":"2023063008143146000_btad228-B21","doi-asserted-by":"crossref","first-page":"W401","DOI":"10.1093\/nar\/gkv485","article-title":"The TOPCONS web server for consensus prediction of membrane protein topology and signal peptides","volume":"43","author":"Tsirigos","year":"2015","journal-title":"Nucleic Acids Res"},{"key":"2023063008143146000_btad228-B22","doi-asserted-by":"crossref","first-page":"D506","DOI":"10.1093\/nar\/gky1049","article-title":"Uniprot: a worldwide hub of protein knowledge","volume":"47","author":"UniProt Consortium","year":"2019","journal-title":"Nucleic Acids Res"},{"key":"2023063008143146000_btad228-B23","volume-title":"Advances in Neural Information Processing Systems","author":"Vaswani","year":"2017"},{"key":"2023063008143146000_btad228-B24","doi-asserted-by":"crossref","first-page":"2928","DOI":"10.1093\/bioinformatics\/btn550","article-title":"SPOCTOPUS: a combined predictor of signal peptides and membrane protein topology","volume":"24","author":"Viklund","year":"2008","journal-title":"Bioinformatics"},{"key":"2023063008143146000_btad228-B25","doi-asserted-by":"crossref","first-page":"D222","DOI":"10.1093\/nar\/gkm800","article-title":"PPT-DB: the protein property prediction and testing database","volume":"36","author":"Wishart","year":"2008","journal-title":"Nucleic Acids Res"},{"key":"2023063008143146000_btad228-B26","doi-asserted-by":"crossref","first-page":"3679","DOI":"10.1021\/acs.jcim.0c00401","article-title":"Signal-3L 3.0: improving signal peptide prediction through combining attention deep learning with Window-Based scoring","volume":"60","author":"Zhang","year":"2020","journal-title":"J Chem Inf Model"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/Supplement_1\/i347\/50741363\/btad228.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/Supplement_1\/i347\/50741363\/btad228.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,30]],"date-time":"2023-06-30T04:15:38Z","timestamp":1688098538000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/39\/Supplement_1\/i347\/7210432"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,1]]},"references-count":26,"journal-issue":{"issue":"Supplement_1","published-print":{"date-parts":[[2023,6,30]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btad228","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2022.06.02.493958","asserted-by":"object"}]},"ISSN":["1367-4803","1367-4811"],"issn-type":[{"value":"1367-4803","type":"print"},{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2023,6,1]]},"published":{"date-parts":[[2023,6,1]]}}}