{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T15:36:17Z","timestamp":1773502577131,"version":"3.50.1"},"reference-count":62,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T00:00:00Z","timestamp":1755561600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T00:00:00Z","timestamp":1755561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001734","name":"Copenhagen University","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100001734","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["BMC Bioinformatics"],"DOI":"10.1186\/s12859-025-06220-2","type":"journal-article","created":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T11:48:15Z","timestamp":1755604095000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["NetStart 2.0: prediction of eukaryotic translation initiation sites using a protein language model"],"prefix":"10.1186","volume":"26","author":[{"given":"Line Sandvad","family":"Nielsen","sequence":"first","affiliation":[]},{"given":"Anders Gorm","family":"Pedersen","sequence":"additional","affiliation":[]},{"given":"Ole","family":"Winther","sequence":"additional","affiliation":[]},{"given":"Henrik","family":"Nielsen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,19]]},"reference":[{"issue":"4","key":"6220_CR1","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.1016\/0092-8674(78)90039-9","volume":"15","author":"M Kozak","year":"1978","unstructured":"Kozak M. How do eucaryotic ribosomes select initiation regions in messenger RNA? Cell. 1978;15(4):1109\u201323. https:\/\/doi.org\/10.1016\/0092-8674(78)90039-9.","journal-title":"Cell"},{"issue":"2","key":"6220_CR2","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1038\/nrm2838","volume":"11","author":"RJ Jackson","year":"2010","unstructured":"Jackson RJ, Hellen CU, Pestova TV. The mechanism of eukaryotic translation initiation and principles of its regulation. Nat Rev Mol Cell Biol. 2010;11(2):113\u201327. https:\/\/doi.org\/10.1038\/nrm2838.","journal-title":"Nat Rev Mol Cell Biol"},{"issue":"2","key":"6220_CR3","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1083\/jcb.108.2.229","volume":"108","author":"M Kozak","year":"1989","unstructured":"Kozak M. The scanning model for translation: an update. J Cell Biol. 1989;108(2):229\u201341. https:\/\/doi.org\/10.1083\/jcb.108.2.229.","journal-title":"J Cell Biol"},{"issue":"1","key":"6220_CR4","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1186\/s13059-022-02674-2","volume":"23","author":"DE Andreev","year":"2022","unstructured":"Andreev DE, Loughran G, Fedorova AD, Mikhaylova MS, Shatsky IN, Baranov PV. Non-AUG translation initiation in mammals. Genome Biol. 2022;23(1):111. https:\/\/doi.org\/10.1186\/s13059-022-02674-2.","journal-title":"Genome Biol"},{"issue":"2","key":"6220_CR5","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1016\/0092-8674(86)90762-2","volume":"44","author":"M Kozak","year":"1986","unstructured":"Kozak M. Point mutations define a sequence flanking the AUG initiator codon that modulates translation by eukaryotic ribosomes. Cell. 1986;44(2):283\u201392. https:\/\/doi.org\/10.1016\/0092-8674(86)90762-2.","journal-title":"Cell"},{"issue":"1\u20132","key":"6220_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S0378-1119(02)01056-9","volume":"299","author":"M Kozak","year":"2002","unstructured":"Kozak M. Pushing the limits of the scanning mechanism for initiation of translation. Gene. 2002;299(1\u20132):1\u201334. https:\/\/doi.org\/10.1016\/S0378-1119(02)01056-9.","journal-title":"Gene"},{"issue":"12","key":"6220_CR7","doi-asserted-by":"publisher","first-page":"1009","DOI":"10.1016\/j.tibs.2019.07.001","volume":"44","author":"G Hern\u00e1ndez","year":"2019","unstructured":"Hern\u00e1ndez G, Osnaya VG, P\u00e9rez-Mart\u00ednez X. Conservation and variability of the AUG initiation codon context in eukaryotes. Trends Biochem Sci. 2019;44(12):1009\u201321. https:\/\/doi.org\/10.1016\/j.tibs.2019.07.001.","journal-title":"Trends Biochem Sci"},{"issue":"1","key":"6220_CR8","doi-asserted-by":"publisher","first-page":"1076","DOI":"10.1038\/s41467-021-21394-y","volume":"12","author":"H Zhang","year":"2021","unstructured":"Zhang H, Wang Y, Wu X, Tang X, Wu C, Lu J. Determinants of genome-wide distribution and evolution of uORFs in eukaryotes. Nat Commun. 2021;12(1):1076. https:\/\/doi.org\/10.1038\/s41467-021-21394-y.","journal-title":"Nat Commun"},{"key":"6220_CR9","unstructured":"Miesfeld RL, McEvoy MM. Biochemistry of mRNA translation. In: Biochemistry, 1st ed. New York: W. W. Norton & Company; 2017. pp. 1118\u20131119."},{"issue":"5","key":"6220_CR10","doi-asserted-by":"publisher","first-page":"624","DOI":"10.1101\/gad.1397906","volume":"20","author":"AV Pisarev","year":"2006","unstructured":"Pisarev AV, Kolupaeva VG, Pisareva VP, Merrick WC, Hellen CU, Pestova TV. Specific functional interactions of nucleotides at key$$-$$ 3 and + 4 positions flanking the initiation codon with components of the mammalian 48S translation initiation complex. Genes Dev. 2006;20(5):624\u201336. https:\/\/doi.org\/10.1101\/gad.1397906.","journal-title":"Genes Dev"},{"issue":"11\u201312","key":"6220_CR11","doi-asserted-by":"publisher","first-page":"474","DOI":"10.1101\/gad.350752.123","volume":"37","author":"TE Dever","year":"2023","unstructured":"Dever TE, Ivanov IP, Hinnebusch AG. Translational regulation by uORFs and start codon selection stringency. Genes Dev. 2023;37(11\u201312):474\u201389. https:\/\/doi.org\/10.1101\/gad.350752.123.","journal-title":"Genes Dev"},{"key":"6220_CR12","unstructured":"Pedersen AG, Nielsen H. Neural network prediction of translation initiation sites in eukaryotes: perspectives for EST and genome analysis. In: Gaasterland T, Karp PD, Karplus K, Ouzounis C, Sander C, Valencia A, editors. Proceedings of the 5th international conference on intelligent systems for molecular biology. Washington, DC: AAAI Press; 1997. pp. 226\u2013233."},{"issue":"3","key":"6220_CR13","doi-asserted-by":"publisher","first-page":"861","DOI":"10.1093\/nar\/gkm1102","volume":"36","author":"S Nakagawa","year":"2008","unstructured":"Nakagawa S, Niimura Y, Gojobori T, Tanaka H, Miura K. Diversity of preferred nucleotide sequences around the translation initiation codon in eukaryote genomes. Nucleic Acids Res. 2008;36(3):861\u201371. https:\/\/doi.org\/10.1093\/nar\/gkm1102.","journal-title":"Nucleic Acids Res"},{"key":"6220_CR14","unstructured":"Krebs JE, Goldstein ES, Kilpatrick ST. Translation. In: Lewin\u2019s Genes XII, 12th ed. Burlington: Jones & Bartlett Learning; 2018. pp. 592\u2013596."},{"issue":"4","key":"6220_CR15","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1038\/cr.2010.25","volume":"20","author":"H Xu","year":"2010","unstructured":"Xu H, Wang P, Fu Y, Zheng Y, Tang Q, Si L, et al. Length of the ORF, position of the first AUG and the Kozak motif are important factors in potential dual-coding transcripts. Cell Res. 2010;20(4):445\u201357. https:\/\/doi.org\/10.1038\/cr.2010.25.","journal-title":"Cell Res"},{"issue":"D1","key":"6220_CR16","doi-asserted-by":"publisher","first-page":"D134","DOI":"10.1093\/nar\/gkad903","volume":"52","author":"EW Sayers","year":"2023","unstructured":"Sayers EW, Cavanaugh M, Clark K, Pruitt KD, Sherry ST, Yankie L, et al. GenBank 2024 update. Nucleic Acids Res. 2023;52(D1):D134\u20137. https:\/\/doi.org\/10.1093\/nar\/gkad903.","journal-title":"Nucleic Acids Res"},{"issue":"9","key":"6220_CR17","doi-asserted-by":"publisher","first-page":"e04825","DOI":"10.1016\/j.heliyon.2020.e04825","volume":"6","author":"N Goel","year":"2020","unstructured":"Goel N, Singh S, Aseri TC. Global sequence features based translation initiation site prediction in human genomic sequences. Heliyon. 2020;6(9):e04825. https:\/\/doi.org\/10.1016\/j.heliyon.2020.e04825.","journal-title":"Heliyon"},{"issue":"7978","key":"6220_CR18","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1038\/s41586-023-06500-y","volume":"621","author":"Y Xiang","year":"2023","unstructured":"Xiang Y, Huang W, Tan L, Chen T, He Y, Irving PS, et al. Pervasive downstream RNA hairpins dynamically dictate start-codon selection. Nature. 2023;621(7978):423\u201330. https:\/\/doi.org\/10.1038\/s41586-023-06500-y.","journal-title":"Nature"},{"issue":"1","key":"6220_CR19","doi-asserted-by":"publisher","first-page":"111973","DOI":"10.1016\/j.yexcr.2020.111973","volume":"391","author":"X Cao","year":"2020","unstructured":"Cao X, Slavoff SA. Non-AUG start codons: expanding and regulating the small and alternative ORFeome. Exp Cell Res. 2020;391(1):111973. https:\/\/doi.org\/10.1016\/j.yexcr.2020.111973.","journal-title":"Exp Cell Res"},{"issue":"1","key":"6220_CR20","doi-asserted-by":"publisher","first-page":"lqad021","DOI":"10.1093\/nargab\/lqad021","volume":"5","author":"J Clauwaert","year":"2023","unstructured":"Clauwaert J, McVey Z, Gupta R, Menschaert G. TIS transformer: remapping the human proteome using deep learning. NAR Genomics Bioinform. 2023;5(1):lqad021. https:\/\/doi.org\/10.1093\/nargab\/lqad021.","journal-title":"NAR Genomics Bioinform"},{"issue":"11","key":"6220_CR21","doi-asserted-by":"publisher","first-page":"e110921","DOI":"10.15252\/embj.2022110921","volume":"42","author":"Y Jin","year":"2023","unstructured":"Jin Y, Ivanov M, Dittrich AN, Nelson AD, Marquardt S. LncRNA FLAIL affects alternative splicing and represses flowering in Arabidopsis. EMBO J. 2023;42(11):e110921. https:\/\/doi.org\/10.15252\/embj.2022110921.","journal-title":"EMBO J"},{"issue":"7","key":"6220_CR22","doi-asserted-by":"publisher","first-page":"1907","DOI":"10.1016\/j.celrep.2019.07.025","volume":"28","author":"K Frikstad","year":"2019","unstructured":"Frikstad K, Molinari E, Thoresen M, Ramsbottom SA, Hughes F, Letteboer SJ, et al. A CEP104-CSPP1 complex is required for formation of primary cilia competent in hedgehog signaling. Cell Rep. 2019;28(7):1907\u201322. https:\/\/doi.org\/10.1016\/j.celrep.2019.07.025.","journal-title":"Cell Rep"},{"issue":"1","key":"6220_CR23","doi-asserted-by":"publisher","first-page":"17113","DOI":"10.1038\/s41598-018-35085-0","volume":"8","author":"C Fuster-Garc\u00eda","year":"2018","unstructured":"Fuster-Garc\u00eda C, Garc\u00eda-Garc\u00eda G, Jaijo T, Forn\u00e9s N, Ayuso C, Fern\u00e1ndez-Burriel M, et al. High-throughput sequencing for the molecular diagnosis of Usher syndrome reveals 42 novel mutations and consolidates CEP250 as Usher-like disease causative. Sci Rep. 2018;8(1):17113. https:\/\/doi.org\/10.1038\/s41598-018-35085-0.","journal-title":"Sci Rep"},{"key":"6220_CR24","doi-asserted-by":"publisher","DOI":"10.26508\/lsa.201900593","author":"JL Williams","year":"2020","unstructured":"Williams JL, Paudyal A, Awad S, Nicholson J, Grzesik D, Botta J, et al. Mylk3 null C57BL\/6N mice develop cardiomyopathy, whereas Nnt null C57BL\/6J mice do not. Life Sci Alliance. 2020. https:\/\/doi.org\/10.26508\/lsa.201900593.","journal-title":"Life Sci Alliance"},{"issue":"3","key":"6220_CR25","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1093\/hmg\/ddab258","volume":"31","author":"S Lenglez","year":"2022","unstructured":"Lenglez S, Sablon A, F\u00e9nelon G, Boland A, Deleuze JF, Boutoleau-Bretonni\u00e8re C, et al. Distinct functional classes of PDGFRB pathogenic variants in primary familial brain calcification. Hum Mol Genet. 2022;31(3):399\u2013409. https:\/\/doi.org\/10.1093\/hmg\/ddab258.","journal-title":"Hum Mol Genet"},{"issue":"1","key":"6220_CR26","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1186\/s40246-022-00376-1","volume":"16","author":"B Jankovic","year":"2022","unstructured":"Jankovic B, Gojobori T. From shallow to deep: some lessons learned from application of machine learning for recognition of functional genomic elements in human genome. Hum Genomics. 2022;16(1):7. https:\/\/doi.org\/10.1186\/s40246-022-00376-1.","journal-title":"Hum Genomics"},{"issue":"7","key":"6220_CR27","doi-asserted-by":"publisher","first-page":"1125","DOI":"10.1093\/bioinformatics\/bty752","volume":"35","author":"M Kalkatawi","year":"2019","unstructured":"Kalkatawi M, Magana-Mora A, Jankovic B, Bajic VB. DeepGSR: an optimized deep-learning structure for the recognition of genomic signals and regions. Bioinformatics. 2019;35(7):1125\u201332. https:\/\/doi.org\/10.1093\/bioinformatics\/bty752.","journal-title":"Bioinformatics"},{"issue":"14","key":"6220_CR28","doi-asserted-by":"publisher","first-page":"i234","DOI":"10.1093\/bioinformatics\/btx247","volume":"33","author":"S Zhang","year":"2017","unstructured":"Zhang S, Hu H, Jiang T, Zhang L, Zeng J. TITER: predicting translation initiation sites by deep learning. Bioinformatics. 2017;33(14):i234\u201342. https:\/\/doi.org\/10.1093\/bioinformatics\/btx247.","journal-title":"Bioinformatics"},{"issue":"17","key":"6220_CR29","doi-asserted-by":"publisher","first-page":"4053","DOI":"10.1093\/bioinformatics\/btac454","volume":"38","author":"Q Liu","year":"2022","unstructured":"Liu Q, Fang H, Wang X, Wang M, Li S, Coin LJ, et al. DeepGenGrep: a general deep learning-based predictor for multiple genomic signals and regions. Bioinformatics. 2022;38(17):4053\u201361. https:\/\/doi.org\/10.1093\/bioinformatics\/btac454.","journal-title":"Bioinformatics"},{"issue":"7","key":"6220_CR30","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1038\/s41576-019-0122-6","volume":"20","author":"G Eraslan","year":"2019","unstructured":"Eraslan G, Avsec \u017d, Gagneur J, Theis FJ. Deep learning: new computational modelling techniques for genomics. Nat Rev Genet. 2019;20(7):389\u2013403. https:\/\/doi.org\/10.1038\/s41576-019-0122-6.","journal-title":"Nat Rev Genet"},{"issue":"suppl-2","key":"6220_CR31","doi-asserted-by":"publisher","first-page":"W309","DOI":"10.1093\/nar\/gkh379","volume":"32","author":"M Stanke","year":"2004","unstructured":"Stanke M, Steinkamp R, Waack S, Morgenstern B. AUGUSTUS: a web server for gene finding in eukaryotes. Nucleic Acids Res. 2004;32(suppl-2):W309\u201312. https:\/\/doi.org\/10.1093\/nar\/gkh379.","journal-title":"Nucleic Acids Res"},{"issue":"suppl-2","key":"6220_CR32","doi-asserted-by":"publisher","first-page":"W465","DOI":"10.1093\/nar\/gki458","volume":"33","author":"M Stanke","year":"2005","unstructured":"Stanke M, Morgenstern B. AUGUSTUS: a web server for gene prediction in eukaryotes that allows user-defined constraints. Nucleic Acids Res. 2005;33(suppl-2):W465\u20137. https:\/\/doi.org\/10.1093\/nar\/gki458.","journal-title":"Nucleic Acids Res"},{"key":"6220_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12864-020-6707-9","volume":"21","author":"N Scalzitti","year":"2020","unstructured":"Scalzitti N, Jeannin-Girardon A, Collet P, Poch O, Thompson JD. A benchmark study of ab initio gene prediction methods in diverse eukaryotic organisms. BMC Genomics. 2020;21:1\u201320. https:\/\/doi.org\/10.1186\/s12864-020-6707-9.","journal-title":"BMC Genomics"},{"issue":"W1","key":"6220_CR34","doi-asserted-by":"publisher","first-page":"W123","DOI":"10.1093\/nar\/gkt418","volume":"41","author":"KJ Hoff","year":"2013","unstructured":"Hoff KJ, Stanke M. WebAUGUSTUS\u2014a web service for training AUGUSTUS and predicting genes in eukaryotes. Nucleic Acids Res. 2013;41(W1):W123\u20138. https:\/\/doi.org\/10.1093\/nar\/gkt418.","journal-title":"Nucleic Acids Res"},{"issue":"suppl-2","key":"6220_CR35","doi-asserted-by":"publisher","first-page":"W435","DOI":"10.1093\/nar\/gkl200","volume":"34","author":"M Stanke","year":"2006","unstructured":"Stanke M, Keller O, Gunduz I, Hayes A, Waack S, Morgenstern B. AUGUSTUS: ab initio prediction of alternative transcripts. Nucleic Acids Res. 2006;34(suppl-2):W435\u20139. https:\/\/doi.org\/10.1093\/nar\/gkl200.","journal-title":"Nucleic Acids Res"},{"issue":"12","key":"6220_CR36","doi-asserted-by":"publisher","first-page":"btae685","DOI":"10.1093\/bioinformatics\/btae685","volume":"40","author":"L Gabriel","year":"2024","unstructured":"Gabriel L, Becker F, Hoff KJ, Stanke M. Tiberius: end-to-end deep learning with an HMM for gene prediction. Bioinformatics. 2024;40(12):btae685. https:\/\/doi.org\/10.1093\/bioinformatics\/btae685.","journal-title":"Bioinformatics"},{"issue":"3","key":"6220_CR37","doi-asserted-by":"publisher","first-page":"3713","DOI":"10.1007\/s11042-022-13428-4","volume":"82","author":"D Khurana","year":"2023","unstructured":"Khurana D, Koli A, Khatter K, Singh S. Natural language processing: state of the art, current trends and challenges. Multimed Tools Appl. 2023;82(3):3713\u201344. https:\/\/doi.org\/10.1007\/s11042-022-13428-4.","journal-title":"Multimed Tools Appl"},{"issue":"10","key":"6220_CR38","doi-asserted-by":"publisher","first-page":"7112","DOI":"10.1109\/TPAMI.2021.3095381","volume":"44","author":"A Elnaggar","year":"2021","unstructured":"Elnaggar A, Heinzinger M, Dallago C, Rehawi G, Wang Y, Jones L, et al. ProtTrans: toward understanding the language of life through self-supervised learning. IEEE Trans Pattern Anal Mach Intell. 2021;44(10):7112\u201327. https:\/\/doi.org\/10.1109\/TPAMI.2021.3095381.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"6637","key":"6220_CR39","doi-asserted-by":"publisher","first-page":"1123","DOI":"10.1126\/science.ade2574","volume":"379","author":"Z Lin","year":"2023","unstructured":"Lin Z, Akin H, Rao R, Hie B, Zhu Z, Lu W, et al. Evolutionary-scale prediction of atomic-level protein structure with a language model. Science. 2023;379(6637):1123\u201330. https:\/\/doi.org\/10.1126\/science.ade2574.","journal-title":"Science"},{"key":"6220_CR40","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, et\u00a0al. Attention is all you need. In: Guyon I, von Luxburg U, Bengio S, Wallach H, Fergus S, Vishwanathan S, et\u00a0al., editors. Advances in neural information processing systems, vol.\u00a030. Red Hook: Curran Associates, Inc.; 2017. Available from: https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html."},{"issue":"8","key":"6220_CR41","doi-asserted-by":"publisher","first-page":"2102","DOI":"10.1093\/bioinformatics\/btac020","volume":"38","author":"N Brandes","year":"2022","unstructured":"Brandes N, Ofer D, Peleg Y, Rappoport N, Linial M. ProteinBERT: a universal deep-learning model of protein sequence and function. Bioinformatics. 2022;38(8):2102\u201310. https:\/\/doi.org\/10.1093\/bioinformatics\/btac020.","journal-title":"Bioinformatics"},{"issue":"D1","key":"6220_CR42","doi-asserted-by":"publisher","first-page":"D733","DOI":"10.1093\/nar\/gkv1189","volume":"44","author":"NA O\u2019Leary","year":"2015","unstructured":"O\u2019Leary NA, Wright MW, Brister JR, Ciufo S, Haddad D, McVeigh R, et al. Reference sequence (RefSeq) database at NCBI: current status, taxonomic expansion, and functional annotation. Nucleic Acids Res. 2015;44(D1):D733\u201345. https:\/\/doi.org\/10.1093\/nar\/gkv1189.","journal-title":"Nucleic Acids Res"},{"key":"6220_CR43","unstructured":"Thibaud-Nissen F, Souvorov A, Murphy T, DiCuccio M, Kitts P. Eukaryotic genome annotation pipeline. In: The NCBI handbook [Internet], 2nd ed. Bethesda: National Center for Biotechnology Information (US); 2013. Available from: https:\/\/www.ncbi.nlm.nih.gov\/books\/NBK169439\/."},{"key":"6220_CR44","unstructured":"Souvorov A, Kapustin Y, Kiryutin B, Chetvernin V, Tatusova T, Lipman D. Gnomon\u2014NCBI eukaryotic gene prediction tool. National Center for Biotechnology Information; 2010. Available from: https:\/\/www.ncbi.nlm.nih.gov\/core\/assets\/genome\/files\/Gnomon-description.pdf."},{"issue":"4","key":"6220_CR45","doi-asserted-by":"publisher","first-page":"lqad088","DOI":"10.1093\/nargab\/lqad088","volume":"5","author":"F Teufel","year":"2023","unstructured":"Teufel F, G\u00edslason MH, Almagro Armenteros JJ, Johansen AR, Winther O, Nielsen H. GraphPart: homology partitioning for biological sequence analysis. NAR Genomics Bioinform. 2023;5(4):lqad088. https:\/\/doi.org\/10.1093\/nargab\/lqad088.","journal-title":"NAR Genomics Bioinform"},{"issue":"11","key":"6220_CR46","doi-asserted-by":"publisher","first-page":"1026","DOI":"10.1038\/nbt.3988","volume":"35","author":"M Steinegger","year":"2017","unstructured":"Steinegger M, S\u00f6ding J. MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets. Nat Biotechnol. 2017;35(11):1026\u20138. https:\/\/doi.org\/10.1038\/nbt.3988.","journal-title":"Nat Biotechnol"},{"key":"6220_CR47","unstructured":"Teufel F, Stahlhut C, Refsgaard J, Nielsen H, Winther O, Madsen D. SecretoGen: towards prediction of signal peptides for efficient protein secretion. In: NeurIPS 2023 generative AI and biology (GenBio) workshop; 2023. Available from: https:\/\/openreview.net\/forum?id=vXXEfmYsvS."},{"key":"6220_CR48","doi-asserted-by":"publisher","first-page":"baaa062","DOI":"10.1093\/database\/baaa062","volume":"2020","author":"CL Schoch","year":"2020","unstructured":"Schoch CL, Ciufo S, Domrachev M, Hotton CL, Kannan S, Khovanskaya R, et al. NCBI taxonomy: a comprehensive update on curation, resources and tools. Database. 2020;2020:baaa062. https:\/\/doi.org\/10.1093\/database\/baaa062.","journal-title":"Database"},{"key":"6220_CR49","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-024-02523-z","author":"H Dalla-Torre","year":"2024","unstructured":"Dalla-Torre H, Gonzalez L, Mendoza-Revilla J, Lopez Carranza N, Grzywaczewski AH, Oteri F, et al. Nucleotide transformer: building and evaluating robust foundation models for human genomics. Nat Methods. 2024. https:\/\/doi.org\/10.1038\/s41592-024-02523-z.","journal-title":"Nat Methods"},{"key":"6220_CR50","unstructured":"ESM. Documentation of ESM-2 from HuggingFace. Available from: https:\/\/huggingface.co\/docs\/transformers\/v4.52.2\/en\/model_doc\/esm#transformers."},{"key":"6220_CR51","doi-asserted-by":"publisher","unstructured":"Kingma DP. Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980. 2014. https:\/\/doi.org\/10.48550\/arXiv.1412.6980.","DOI":"10.48550\/arXiv.1412.6980"},{"key":"6220_CR52","first-page":"55","volume-title":"Neural networks: tricks of the trade","author":"L Prechelt","year":"2002","unstructured":"Prechelt L. Early stopping\u2014But when? In: Orr GB, M\u00fcller KR, editors. Neural networks: tricks of the trade. Berlin: Springer; 2002. p. 55\u201369."},{"issue":"1","key":"6220_CR53","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R. Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res. 2014;15(1):1929\u201358.","journal-title":"J Mach Learn Res"},{"issue":"2","key":"6220_CR54","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1093\/bioinformatics\/bti774","volume":"22","author":"A Morgulis","year":"2005","unstructured":"Morgulis A, Gertz EM, Sch\u00e4ffer AA, Agarwala R. WindowMasker: window-based masker for sequenced genomes. Bioinformatics. 2005;22(2):134\u201341. https:\/\/doi.org\/10.1093\/bioinformatics\/bti774.","journal-title":"Bioinformatics"},{"key":"6220_CR55","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/gb-2006-7-1-r7","volume":"7","author":"D Zhi","year":"2006","unstructured":"Zhi D, Raphael BJ, Price AL, Tang H, Pevzner PA. Identifying repeat domains in large genomes. Genome Biol. 2006;7:1\u201314. https:\/\/doi.org\/10.1186\/gb-2006-7-1-r7.","journal-title":"Genome Biol"},{"key":"6220_CR56","doi-asserted-by":"publisher","first-page":"305","DOI":"10.3389\/fbioe.2019.00305","volume":"7","author":"N Le","year":"2019","unstructured":"Le N, Yapp E, Nagasundaram N, Yeh HY. Classifying promoters by interpreting the hidden information of DNA sequences via deep learning and combination of continuous fasttext N-grams. Front Bioeng Biotechnol. 2019;7:305. https:\/\/doi.org\/10.3389\/fbioe.2019.00305.","journal-title":"Front Bioeng Biotechnol"},{"key":"6220_CR57","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2024.100994","author":"E Richardson","year":"2024","unstructured":"Richardson E, Trevizani R, Greenbaum JA, Carter H, Nielsen M, Peters B. The receiver operating characteristic curve accurately assesses imbalanced datasets. Patterns. 2024. https:\/\/doi.org\/10.1016\/j.patter.2024.100994.","journal-title":"Patterns"},{"issue":"12","key":"6220_CR58","doi-asserted-by":"publisher","first-page":"1925","DOI":"10.1002\/prot.26582","volume":"91","author":"AD Conte","year":"2023","unstructured":"Conte AD, Mehdiabadi M, Bouhraoua A, Miguel Monzon A, Tosatto SC, Piovesan D. Critical assessment of protein intrinsic disorder prediction (CAID)-Results of round 2. Proteins: structure. Funct Bioinform. 2023;91(12):1925\u201334.","journal-title":"Funct Bioinform"},{"issue":"3","key":"6220_CR59","doi-asserted-by":"publisher","first-page":"e92209","DOI":"10.1371\/journal.pone.0092209","volume":"9","author":"J Keilwagen","year":"2014","unstructured":"Keilwagen J, Grosse I, Grau J. Area under precision-recall curves for weighted and unweighted data. PLoS ONE. 2014;9(3):e92209. https:\/\/doi.org\/10.1371\/journal.pone.0092209.","journal-title":"PLoS ONE"},{"issue":"8","key":"6220_CR60","doi-asserted-by":"publisher","first-page":"1159","DOI":"10.1128\/ec.00113-10","volume":"9","author":"A G\u00fcnzl","year":"2010","unstructured":"G\u00fcnzl A. The pre-mRNA splicing machinery of trypanosomes: Complex or simplified? Eukaryot Cell. 2010;9(8):1159\u201370. https:\/\/doi.org\/10.1128\/ec.00113-10.","journal-title":"Eukaryot Cell"},{"key":"6220_CR61","doi-asserted-by":"publisher","first-page":"4016","DOI":"10.1016\/j.csbj.2020.11.058","volume":"18","author":"J Gr\u00fcnebast","year":"2020","unstructured":"Gr\u00fcnebast J, Clos J. Leishmania: responding to environmental signals and challenges without regulated transcription. Comput Struct Biotechnol J. 2020;18:4016\u201323. https:\/\/doi.org\/10.1016\/j.csbj.2020.11.058.","journal-title":"Comput Struct Biotechnol J"},{"issue":"1","key":"6220_CR62","doi-asserted-by":"publisher","first-page":"7407","DOI":"10.1038\/s41467-024-51844-2","volume":"15","author":"R Schmirler","year":"2024","unstructured":"Schmirler R, Heinzinger M, Rost B. Fine-tuning protein language models boosts predictions across diverse tasks. Nat Commun. 2024;15(1):7407. https:\/\/doi.org\/10.1038\/s41467-024-51844-2.","journal-title":"Nat Commun"}],"container-title":["BMC Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-025-06220-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s12859-025-06220-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s12859-025-06220-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T07:06:23Z","timestamp":1757401583000},"score":1,"resource":{"primary":{"URL":"https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/s12859-025-06220-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,19]]},"references-count":62,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["6220"],"URL":"https:\/\/doi.org\/10.1186\/s12859-025-06220-2","relation":{},"ISSN":["1471-2105"],"issn-type":[{"value":"1471-2105","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,19]]},"assertion":[{"value":"1 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 July 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"The authors declare that they have no competing interests.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"216"}}