{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T13:09:17Z","timestamp":1770556157955,"version":"3.49.0"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031902512","type":"print"},{"value":"9783031902529","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-90252-9_7","type":"book-chapter","created":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T20:03:48Z","timestamp":1745525028000},"page":"99-117","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["A Phylogenetic Approach to\u00a0Genomic Language Modeling"],"prefix":"10.1007","author":[{"given":"Carlos","family":"Albors","sequence":"first","affiliation":[]},{"given":"Jianan Canal","family":"Li","sequence":"additional","affiliation":[]},{"given":"Gonzalo","family":"Benegas","sequence":"additional","affiliation":[]},{"given":"Chengzhong","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Yun S.","family":"Song","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Benegas, G., Albors, C., Aw, A.J., Ye, C., Song, Y.S.: A DNA language model based on multispecies alignment predicts the effects of genome-wide variants. Nat. Biotechnol. (2025). https:\/\/doi.org\/10.1038\/s41587-024-02511-w","DOI":"10.1038\/s41587-024-02511-w"},{"key":"7_CR2","doi-asserted-by":"publisher","unstructured":"Benegas, G., Eraslan, G., Song, Y.S.: Benchmarking DNA sequence models for causal regulatory variant prediction in human genetics. bioRxiv preprint (2025). https:\/\/doi.org\/10.1101\/2025.02.11.637758","DOI":"10.1101\/2025.02.11.637758"},{"key":"7_CR3","doi-asserted-by":"publisher","unstructured":"Benegas, G., Ye, C., Albors, C., Li, J.C., Song, Y.S.: Genomic language models: opportunities and challenges. Trends Genet. (2025). https:\/\/doi.org\/10.1016\/j.tig.2024.11.013","DOI":"10.1016\/j.tig.2024.11.013"},{"key":"7_CR4","unstructured":"Bommasani, R., et\u00a0al.: On the opportunities and risks of foundation models. arXiv preprint: arXiv:2108.07258 (2021)"},{"key":"7_CR5","doi-asserted-by":"publisher","first-page":"1512","DOI":"10.1038\/s41588-023-01465-0","volume":"55","author":"N Brandes","year":"2023","unstructured":"Brandes, N., Goldman, G., Wang, C.H., Ye, C.J., Ntranos, V.: Genome-wide prediction of disease variant effects with a deep protein language model. Nat. Genet. 55, 1512\u20131522 (2023). https:\/\/doi.org\/10.1038\/s41588-023-01465-0","journal-title":"Nat. Genet."},{"key":"7_CR6","doi-asserted-by":"publisher","unstructured":"Chen, S., et\u00a0al.: A genomic mutational constraint map using variation in 76,156 human genomes. Nature 625, 92\u2013100 (2024). https:\/\/doi.org\/10.1038\/s41586-023-06045-0","DOI":"10.1038\/s41586-023-06045-0"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Cheng, J., Novati, G., et\u00a0al.: Accurate proteome-wide missense variant effect prediction with alphamissense. Science 381(6664), eadg7492 (2023)","DOI":"10.1126\/science.adg7492"},{"key":"7_CR8","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1038\/s41592-024-02523-z","volume":"22","author":"H Dalla-Torre","year":"2025","unstructured":"Dalla-Torre, H., Gonzalez, L., Mendoza-Revilla, J., Lopez Carranza, N., Grzywaczewski, A.H., et al.: Nucleotide Transformer: building and evaluating robust foundation models for human genomics. Nat. Methods 22, 287\u2013297 (2025)","journal-title":"Nat. Methods"},{"key":"7_CR9","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint: arXiv:1810.04805 (2018)"},{"issue":"3","key":"7_CR10","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1093\/sysbio\/22.3.240","volume":"22","author":"J Felsenstein","year":"1973","unstructured":"Felsenstein, J.: Maximum likelihood and minimum-steps methods for estimating evolutionary trees from data on discrete characters. Syst. Biol. 22(3), 240\u2013249 (1973)","journal-title":"Syst. Biol."},{"key":"7_CR11","doi-asserted-by":"publisher","first-page":"368","DOI":"10.1007\/BF01734359","volume":"17","author":"J Felsenstein","year":"1981","unstructured":"Felsenstein, J.: Evolutionary trees from DNA sequences: a maximum likelihood approach. J. Mol. Evol. 17, 368\u2013376 (1981). https:\/\/doi.org\/10.1007\/BF01734359","journal-title":"J. Mol. Evol."},{"key":"7_CR12","unstructured":"Felsenstein, J.: Inferring Phylogenies. Sinauer Associates (2003)"},{"key":"7_CR13","doi-asserted-by":"publisher","unstructured":"Fishman, V., et\u00a0al.: GENA-LM: A family of open-source foundational DNA language models for long sequences. bioRxiv preprint (2023). https:\/\/doi.org\/10.1101\/2023.06.12.544594","DOI":"10.1101\/2023.06.12.544594"},{"issue":"1","key":"7_CR14","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1186\/s13059-023-02986-x","volume":"24","author":"DM Fowler","year":"2023","unstructured":"Fowler, D.M., et al.: An atlas of variant effects to understand the genome at nucleotide resolution. Genome Biol. 24(1), 147 (2023). https:\/\/doi.org\/10.1186\/s13059-023-02986-x","journal-title":"Genome Biol."},{"key":"7_CR15","unstructured":"Gao, H., et\u00a0al.: The landscape of tolerated genetic variation in humans and primates. Science 380(6648), eabn8153 (2023)"},{"key":"7_CR16","unstructured":"Gu, A., Dao, T.: Mamba: linear-time sequence modeling with selective state spaces. In: First Conference on Language Modeling (2024). https:\/\/openreview.net\/forum?id=tEYskw1VY2"},{"issue":"1","key":"7_CR17","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1186\/s13059-023-03024-6","volume":"24","author":"M Jagota","year":"2023","unstructured":"Jagota, M., et al.: Cross-protein transfer learning substantially improves disease variant prediction. Genome Biol. 24(1), 182 (2023)","journal-title":"Genome Biol."},{"issue":"15","key":"7_CR18","doi-asserted-by":"publisher","first-page":"2112","DOI":"10.1093\/bioinformatics\/btab083","volume":"37","author":"Y Ji","year":"2021","unstructured":"Ji, Y., Zhou, Z., Liu, H., Davuluri, R.V.: DNABERT: pre-trained bidirectional encoder representations from transformers model for DNA-language in genome. Bioinformatics 37(15), 2112\u20132120 (2021). https:\/\/doi.org\/10.1093\/bioinformatics\/btab083","journal-title":"Bioinformatics"},{"key":"7_CR19","unstructured":"Kalchbrenner, N., et\u00a0al.: Neural machine translation in linear time. arXiv preprint: https:\/\/arxiv.org\/abs\/1610.10099 (2017)"},{"key":"7_CR20","doi-asserted-by":"publisher","first-page":"735","DOI":"10.1038\/s41586-023-06798-8","volume":"625","author":"L Kuderna","year":"2024","unstructured":"Kuderna, L., et al.: Identification of constrained sequence elements across 239 primate genomes. Nature 625, 735\u2013742 (2024). https:\/\/doi.org\/10.1038\/s41586-023-06798-8","journal-title":"Nature"},{"key":"7_CR21","doi-asserted-by":"publisher","first-page":"D1062","DOI":"10.1093\/nar\/gkx1153","volume":"46","author":"MJ Landrum","year":"2018","unstructured":"Landrum, M.J., Lee, J.M., Benson, M., Brown, G.R., Chao, C., et al.: ClinVar: improving access to variant interpretations and supporting evidence. Nucleic Acids Res. 46, D1062\u2013D1067 (2018). https:\/\/doi.org\/10.1093\/nar\/gkx1153","journal-title":"Nucleic Acids Res."},{"key":"7_CR22","unstructured":"Marin, F.I., et\u00a0al.: BEND: benchmarking DNA language models on biologically meaningful tasks. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=uKB4cFNQFg"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Meier, J., et\u00a0al.: Language models enable zero-shot prediction of the effects of mutations on protein function. In: Advances in Neural Information Processing Systems, vol.\u00a034 (2021)","DOI":"10.1101\/2021.07.09.450648"},{"key":"7_CR24","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint: https:\/\/arxiv.org\/abs\/1301.3781 (2013)"},{"key":"7_CR25","unstructured":"Nguyen, E., et\u00a0al.: HyenaDNA: long-range genomic sequence modeling at single nucleotide resolution. In: Advances in Neural Information Processing Systems, vol.\u00a036 (2024)"},{"issue":"10","key":"7_CR26","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1109\/TKDE.2009.191","volume":"22","author":"SJ Pan","year":"2010","unstructured":"Pan, S.J., Yang, Q.: A survey on transfer learning. IEEE Trans. Knowl. Data Eng. 22(10), 1345\u20131359 (2010)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"1","key":"7_CR27","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1101\/gr.097857.109","volume":"20","author":"KS Pollard","year":"2009","unstructured":"Pollard, K.S., Hubisz, M.J., Rosenbloom, K.R., Siepel, A.: Detection of nonneutral substitution rates on mammalian phylogenies. Genome Res. 20(1), 110\u2013121 (2009). https:\/\/doi.org\/10.1101\/gr.097857.109","journal-title":"Genome Res."},{"key":"7_CR28","unstructured":"Schiff, Y., et\u00a0al.: Caduceus: Bi-directional equivariant long-range DNA sequence modeling. arXiv preprint: arXiv:2403.03234 (2024)"},{"key":"7_CR29","doi-asserted-by":"publisher","unstructured":"Schubach, M., Mass, T., Nazaretyan, L., R\u00f6ner, S., Kircher, M.: CADD v1.7: using protein language models, regulatory CNNs and other nucleotide-level scores to improve genome-wide variant predictions. Nucleic Acids Res. 52(D1), D1143\u2013D1154 (2024). https:\/\/doi.org\/10.1093\/nar\/gkad989","DOI":"10.1093\/nar\/gkad989"},{"key":"7_CR30","doi-asserted-by":"crossref","unstructured":"Sennrich, R., Haddow, B., Birch, A.: Neural machine translation of rare words with subword units. In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1715\u20131725. Association for Computational Linguistics (2016)","DOI":"10.18653\/v1\/P16-1162"},{"key":"7_CR31","doi-asserted-by":"publisher","unstructured":"Shrikumar, A., Greenside, P., Kundaje, A.: Reverse-complement parameter sharing improves deep learning models for genomics. bioRxiv preprint (2017). https:\/\/doi.org\/10.1101\/103663","DOI":"10.1101\/103663"},{"issue":"8","key":"7_CR32","doi-asserted-by":"publisher","first-page":"1034","DOI":"10.1101\/gr.3715005","volume":"15","author":"A Siepel","year":"2005","unstructured":"Siepel, A., et al.: Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 15(8), 1034\u20131050 (2005)","journal-title":"Genome Res."},{"issue":"3","key":"7_CR33","doi-asserted-by":"publisher","first-page":"595","DOI":"10.1016\/j.ajhg.2016.07.005","volume":"99","author":"D Smedley","year":"2016","unstructured":"Smedley, D., et al.: A whole-genome analysis framework for effective identification of pathogenic regulatory variants in mendelian disease. Am. J. Hum. Genet. 99(3), 595\u2013606 (2016)","journal-title":"Am. J. Hum. Genet."},{"issue":"5","key":"7_CR34","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1016\/j.tree.2020.01.005","volume":"35","author":"SD Smith","year":"2020","unstructured":"Smith, S.D., Pennell, M.W., Dunn, C.W., Edwards, S.V.: Phylogenetics is the new genetics (for most of biodiversity). Trends Ecol. Evol. 35(5), 415\u2013425 (2020). https:\/\/doi.org\/10.1016\/j.tree.2020.01.005","journal-title":"Trends Ecol. Evol."},{"key":"7_CR35","doi-asserted-by":"publisher","unstructured":"Sullivan, P.F., et\u00a0al.: Leveraging base-pair mammalian constraint to understand genetic variation and human disease. Science 380(6643) (2023). https:\/\/doi.org\/10.1126\/science.abn2937","DOI":"10.1126\/science.abn2937"},{"key":"7_CR36","unstructured":"Tavar\u00e9, S.: Some probabilistic and statistical problems in the analysis of DNA sequences. In: Lectures on Mathematics in the Life Sciences, vol.\u00a017, pp. 57\u201386. American Mathematical Society (1986)"},{"key":"7_CR37","unstructured":"Vaswani, A., et\u00a0al.: Attention is all you need (2023). https:\/\/arxiv.org\/abs\/1706.03762"},{"key":"7_CR38","doi-asserted-by":"publisher","unstructured":"Vishniakov, K., et\u00a0al.: Genomic foundationless models: Pretraining does not promise performance. bioRxiv preprint (2024). https:\/\/doi.org\/10.1101\/2024.12.18.628606. posted December 20, 2024","DOI":"10.1101\/2024.12.18.628606"},{"key":"7_CR39","doi-asserted-by":"crossref","unstructured":"Weinstein, E.N., Amin, A.N., Frazer, J., Marks, D.S.: Non-identifiability and the blessings of misspecification in models of molecular fitness. In: Advances in Neural Information Processing Systems (2022)","DOI":"10.1101\/2022.01.29.478324"},{"key":"7_CR40","doi-asserted-by":"publisher","unstructured":"Yan, H., Hu, Z., Thomas, G.W.C., Edwards, S.V., Sackton, T.B. et\u00a0al.: PhyloAcc-GT: a Bayesian method for inferring patterns of substitution rate shifts on targeted lineages accounting for gene tree discordance. Mol. Biol. Evol. 40(9), msad195 (2023). https:\/\/doi.org\/10.1093\/molbev\/msad195","DOI":"10.1093\/molbev\/msad195"},{"key":"7_CR41","doi-asserted-by":"publisher","unstructured":"Yang, K.K., Fusi, N., Lu, A.X.: Convolutions are competitive with transformers for protein sequence pretraining. Cell Syst. 15(3), 286\u2013294.e2 (2024). https:\/\/doi.org\/10.1016\/j.cels.2024.01.008","DOI":"10.1016\/j.cels.2024.01.008"},{"key":"7_CR42","doi-asserted-by":"crossref","unstructured":"Yang, Z.: Computational Molecular Evolution. Oxford University Press (2006)","DOI":"10.1093\/acprof:oso\/9780198567028.001.0001"},{"key":"7_CR43","unstructured":"Zhou, Z., et\u00a0al.: DNABERT-2: efficient foundation model and benchmark for multi-species genomes. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=oMLQB4EZE1"},{"key":"7_CR44","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1038\/s41586-020-2876-6","volume":"587","author":"Zoonomia Consortium","year":"2020","unstructured":"Zoonomia Consortium: A comparative genomics multitool for scientific discovery and conservation. Nature 587, 240\u2013245 (2020). https:\/\/doi.org\/10.1038\/s41586-020-2876-6","journal-title":"Nature"}],"container-title":["Lecture Notes in Computer Science","Research in Computational Molecular Biology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-90252-9_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T20:03:56Z","timestamp":1745525036000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-90252-9_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031902512","9783031902529"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-90252-9_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"25 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The code to reproduce the results of our log-likelihood-based evaluation is available at . The model is available at .","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Code and Model Availability"}},{"value":"RECOMB","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Research in Computational Molecular Biology","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Seoul","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"recomb2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/recomb.org\/recomb2025\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}