{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T14:23:11Z","timestamp":1780410191942,"version":"3.54.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T00:00:00Z","timestamp":1769040000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T00:00:00Z","timestamp":1771372800000},"content-version":"vor","delay-in-days":27,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-026-02363-5","type":"journal-article","created":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T21:04:45Z","timestamp":1769115885000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Large language models improve transferability of electronic health record-based predictions across countries and coding systems"],"prefix":"10.1038","volume":"9","author":[{"given":"Matthias","family":"Kirchler","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Matteo","family":"Ferro","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Veronica","family":"Lorenzini","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Robin P.","family":"van de Water","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"name":"FinnGen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andrea","family":"Ganna","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Christoph","family":"Lippert","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andrea","family":"Ganna","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,1,22]]},"reference":[{"key":"2363_CR1","doi-asserted-by":"publisher","first-page":"R1","DOI":"10.1186\/bcr2464","volume":"12","author":"GC Wishart","year":"2010","unstructured":"Wishart, G. C. et al. PREDICT: a new UK prognostic model that predicts survival following surgery for invasive breast cancer. Breast Cancer Res. 12, R1 (2010).","journal-title":"Breast Cancer Res."},{"key":"2363_CR2","doi-asserted-by":"crossref","unstructured":"SCORE2 working group and ESC Cardiovascular risk collaboration. SCORE2 risk prediction algorithms: new models to estimate 10-year risk of cardiovascular disease in Europe. Eur. Heart J. 42, 2439\u20132454 (2021).","DOI":"10.1093\/eurheartj\/ehab309"},{"key":"2363_CR3","doi-asserted-by":"publisher","first-page":"1708","DOI":"10.1038\/s41436-018-0406-9","volume":"21","author":"A Lee","year":"2019","unstructured":"Lee, A. et al. BOADICEA: a comprehensive breast cancer risk prediction model incorporating genetic and nongenetic risk factors. Genet. Med. 21, 1708\u20131718 (2019).","journal-title":"Genet. Med."},{"key":"2363_CR4","doi-asserted-by":"publisher","first-page":"1113","DOI":"10.1038\/s41591-023-02332-5","volume":"29","author":"D Placido","year":"2023","unstructured":"Placido, D. et al. A deep learning algorithm to predict risk of pancreatic cancer from disease trajectories. Nat. Med. 29, 1113\u20131122 (2023).","journal-title":"Nat. Med."},{"key":"2363_CR5","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1016\/S0140-6736(22)02079-7","volume":"401","author":"IS Forrest","year":"2023","unstructured":"Forrest, I. S. et al. Machine learning-based marker for coronary artery disease: derivation and validation in two longitudinal cohorts. Lancet Lond. Engl. 401, 215\u2013225 (2023).","journal-title":"Lancet Lond. Engl."},{"key":"2363_CR6","doi-asserted-by":"publisher","first-page":"1155","DOI":"10.1016\/j.jacc.2022.01.021","volume":"79","author":"BO Petrazzini","year":"2022","unstructured":"Petrazzini, B. O. et al. Coronary risk estimation based on clinical data in electronic health records. J. Am. Coll. Cardiol. 79, 1155\u20131166 (2022).","journal-title":"J. Am. Coll. Cardiol."},{"key":"2363_CR7","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-018-36745-x","volume":"9","author":"J Zhao","year":"2019","unstructured":"Zhao, J. et al. Learning from longitudinal data in electronic health record and genetic data to improve cardiovascular event prediction. Sci. Rep. 9, 717 (2019).","journal-title":"Sci. Rep."},{"key":"2363_CR8","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1093\/jamia\/ocu023","volume":"22","author":"EA Voss","year":"2015","unstructured":"Voss, E. A. et al. Feasibility and utility of applications of the common data model to multiple, disparate observational health databases. J. Am. Med. Inform. Assoc. JAMIA 22, 553\u2013564 (2015).","journal-title":"J. Am. Med. Inform. Assoc. JAMIA"},{"key":"2363_CR9","unstructured":"Choi, Y., Chiu, C. Y.-I. & Sontag, D. Learning low-dimensional representations of medical concepts. AMIA Jt Summits Transl. Sci. Proc. 2016, 41\u201350 (2016)."},{"key":"2363_CR10","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-019-11069-0","volume":"10","author":"CA Nelson","year":"2019","unstructured":"Nelson, C. A., Butte, A. J. & Baranzini, S. E. Integrating biomedical research and electronic health records to create knowledge-based biologically meaningful machine-readable embeddings. Nat. Commun. 10, 3045 (2019).","journal-title":"Nat. Commun."},{"key":"2363_CR11","doi-asserted-by":"publisher","first-page":"ooab022","DOI":"10.1093\/jamiaopen\/ooab022","volume":"4","author":"A Finch","year":"2021","unstructured":"Finch, A. et al. Exploiting hierarchy in medical concept embedding. JAMIA Open 4, ooab022 (2021).","journal-title":"JAMIA Open"},{"key":"2363_CR12","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1038\/s41746-020-0301-z","volume":"3","author":"I Landi","year":"2020","unstructured":"Landi, I. et al. Deep representation learning of electronic health records to unlock patient stratification at scale. NPJ Digit. Med. 3, 96 (2020).","journal-title":"NPJ Digit. Med."},{"key":"2363_CR13","unstructured":"Choi, E., Xiao, C., Stewart, W. F. & Sun, J. MiME: Multilevel medical embedding of electronic health records for predictive healthcare. Adv. Neur. Inform. Process. Syst. 31 (2018)."},{"key":"2363_CR14","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1007\/s41666-023-00157-y","volume":"8","author":"D Vithanage","year":"2024","unstructured":"Vithanage, D., Yu, P., Wang, L. & Deng, C. Contextual word embedding for biomedical knowledge extraction: a rapid review and case study. J. Healthc. Inform. Res. 8, 158\u2013179 (2024).","journal-title":"J. Healthc. Inform. Res."},{"key":"2363_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41746-021-00455-y","volume":"4","author":"L Rasmy","year":"2021","unstructured":"Rasmy, L., Xiang, Y., Xie, Z., Tao, C. & Zhi, D. Med-BERT: pretrained contextualized embeddings on large-scale structured electronic health records for disease prediction. Npj Digit. Med. 4, 1\u201313 (2021).","journal-title":"Npj Digit. Med."},{"key":"2363_CR16","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-020-62922-y","volume":"10","author":"Y Li","year":"2020","unstructured":"Li, Y. et al. BEHRT: transformer for electronic health records. Sci. Rep. 10, 7155 (2020).","journal-title":"Sci. Rep."},{"key":"2363_CR17","unstructured":"Pang, C. et al. CEHR-BERT: Incorporating temporal information from structured EHR data to improve prediction tasks. Proc. Machine Learning for Health 158, 239\u2013260 (2021)."},{"key":"2363_CR18","doi-asserted-by":"publisher","unstructured":"Hegselmann, S. et al. Large Language Models are Powerful EHR Encoders. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2502.17403 (2025).","DOI":"10.48550\/arXiv.2502.17403"},{"key":"2363_CR19","doi-asserted-by":"publisher","unstructured":"Johnson, R. et al. Unified clinical vocabulary embeddings for advancing precision. Preprint at https:\/\/doi.org\/10.1101\/2024.12.03.24318322 (2024).","DOI":"10.1101\/2024.12.03.24318322"},{"key":"2363_CR20","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-023-00774-9","volume":"10","author":"B Abu-Salih","year":"2023","unstructured":"Abu-Salih, B. et al. Healthcare knowledge graph construction: a systematic review of the state-of-the-art, open issues, and opportunities. J. Big Data 10, 81 (2023).","journal-title":"J. Big Data"},{"key":"2363_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41746-021-00426-3","volume":"4","author":"BK Beaulieu-Jones","year":"2021","unstructured":"Beaulieu-Jones, B. K. et al. Machine learning for patient risk stratification: standing on, or looking over, the shoulders of clinicians? Npj Digit. Med. 4, 1\u20136 (2021).","journal-title":"Npj Digit. Med."},{"key":"2363_CR22","doi-asserted-by":"crossref","unstructured":"Hur, K. et al. Unifying heterogeneous electronic health records systems via text-based code embedding. ACM Conference on Health, Inference, and Learning (2021).","DOI":"10.2196\/preprints.32523"},{"key":"2363_CR23","doi-asserted-by":"crossref","unstructured":"Hur, K. et al. GenHPF: general healthcare predictive framework for multi-task multi-source learning. In IEEE J. Biomed. Health Inform (IEEE, 2023).","DOI":"10.1109\/JBHI.2023.3327951"},{"key":"2363_CR24","doi-asserted-by":"publisher","first-page":"394","DOI":"10.1038\/s41746-025-01777-x","volume":"8","author":"SA Lee","year":"2025","unstructured":"Lee, S. A. et al. Clinical decision support using pseudo-notes from multiple streams of EHR data. npj Digit. Med. 8, 394 (2025).","journal-title":"npj Digit. Med."},{"key":"2363_CR25","doi-asserted-by":"crossref","unstructured":"Agrawal, M., Hegselmann, S., Lang, H., Kim, Y. & Sontag, D. Large language models are few-shot clinical information extractors. In Proc. 2022 Conference on Empirical Methods in Natural Language Processing, 1998\u20132022 (Association for Computational Linguistics, Abu Dhabi, United Arab Emirates, 2022).","DOI":"10.18653\/v1\/2022.emnlp-main.130"},{"key":"2363_CR26","doi-asserted-by":"crossref","unstructured":"Chen, T. & Guestrin, C. XGBoost: a scalable tree boosting system. In Proc. 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining 785\u2013794 (Association for Computing Machinery, 2016).","DOI":"10.1145\/2939672.2939785"},{"key":"2363_CR27","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1038\/s41586-018-0579-z","volume":"562","author":"C Bycroft","year":"2018","unstructured":"Bycroft, C. et al. The UK Biobank resource with deep phenotyping and genomic data. Nature 562, 203\u2013209 (2018).","journal-title":"Nature"},{"key":"2363_CR28","doi-asserted-by":"publisher","first-page":"508","DOI":"10.1038\/s41586-022-05473-8","volume":"613","author":"MI Kurki","year":"2023","unstructured":"Kurki, M. I. et al. FinnGen provides genetic insights from a well-phenotyped isolated population. Nature 613, 508\u2013518 (2023).","journal-title":"Nature"},{"key":"2363_CR29","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1016\/j.inffus.2021.11.011","volume":"81","author":"R Shwartz-Ziv","year":"2022","unstructured":"Shwartz-Ziv, R. & Armon, A. Tabular data: Deep learning is not all you need. Inf. Fusion 81, 84\u201390 (2022).","journal-title":"Inf. Fusion"},{"key":"2363_CR30","unstructured":"Grinsztajn, L., Oyallon, E. & Varoquaux, G. Why do tree-based models still outperform deeplearning on typical tabular data? In Proc. 36th International Conference on Neural Information Processing Systems (NIPS \u201922) 37, 507\u2013520 (Curran Associates Inc, Red Hook, NY, USA, 2022)."},{"key":"2363_CR31","doi-asserted-by":"crossref","unstructured":"Yang, X. et al. A large language model for electronic health records. npj Digit. Med. 5,194 (2022).","DOI":"10.1038\/s41746-022-00742-2"},{"key":"2363_CR32","doi-asserted-by":"crossref","unstructured":"Liu, F. et al. Self-Alignment Pretraining for Biomedical Entity Representations. In Proc. 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 4228\u20134238, Online (Association for Computational Linguistics, 2021).","DOI":"10.18653\/v1\/2021.naacl-main.334"},{"key":"2363_CR33","doi-asserted-by":"publisher","first-page":"288","DOI":"10.1016\/j.jad.2020.01.141","volume":"266","author":"GE Hunt","year":"2020","unstructured":"Hunt, G. E., Malhi, G. S., Lai, H. M. X. & Cleary, M. Prevalence of comorbid substance use in major depressive disorder in community and clinical settings, 1990-2019: Systematic review and meta-analysis. J. Affect. Disord. 266, 288\u2013304 (2020).","journal-title":"J. Affect. Disord."},{"key":"2363_CR34","doi-asserted-by":"publisher","first-page":"2152","DOI":"10.1016\/j.ajhg.2022.10.009","volume":"109","author":"N Mars","year":"2022","unstructured":"Mars, N. et al. Systematic comparison of family history and polygenic risk across 24 common diseases. Am. J. Hum. Genet. 109, 2152\u20132162 (2022).","journal-title":"Am. J. Hum. Genet."},{"key":"2363_CR35","doi-asserted-by":"publisher","first-page":"2708","DOI":"10.1038\/s41591-024-03199-w","volume":"30","author":"JN Kather","year":"2024","unstructured":"Kather, J. N., Ferber, D., Wiest, I. C., Gilbert, S. & Truhn, D. Large language models could make natural language again the universal interface of healthcare. Nat. Med. 30, 2708\u20132710 (2024).","journal-title":"Nat. Med."},{"key":"2363_CR36","doi-asserted-by":"publisher","unstructured":"Dorfner, F. J. et al. Biomedical large languages models seem not to be superior to generalist models on unseen medical data. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2408.13833 (2024).","DOI":"10.48550\/arXiv.2408.13833"},{"key":"2363_CR37","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1038\/s41586-025-09529-3","volume":"647","author":"A Shmatko","year":"2025","unstructured":"Shmatko, A. et al. Learning the natural history of human disease with generative transformers. Nature 647, 248\u2013256 (2025).","journal-title":"Nature"},{"key":"2363_CR38","unstructured":"Oufattole, N. et al. MEDS-torch: an ML pipeline for inductive experiments for EHR medical foundation models. In NeurIPS Workshop on Time Series in the Age of Large Models (2024)."},{"key":"2363_CR39","doi-asserted-by":"publisher","unstructured":"Ranjan, R., Gupta, S. & Singh, S. N. A comprehensive survey of bias in LLMs: current landscape and future directions. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2409.16430 (2024).","DOI":"10.48550\/arXiv.2409.16430"},{"key":"2363_CR40","doi-asserted-by":"publisher","unstructured":"Taubenfeld, A., Dover, Y., Reichart, R. & Goldstein, A. Systematic biases in LLM simulations of debates. In Proc. 2024 Conference on Empirical Methods in Natural Language Processing (eds Al-Onaizan, Y., Bansal, M. & Chen, Y.-N.) 251\u2013267. https:\/\/doi.org\/10.18653\/v1\/2024.emnlp-main.16 (Association for Computational Linguistics, 2024).","DOI":"10.18653\/v1\/2024.emnlp-main.16"},{"key":"2363_CR41","doi-asserted-by":"publisher","first-page":"e078378","DOI":"10.1136\/bmj-2023-078378","volume":"385","author":"GS Collins","year":"2024","unstructured":"Collins, G. S. et al. TRIPOD\u2009+\u2009AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods. BMJ 385, e078378 (2024).","journal-title":"BMJ"},{"key":"2363_CR42","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1038\/s41467-025-55879-x","volume":"16","author":"J Steinfeldt","year":"2025","unstructured":"Steinfeldt, J. et al. Medical history predicts phenome-wide disease onset and enables the rapid response to emerging health threats. Nat. Commun. 16, 585 (2025).","journal-title":"Nat. Commun."},{"key":"2363_CR43","doi-asserted-by":"publisher","unstructured":"Neelakantan, A. et al. Text and code embeddings by contrastive pre-training. Preprint at https:\/\/doi.org\/10.48550\/arXiv.2201.10005 (2022).","DOI":"10.48550\/arXiv.2201.10005"},{"key":"2363_CR44","unstructured":"Vaswani, A. et al. Attention is all you need. In Proc. 31st International Conference on Neural Information Processing Systems (NIPS\u201917). 6000\u20136010 (Curran Associates Inc, Red Hook, NY, USA, 2023)."},{"key":"2363_CR45","doi-asserted-by":"publisher","unstructured":"Ba, J. L., Kiros, J. R. & Hinton, G. E. Layer normalization. Preprint at https:\/\/doi.org\/10.48550\/arXiv.1607.06450 (2016).","DOI":"10.48550\/arXiv.1607.06450"},{"key":"2363_CR46","unstructured":"Loshchilov, I. & Hutter, F. Decoupled weight decay regularization. International Conference on Learning Representations (2017)."},{"key":"2363_CR47","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1111\/j.2517-6161.1972.tb00899.x","volume":"34","author":"DR Cox","year":"1972","unstructured":"Cox, D. R. Regression models and life-tables. J. R. Stat. Soc. Ser. B Methodol. 34, 187\u2013220 (1972).","journal-title":"J. R. Stat. Soc. Ser. B Methodol."},{"key":"2363_CR48","doi-asserted-by":"publisher","DOI":"10.1186\/s12874-018-0482-1","volume":"18","author":"JL Katzman","year":"2018","unstructured":"Katzman, J. L. et al. DeepSurv: personalized treatment recommender system using a Cox proportional hazards deep neural network. BMC Med. Res. Methodol. 18, 24 (2018).","journal-title":"BMC Med. Res. Methodol."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02363-5","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02363-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02363-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T15:03:25Z","timestamp":1771427005000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-026-02363-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,22]]},"references-count":48,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,12]]}},"alternative-id":["2363"],"URL":"https:\/\/doi.org\/10.1038\/s41746-026-02363-5","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,22]]},"assertion":[{"value":"12 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"A.G. is the founder of Real World Genetics Oy. The other authors do not have a competing interest.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"177"}}