{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T09:44:22Z","timestamp":1779270262891,"version":"3.51.4"},"reference-count":93,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,5,17]],"date-time":"2025-05-17T00:00:00Z","timestamp":1747440000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,5,17]],"date-time":"2025-05-17T00:00:00Z","timestamp":1747440000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100000051","name":"U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute","doi-asserted-by":"publisher","award":["RM1HG011558"],"award-info":[{"award-number":["RM1HG011558"]}],"id":[{"id":"10.13039\/100000051","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000049","name":"U.S. Department of Health & Human Services | NIH | National Institute on Aging","doi-asserted-by":"publisher","award":["R01AG080429"],"award-info":[{"award-number":["R01AG080429"]}],"id":[{"id":"10.13039\/100000049","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000049","name":"U.S. Department of Health & Human Services | NIH | National Institute on Aging","doi-asserted-by":"publisher","award":["RF1AG072799"],"award-info":[{"award-number":["RF1AG072799"]}],"id":[{"id":"10.13039\/100000049","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000049","name":"U.S. Department of Health & Human Services | NIH | National Institute on Aging","doi-asserted-by":"publisher","award":["R01AG084236"],"award-info":[{"award-number":["R01AG084236"]}],"id":[{"id":"10.13039\/100000049","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000049","name":"U.S. Department of Health & Human Services | NIH | National Institute on Aging","doi-asserted-by":"publisher","award":["R01AG083039"],"award-info":[{"award-number":["R01AG083039"]}],"id":[{"id":"10.13039\/100000049","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000049","name":"U.S. Department of Health & Human Services | NIH | National Institute on Aging","doi-asserted-by":"publisher","award":["RF1AG072799"],"award-info":[{"award-number":["RF1AG072799"]}],"id":[{"id":"10.13039\/100000049","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-025-01645-8","type":"journal-article","created":{"date-parts":[[2025,5,17]],"date-time":"2025-05-17T01:56:55Z","timestamp":1747447015000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Social determinants of health extraction from clinical notes across institutions using large language models"],"prefix":"10.1038","volume":"8","author":[{"given":"Vipina K.","family":"Keloth","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Salih","family":"Selek","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingyu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christopher","family":"Gilman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1691-5179","authenticated-orcid":false,"given":"Sunyang","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4014-2957","authenticated-orcid":false,"given":"Yifang","family":"Dang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinghan","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyue","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yujia","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huan","family":"He","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6349-3752","authenticated-orcid":false,"given":"Jungwei W.","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7866-9498","authenticated-orcid":false,"given":"Karen","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cynthia","family":"Brandt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cui","family":"Tao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2570-3741","authenticated-orcid":false,"given":"Hongfang","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hua","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,17]]},"reference":[{"key":"1645_CR1","doi-asserted-by":"publisher","first-page":"1456","DOI":"10.2105\/AJPH.2010.300086","volume":"101","author":"S Galea","year":"2011","unstructured":"Galea, S., Tracy, M., Hoggatt, K. J., DiMaggio, C. & Karpati, A. Estimated deaths attributable to social factors in the United States. Am. J. Public Health 101, 1456\u20131465 (2011).","journal-title":"Am. J. Public Health"},{"key":"1645_CR2","doi-asserted-by":"publisher","first-page":"1661","DOI":"10.1016\/S0140-6736(08)61690-6","volume":"372","author":"M Marmot","year":"2008","unstructured":"Marmot, M. et al. Closing the gap in a generation: health equity through action on the social determinants of health. Lancet 372, 1661\u20131669 (2008).","journal-title":"Lancet"},{"key":"1645_CR3","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1377\/hlthaff.2009.0730","volume":"29","author":"GK Singh","year":"2010","unstructured":"Singh, G. K., Siahpush, M. & Kogan, M. D. Neighborhood socioeconomic conditions, built environments, and childhood obesity. Health Aff. (Millwood) 29, 503\u2013512 (2010).","journal-title":"Health Aff. (Millwood)"},{"key":"1645_CR4","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1016\/S0749-3797(98)00017-8","volume":"14","author":"VJ Felitti","year":"1998","unstructured":"Felitti, V. J. et al. Relationship of childhood abuse and household dysfunction to many of the leading causes of death in adults: The Adverse Childhood Experiences (ACE) Study. Am. J. Prev. Med. 14, 245\u2013258 (1998).","journal-title":"Am. J. Prev. Med"},{"key":"1645_CR5","unstructured":"Healthy People 2030, https:\/\/health.gov\/healthypeople\/priority-areas\/social-determinants-health (2023)."},{"key":"1645_CR6","doi-asserted-by":"publisher","first-page":"2716","DOI":"10.1093\/jamia\/ocab170","volume":"28","author":"BG Patra","year":"2021","unstructured":"Patra, B. G. et al. Extracting social determinants of health from electronic health records using natural language processing: A systematic review. J. Am. Med Inf. Assoc. 28, 2716\u20132727 (2021).","journal-title":"J. Am. Med Inf. Assoc."},{"key":"1645_CR7","doi-asserted-by":"publisher","first-page":"e1911513","DOI":"10.1001\/jamanetworkopen.2019.11513","volume":"2","author":"R Gold","year":"2019","unstructured":"Gold, R. & Gottlieb, L. National data on social risk screening underscore the need for implementation research. JAMA Netw. Open 2, e1911513\u2013e1911513 (2019).","journal-title":"JAMA Netw. Open"},{"key":"1645_CR8","doi-asserted-by":"publisher","first-page":"315","DOI":"10.3122\/jabfm.2018.03.170249","volume":"31","author":"L Gottlieb","year":"2018","unstructured":"Gottlieb, L. et al. Advancing social prescribing with implementation science. J. Am. Board Fam. Med. 31, 315\u2013321 (2018).","journal-title":"J. Am. Board Fam. Med."},{"key":"1645_CR9","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1370\/afm.2275","volume":"16","author":"R Gold","year":"2018","unstructured":"Gold, R. et al. Adoption of social determinants of health EHR tools by community health centers. Ann. Fam. Med. 16, 399\u2013407 (2018).","journal-title":"Ann. Fam. Med."},{"key":"1645_CR10","unstructured":"Integrating Social Needs Care into the Delivery of Health Care to Improve the Nation\u2019s Health, https:\/\/www.nationalacademies.org\/our-work\/integrating-social-needs-care-into-the-delivery-of-health-care-to-improve-the-nations-health (2019)."},{"key":"1645_CR11","doi-asserted-by":"publisher","first-page":"761","DOI":"10.1038\/gim.2013.72","volume":"15","author":"O Gottesman","year":"2013","unstructured":"Gottesman, O. et al. The electronic medical records and genomics (eMERGE) network: Past, present, and future. Genet Med. 15, 761\u2013771 (2013).","journal-title":"Genet Med"},{"key":"1645_CR12","unstructured":"Patient-Centered Outcomes Research Institute (PCORI), https:\/\/www.pcori.org\/ (2022)."},{"key":"1645_CR13","unstructured":"The Observational Health Data Science and Informatics (OHDSI) consortium, https:\/\/www.ohdsi.org\/ (2022)."},{"key":"1645_CR14","doi-asserted-by":"publisher","first-page":"104343","DOI":"10.1016\/j.jbi.2023.104343","volume":"142","author":"VK Keloth","year":"2023","unstructured":"Keloth, V. K. et al. Representing and utilizing clinical textual data for real world studies: An OHDSI approach. J. Biomed. Inf. 142, 104343 (2023).","journal-title":"J. Biomed. Inf."},{"key":"1645_CR15","first-page":"2072","volume":"2016","author":"TJ Winden","year":"2016","unstructured":"Winden, T. J., Chen, E. S. & Melton, G. B. Representing residence, living situation, and living conditions: An evaluation of terminologies, standards, guidelines, and measures\/surveys. AMIA Annu Symp. Proc. 2016, 2072 (2016).","journal-title":"AMIA Annu Symp. Proc."},{"key":"1645_CR16","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1111\/1475-6773.14039","volume":"58","author":"MM Kepper","year":"2023","unstructured":"Kepper, M. M. et al. The adoption of social determinants of health documentation in clinical settings.Health Serv. Res. 58, 67\u201377 (2023).","journal-title":"Health Serv. Res."},{"key":"1645_CR17","doi-asserted-by":"publisher","first-page":"103851","DOI":"10.1016\/j.jbi.2021.103851","volume":"120","author":"RM Reeves","year":"2021","unstructured":"Reeves, R. M. et al. Adaptation of an NLP system to a new healthcare environment to identify social determinants of health. J. Biomed. Inf. 120, 103851 (2021).","journal-title":"J. Biomed. Inf."},{"key":"1645_CR18","doi-asserted-by":"publisher","first-page":"9759016","DOI":"10.34133\/2021\/9759016","volume":"2021","author":"A Bompelli","year":"2021","unstructured":"Bompelli, A. et al. Social and behavioral determinants of health in the era of artificial intelligence with electronic health records: A scoping review. Health Data Sci. 2021, 9759016 (2021).","journal-title":"Health Data Sci"},{"key":"1645_CR19","doi-asserted-by":"crossref","unstructured":"Ong, J. C. L. et al. Artificial intelligence, ChatGPT, and other large language models for social determinants of health: Current state and future directions. Cell Rep. Med. 5, (2024).","DOI":"10.1016\/j.xcrm.2023.101356"},{"key":"1645_CR20","doi-asserted-by":"publisher","first-page":"1389","DOI":"10.1093\/jamia\/ocad073","volume":"30","author":"K Lybarger","year":"2023","unstructured":"Lybarger, K. et al. Leveraging natural language processing to augment structured social determinants of health data in the electronic health record. J. Am. Med Inf. Assoc. 30, 1389\u20131397 (2023).","journal-title":"J. Am. Med Inf. Assoc."},{"key":"1645_CR21","doi-asserted-by":"publisher","first-page":"1379","DOI":"10.1093\/jamia\/ocad046","volume":"30","author":"R Richie","year":"2023","unstructured":"Richie, R., Ruiz, V. M., Han, S., Shi, L. & Tsui, F. Extracting social determinants of health events with transformer-based multitask, multilabel named entity recognition. J. Am. Med. Inf. Assoc. 30, 1379\u20131388 (2023).","journal-title":"J. Am. Med. Inf. Assoc."},{"key":"1645_CR22","doi-asserted-by":"publisher","first-page":"1448","DOI":"10.1093\/jamia\/ocad071","volume":"30","author":"B Romanowski","year":"2023","unstructured":"Romanowski, B., Ben Abacha, A. & Fan, Y. Extracting social determinants of health from clinical note text with classification and sequence-to-sequence approaches. J. Am. Med. Inf. Assoc. 30, 1448\u20131455 (2023).","journal-title":"J. Am. Med. Inf. Assoc."},{"key":"1645_CR23","doi-asserted-by":"publisher","first-page":"104642","DOI":"10.1016\/j.jbi.2024.104642","volume":"153","author":"Z Yu","year":"2024","unstructured":"Yu, Z. et al. Identifying social determinants of health from clinical narratives: A study of performance, documentation ratio, and potential bias. J. Biomed. Inf. 153, 104642 (2024).","journal-title":"J. Biomed. Inf."},{"key":"1645_CR24","unstructured":"Fu, Y. et al. Extracting social determinants of health from pediatric patient notes using large language models: novel corpus and methods. arXiv preprint arXiv:2404.00826 (2024)."},{"key":"1645_CR25","doi-asserted-by":"publisher","DOI":"10.1093\/jamiaopen\/ooad024","volume":"6","author":"KS Allen","year":"2023","unstructured":"Allen, K. S. et al. Natural language processing-driven state machines to extract social factors from unstructured clinical documentation. JAMIA Open 6, ooad024 (2023).","journal-title":"JAMIA Open"},{"key":"1645_CR26","doi-asserted-by":"publisher","first-page":"1367","DOI":"10.1093\/jamia\/ocad012","volume":"30","author":"K Lybarger","year":"2023","unstructured":"Lybarger, K., Yetisgen, M. & Uzuner, \u00d6. The 2022 n2c2\/UW shared task on extracting social determinants of health. J. Am. Med. Inf. Assoc 30, 1367\u20131368 (2023).","journal-title":"J. Am. Med. Inf. Assoc"},{"key":"1645_CR27","doi-asserted-by":"publisher","first-page":"103631","DOI":"10.1016\/j.jbi.2020.103631","volume":"113","author":"K Lybarger","year":"2021","unstructured":"Lybarger, K., Ostendorf, M. & Yetisgen, M. Annotating social determinants of health using active learning, and characterizing determinants using neural event extraction. J. Biomed. Inf. 113, 103631 (2021).","journal-title":"J. Biomed. Inf."},{"key":"1645_CR28","unstructured":"Gravity project, https:\/\/thegravityproject.net\/ (2023)."},{"key":"1645_CR29","unstructured":"O\u2019Shea, K. & Nash, R. An introduction to convolutional neural networks. arXiv preprint arXiv:1511.08458 (2015)."},{"key":"1645_CR30","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S. & Schmidhuber, J. Long short-term memory. Neural Comput 9, 1735\u20131780 (1997).","journal-title":"Neural Comput"},{"key":"1645_CR31","unstructured":"Devlin, J., Chang, M.-W., Lee, K. & Toutanova, K. Bert: Pre-training of deep bidirectional transformers for language understanding. Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies. Vol. 1, 4171\u20134186 (2019)."},{"key":"1645_CR32","doi-asserted-by":"publisher","first-page":"103984","DOI":"10.1016\/j.jbi.2021.103984","volume":"127","author":"S Han","year":"2022","unstructured":"Han, S. et al. Classifying social determinants of health from unstructured electronic health records using deep learning-based natural language processing. J. Biomed. Inf. 127, 103984 (2022).","journal-title":"J. Biomed. Inf."},{"key":"1645_CR33","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1136\/jamia.2009.001560","volume":"17","author":"GK Savova","year":"2010","unstructured":"Savova, G. K. et al. Mayo clinical Text Analysis and Knowledge Extraction System (cTAKES): architecture, component evaluation and applications. J. Am. Med Inf. Assoc. 17, 507\u2013513 (2010).","journal-title":"J. Am. Med Inf. Assoc."},{"key":"1645_CR34","doi-asserted-by":"publisher","first-page":"e18659","DOI":"10.2196\/18659","volume":"8","author":"LA Carlson","year":"2020","unstructured":"Carlson, L. A. et al. Characterizing chronic pain episodes in clinical text at two health care systems: Comprehensive annotation and corpus analysis. JMIR Med. Inf. 8, e18659 (2020).","journal-title":"JMIR Med. Inf."},{"key":"1645_CR35","first-page":"15","volume":"38","author":"CM Wray","year":"2021","unstructured":"Wray, C. M. et al. Examining the interfacility variation of social determinants of health in the Veterans Health Administration. Fed. Pract. 38, 15 (2021).","journal-title":"Fed. Pract."},{"key":"1645_CR36","first-page":"422","volume":"2018","author":"DJ Feller","year":"2018","unstructured":"Feller, D. J. et al. Towards the inference of social and behavioral determinants of sexual health: development of a gold-standard corpus with semi-supervised learning. AMIA Annu Symp. Proc. 2018, 422 (2018).","journal-title":"AMIA Annu Symp. Proc."},{"key":"1645_CR37","doi-asserted-by":"publisher","first-page":"254","DOI":"10.1093\/jamia\/ocy166","volume":"26","author":"M Afshar","year":"2019","unstructured":"Afshar, M. et al. Natural language processing and machine learning to identify alcohol misuse from the electronic health record in trauma patients: development and internal validation. J. Am. Med. Inf. Assoc. 26, 254\u2013261 (2019).","journal-title":"J. Am. Med. Inf. Assoc."},{"key":"1645_CR38","doi-asserted-by":"publisher","DOI":"10.1093\/jamiaopen\/ooaa069","volume":"4","author":"R Stemerman","year":"2021","unstructured":"Stemerman, R. et al. Identification of social determinants of health using multi-label classification of electronic health record clinical notes. JAMIA open 4, ooaa069 (2021).","journal-title":"JAMIA open"},{"key":"1645_CR39","doi-asserted-by":"publisher","DOI":"10.1101\/2022.03.04.22271541","author":"D Lituiev","year":"2022","unstructured":"Lituiev, D., et al. Automatic extraction of social determinants of health from medical notes of chronic lower back pain patients. medRxiv https:\/\/doi.org\/10.1101\/2022.03.04.22271541 (2022).","journal-title":"medRxiv"},{"key":"1645_CR40","first-page":"1225","volume":"2021","author":"Z Yu","year":"2021","unstructured":"Yu, Z. et al. A study of social and behavioral determinants of health in lung cancer patients using transformers-based natural language processing models. AMIA Annu Symp. Proc. 2021, 1225 (2021).","journal-title":"AMIA Annu Symp. Proc."},{"key":"1645_CR41","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1177\/00333549111260S310","volume":"126","author":"KF Comer","year":"2011","unstructured":"Comer, K. F., Grannis, S., Dixon, B. E., Bodenhamer, D. J. & Wiehe, S. E. Incorporating geospatial capacity within clinical data systems to address social determinants of health. Public Health Rep. 126, 54\u201361 (2011).","journal-title":"Public Health Rep."},{"key":"1645_CR42","doi-asserted-by":"publisher","first-page":"e2412109","DOI":"10.1001\/jamanetworkopen.2024.12109","volume":"7","author":"E Brignone","year":"2024","unstructured":"Brignone, E., LeJeune, K., Mihalko, A. E., Shannon, A. L. & Sinoway, L. I. Self-reported social determinants of health and area-level social vulnerability. JAMA Netw. open 7, e2412109\u2013e2412109 (2024).","journal-title":"JAMA Netw. open"},{"key":"1645_CR43","doi-asserted-by":"publisher","first-page":"E128","DOI":"10.5888\/pcd13.160221","volume":"13","author":"AR Maroko","year":"2016","unstructured":"Maroko, A. R. et al. Integrating social determinants of health with treatment and prevention: a new tool to assess local area deprivation.Prev. Chronic Dis. 13, E128 (2016).","journal-title":"Prev. Chronic Dis."},{"key":"1645_CR44","doi-asserted-by":"publisher","first-page":"1163","DOI":"10.1016\/j.amepre.2023.06.006","volume":"65","author":"EM Brown","year":"2023","unstructured":"Brown, E. M. et al. Assessing area-level deprivation as a proxy for individual-level social risks. Am. J. Prev. Med 65, 1163\u20131171 (2023).","journal-title":"Am. J. Prev. Med"},{"key":"1645_CR45","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-023-00970-0","volume":"7","author":"M Guevara","year":"2024","unstructured":"Guevara, M. et al. Large language models to identify social determinants of health in electronic health records. NPJ Digital Med. 7, 6 (2024).","journal-title":"NPJ Digital Med."},{"key":"1645_CR46","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2320716121","volume":"121","author":"RA Gabriel","year":"2024","unstructured":"Gabriel, R. A. et al. On the development and validation of large language model-based classifiers for identifying social determinants of health. Proc. Natl. Acad. Sci. 121, e2320716121 (2024).","journal-title":"Proc. Natl. Acad. Sci."},{"key":"1645_CR47","doi-asserted-by":"publisher","first-page":"2608","DOI":"10.1093\/jamia\/ocab194","volume":"28","author":"M Wang","year":"2021","unstructured":"Wang, M., Pantell, M. S., Gottlieb, L. M. & Adler-Milstein, J. Documentation and review of social determinants of health data in the EHR: measures and associated insights. J. Am. Med. Inf. Assoc. 28, 2608\u20132616 (2021).","journal-title":"J. Am. Med. Inf. Assoc."},{"key":"1645_CR48","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1016\/j.ijmedinf.2017.09.008","volume":"107","author":"JR Vest","year":"2017","unstructured":"Vest, J. R., Grannis, S. J., Haut, D. P., Halverson, P. K. & Menachemi, N. Using structured and unstructured data to identify patients\u2019 need for services that address the social determinants of health. Int J. Med. Inf. 107, 101\u2013106 (2017).","journal-title":"Int J. Med. Inf."},{"key":"1645_CR49","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/sdata.2016.35","volume":"3","author":"AE Johnson","year":"2016","unstructured":"Johnson, A. E. et al. MIMIC-III, a freely accessible critical care database. Sci. Data 3, 1\u20139 (2016).","journal-title":"Sci. Data"},{"key":"1645_CR50","doi-asserted-by":"crossref","unstructured":"Alsentzer, E. et al. Publicly available clinical BERT embeddings. Proceedings of the 2nd Clinical Natural Language Processing Workshop. 72\u201378 (2019).","DOI":"10.18653\/v1\/W19-1909"},{"key":"1645_CR51","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btae163","volume":"40","author":"VK Keloth","year":"2024","unstructured":"Keloth, V. K. et al. Advancing entity recognition in biomedicine via instruction tuning of large language models. Bioinformatics 40, btae163 (2024).","journal-title":"Bioinformatics"},{"key":"1645_CR52","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1038\/s41746-025-01533-1","volume":"8","author":"Q Xie","year":"2025","unstructured":"Xie, Q. et al. Medical foundation large language models for comprehensive text analysis and beyond. npj Digital Medicine 8, 141 (2025).","journal-title":"npj Digital Medicine"},{"key":"1645_CR53","doi-asserted-by":"publisher","first-page":"1812","DOI":"10.1093\/jamia\/ocad259","volume":"31","author":"Y Hu","year":"2024","unstructured":"Hu, Y. et al. Improving large language models for clinical named entity recognition via prompt engineering. J. Am. Med. Inform. Assoc. 31, 1812\u20131820 (2024).","journal-title":"J. Am. Med. Inform. Assoc"},{"key":"1645_CR54","doi-asserted-by":"publisher","first-page":"3280","DOI":"10.1038\/s41467-025-56989-2","volume":"16","author":"Q Chen","year":"2025","unstructured":"Chen, Q. et al. Benchmarking large language models for biomedical natural language processing applications and recommendations.Nature Communications. 16, 3280 (2025).","journal-title":"Nature Communications."},{"key":"1645_CR55","unstructured":"Dai, S. et al. in Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining. 6437-6447."},{"key":"1645_CR56","unstructured":"Dong, X., Wang, Y., Yu, P. S. & Caverlee, J. Disclosure and mitigation of gender bias in llms. arXiv preprint arXiv:2402.11190 (2024)."},{"key":"1645_CR57","unstructured":"Ranjan, R., Gupta, S. & Singh, S. N. A Comprehensive Survey of Bias in LLMs: Current Landscape and Future Directions. arXiv preprint arXiv:2409.16430 (2024)."},{"key":"1645_CR58","doi-asserted-by":"crossref","unstructured":"Faisal, F. & Anastasopoulos, A. Geographic and geopolitical biases of language models. Proceedings of the 3rd Workshop on Multi-lingual Representation Learning (MRL). 139\u2013163 (2023).","DOI":"10.18653\/v1\/2023.mrl-1.12"},{"key":"1645_CR59","unstructured":"Introducing Llama 3.1: Our most capable models to date, https:\/\/ai.meta.com\/blog\/meta-llama-3-1\/ (2024)."},{"key":"1645_CR60","unstructured":"Achiam, J. et al. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"1645_CR61","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-020-00323-1","volume":"3","author":"N Rieke","year":"2020","unstructured":"Rieke, N. et al. The future of digital health with federated learning. NPJ Digital Med 3, 119 (2020).","journal-title":"NPJ Digital Med"},{"key":"1645_CR62","doi-asserted-by":"publisher","first-page":"1185","DOI":"10.1038\/s42256-022-00568-3","volume":"4","author":"GM Van de Ven","year":"2022","unstructured":"Van de Ven, G. M. & Tolias, A. S. Three types of incremental learning. Nature Machine Intelligence 4, 1185\u20131197 (2022).","journal-title":"Nature Machine Intelligence"},{"key":"1645_CR63","unstructured":"Wang, J. et al. Prompt engineering for healthcare: Methodologies and applications. arXiv preprint arXiv:2304.14670 (2023)."},{"key":"1645_CR64","doi-asserted-by":"crossref","unstructured":"Reynolds, L. & McDonell, K. Prompt programming for large language models: Beyond the few-shot paradigm. Extended Abstracts of the 2021 CHI Conference on Human Factors in Computing Systems, 1-7 (2021).","DOI":"10.1145\/3411763.3451760"},{"key":"1645_CR65","unstructured":"Yu, Z., He, L., Wu, Z., Dai, X. & Chen, J. Towards better chain-of-thought prompting strategies: A survey. arXiv preprint arXiv:2310.04959 (2023)."},{"key":"1645_CR66","unstructured":"Frayling, E., Lever, J. & McDonald, G. Zero-shot and few-shot generation strategies for artificial clinical records. arXiv preprint arXiv:2403.08664 (2024)."},{"key":"1645_CR67","doi-asserted-by":"crossref","unstructured":"Li, Z., Zhu, H., Lu, Z. & Yin, M. Synthetic data generation with large language models for text classification: Potential and limitations. Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, 10443\u201310461 (2023).","DOI":"10.18653\/v1\/2023.emnlp-main.647"},{"key":"1645_CR68","unstructured":"Touvron, H. et al. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"1645_CR69","doi-asserted-by":"publisher","first-page":"1764","DOI":"10.1093\/jamia\/ocaa143","volume":"27","author":"M Chen","year":"2020","unstructured":"Chen, M., Tan, X. & Padman, R. Social determinants of health in electronic health records and their impact on analysis and risk prediction: A systematic review. J. Am. Med Inf. Assoc. 27, 1764\u20131773 (2020).","journal-title":"J. Am. Med Inf. Assoc."},{"key":"1645_CR70","doi-asserted-by":"publisher","first-page":"1465","DOI":"10.1093\/jamia\/ocad096","volume":"30","author":"Y Dang","year":"2023","unstructured":"Dang, Y. et al. Systematic design and data-driven evaluation of social determinants of health ontology (SDoHO). J. Am. Med. Inform. Assoc. 30, 1465\u20131473 (2023).","journal-title":"J. Am. Med. Inform. Assoc"},{"key":"1645_CR71","doi-asserted-by":"crossref","unstructured":"Kollapally, N. M., Keloth, V. K., Xu, J. & Geller, J. Integrating commercial and social determinants of health: A unified ontology for non-clinical determinants of health. AMIA Annual Symposium Proceedings (2023).","DOI":"10.1109\/BIBM55620.2022.9995544"},{"key":"1645_CR72","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13326-016-0087-8","volume":"7","author":"A Hicks","year":"2016","unstructured":"Hicks, A., Hanna, J., Welch, D., Brochhausen, M. & Hogan, W. R. The ontology of medically related social entities: Recent developments. J. Biomed. Semant. 7, 1\u20134 (2016).","journal-title":"J. Biomed. Semant."},{"key":"1645_CR73","doi-asserted-by":"publisher","first-page":"298","DOI":"10.1016\/j.ins.2016.08.038","volume":"384","author":"N Phan","year":"2017","unstructured":"Phan, N., Dou, D., Wang, H., Kil, D. & Piniewski, B. Ontology-based deep learning for human behavior prediction with explanations in health social networks. Inf. Sci. 384, 298\u2013313 (2017).","journal-title":"Inf. Sci."},{"key":"1645_CR74","first-page":"279","volume":"121","author":"K Donnelly","year":"2006","unstructured":"Donnelly, K. SNOMED-CT: The advanced terminology and coding system for eHealth. Stud. Health Technol. Inf. 121, 279 (2006).","journal-title":"Stud. Health Technol. Inf."},{"key":"1645_CR75","doi-asserted-by":"publisher","first-page":"624","DOI":"10.1373\/49.4.624","volume":"49","author":"CJ McDonald","year":"2003","unstructured":"McDonald, C. J. et al. LOINC, a universal standard for identifying laboratory observations: a 5-year update. Clin. Chem. 49, 624\u2013633 (2003).","journal-title":"Clin. Chem."},{"key":"1645_CR76","unstructured":"Participant Surveys: Social Determinants of Health, https:\/\/www.researchallofus.org\/data-tools\/survey-explorer\/social-determinants-survey\/ (2022)."},{"key":"1645_CR77","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1016\/j.jclinepi.2015.09.016","volume":"70","author":"JM Gaziano","year":"2016","unstructured":"Gaziano, J. M. et al. Million veteran program: A mega-biobank to study genetic influences on health and disease. J. Clin. Epidemiol. 70, 214\u2013223 (2016).","journal-title":"J. Clin. Epidemiol."},{"key":"1645_CR78","unstructured":"Social Determinants of Health Survey - AHIMA, https:\/\/ahima.org\/landing-pages\/social-determinants-of-health-survey\/ (2022)."},{"key":"1645_CR79","doi-asserted-by":"publisher","first-page":"e2233348","DOI":"10.1001\/jamanetworkopen.2022.33348","volume":"5","author":"J Steinkamp","year":"2022","unstructured":"Steinkamp, J., Kantrowitz, J. J. & Airan-Javia, S. Prevalence and sources of duplicate information in the electronic medical record. JAMA Netw. open 5, e2233348\u2013e2233348 (2022).","journal-title":"JAMA Netw. open"},{"key":"1645_CR80","doi-asserted-by":"publisher","first-page":"e9","DOI":"10.1016\/j.amjmed.2009.10.012","volume":"123","author":"A Markel","year":"2010","unstructured":"Markel, A. Copy and paste of electronic health records: A modern medical illness. Am. J. Med 123, e9 (2010).","journal-title":"Am. J. Med"},{"key":"1645_CR81","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0087555","volume":"9","author":"R Cohen","year":"2014","unstructured":"Cohen, R., Aviram, I., Elhadad, M. & Elhadad, N. Redundancy-aware topic modeling for patient record notes. PLoS One 9, e87555 (2014).","journal-title":"PLoS One"},{"key":"1645_CR82","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1471-2105-14-10","volume":"14","author":"R Cohen","year":"2013","unstructured":"Cohen, R., Elhadad, M. & Elhadad, N. Redundancy in electronic health record corpora: analysis, impact on text mining performance and mitigation strategies. BMC Bioinforma. 14, 1\u201315 (2013).","journal-title":"BMC Bioinforma."},{"key":"1645_CR83","unstructured":"Clinical Sectionizer, https:\/\/github.com\/medspacy\/sectionizer (2024)."},{"key":"1645_CR84","doi-asserted-by":"publisher","first-page":"e0280471","DOI":"10.1371\/journal.pone.0280471","volume":"18","author":"K Shafighi","year":"2023","unstructured":"Shafighi, K. et al. Social isolation is linked to classical risk factors of Alzheimer\u2019s disease-related dementias. PLoS One 18, e0280471 (2023).","journal-title":"PLoS One"},{"key":"1645_CR85","doi-asserted-by":"publisher","first-page":"922","DOI":"10.1016\/j.clinthera.2021.05.005","volume":"43","author":"MA Majoka","year":"2021","unstructured":"Majoka, M. A. & Schimming, C. Effect of social determinants of health on cognition and risk of Alzheimer disease and related dementias. Clin. Ther. 43, 922\u2013929 (2021).","journal-title":"Clin. Ther."},{"key":"1645_CR86","doi-asserted-by":"publisher","first-page":"1","DOI":"10.21926\/obm.geriatr.1804021","volume":"2","author":"Y Barak","year":"2018","unstructured":"Barak, Y. & Glue, P. Do Social Isolation and Loneliness Kill People with Alzheimer\u2019s Disease?. OBM Geriatrics 2, 1\u20135 (2018).","journal-title":"OBM Geriatrics"},{"key":"1645_CR87","doi-asserted-by":"publisher","first-page":"2716","DOI":"10.1093\/jamia\/ocab170","volume":"28","author":"BG Patra","year":"2021","unstructured":"Patra, B. G. et al. Extracting social determinants of health from electronic health records using natural language processing: a systematic review - Supplementary Material. J. Am. Med Inf. Assoc. 28, 2716\u20132727 (2021).","journal-title":"J. Am. Med Inf. Assoc."},{"key":"1645_CR88","doi-asserted-by":"crossref","unstructured":"Chen, T. & Guestrin, C. Xgboost: A scalable tree boosting system. Proceedings of the 22nd ACM sigkdd international conference on knowledge discovery and data mining, 785-794 (2016).","DOI":"10.1145\/2939672.2939785"},{"key":"1645_CR89","doi-asserted-by":"crossref","unstructured":"Kim, Y., Jernite, Y., Sontag, D. & Rush, A. M. Character-aware neural language models. Thirtieth AAAI conference on artificial intelligence (2016).","DOI":"10.1609\/aaai.v30i1.10362"},{"key":"1645_CR90","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R. & Manning, C. D. Glove: Global vectors for word representation. Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP), 1532-1543 (2014).","DOI":"10.3115\/v1\/D14-1162"},{"key":"1645_CR91","doi-asserted-by":"crossref","unstructured":"Reimers, N. & Gurevych, I. Sentence-bert: Sentence embeddings using siamese bert-networks. Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), 3982\u20133992 (2019).","DOI":"10.18653\/v1\/D19-1410"},{"key":"1645_CR92","unstructured":"Introducing LLaMA: A foundational, 65-billion-parameter large language model, https:\/\/ai.meta.com\/blog\/large-language-model-llama-meta-ai\/ (2023)."},{"key":"1645_CR93","unstructured":"Taori, R. et al. Alpaca: A Strong, Replicable Instruction-Following Model, https:\/\/crfm.stanford.edu\/2023\/03\/13\/alpaca.html (2023)."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01645-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01645-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01645-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,17]],"date-time":"2025-05-17T14:04:13Z","timestamp":1747490653000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01645-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,17]]},"references-count":93,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1645"],"URL":"https:\/\/doi.org\/10.1038\/s41746-025-01645-8","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,17]]},"assertion":[{"value":"14 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"287"}}