{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T10:23:30Z","timestamp":1780050210326,"version":"3.53.1"},"reference-count":63,"publisher":"Oxford University Press (OUP)","issue":"6","license":[{"start":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T00:00:00Z","timestamp":1776038400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/academic.oup.com\/pages\/standard-publication-reuse-rights"}],"funder":[{"DOI":"10.13039\/100000133","name":"Agency for Healthcare Research and Quality","doi-asserted-by":"publisher","award":["R01 HS027742"],"award-info":[{"award-number":["R01 HS027742"]}],"id":[{"id":"10.13039\/100000133","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000133","name":"Agency for Healthcare Research and Quality","doi-asserted-by":"publisher","award":["R01 HS028637"],"award-info":[{"award-number":["R01 HS028637"]}],"id":[{"id":"10.13039\/100000133","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000056","name":"National Institute of Nursing Research","doi-asserted-by":"publisher","award":["R01NR016865"],"award-info":[{"award-number":["R01NR016865"]}],"id":[{"id":"10.13039\/100000056","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000056","name":"National Institute of Nursing Research","doi-asserted-by":"publisher","award":["T32NR007969"],"award-info":[{"award-number":["T32NR007969"]}],"id":[{"id":"10.13039\/100000056","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000049","name":"National Institute on Aging","doi-asserted-by":"publisher","award":["R01AG074492"],"award-info":[{"award-number":["R01AG074492"]}],"id":[{"id":"10.13039\/100000049","id-type":"DOI","asserted-by":"publisher"}]},{"name":"2024 Marilyn D. Harris Research"},{"name":"2023 Sigma Small Grants"},{"name":"2024 Home Care Dissertation Research Grant"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,6,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Objective<\/jats:title>\n                    <jats:p>Home healthcare (HHC) clinical notes contain critical infection indicators that clinicians need in structured \u201cindicator + context\u201d pairs. Data sparsity and limited computing resources hinder automated extraction in decentralized HHC settings. This study developed and evaluated a resource-efficient pipeline using instruction-tuned, moderate-sized large language models (LLMs) to address these barriers. To address the data sparsity challenge, we also assessed the impact of a targeted LLM-based data augmentation strategy.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Materials and Methods<\/jats:title>\n                    <jats:p>An expert-defined schema of 26 infection indicator categories was developed. We expanded the training set using a 3-stage workflow: targeted annotation, context mutation, and synthetic generation. We adapted 2 moderate-sized models (Gemma-12B and Qwen-14B) via Quantized Low-Rank Adaptation (QLoRA). We compared them to a larger-sized, prompted model and a smaller-sized, fully fine-tuned LLM. We evaluated all models on a held-out test set using partial micro-averaged F1 score, output reliability metrics, and qualitative error analysis.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>Instruction-tuned moderate-sized LLMs outperformed both baselines. The top-performing model, augmented Gemma-12B, achieved a partial micro-averaged F1 score of 0.879. LLM-based data augmentation enhanced overall performance, improving the identification of rare indicators and the interpretation of negations. The best model maintained a partial F1 score above 0.750 across all indicator categories. It also showed high format adherence, confirming its ability to generate reliable structured outputs.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Discussion<\/jats:title>\n                    <jats:p>Instruction-tuning moderate-sized LLMs with QLoRA and targeted data augmentation enables high-accuracy extraction of infection indicators from HHC notes.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Conclusion<\/jats:title>\n                    <jats:p>This resource-efficient pipeline provides a scalable foundation for automated infection surveillance in healthcare settings with limited resources.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/jamia\/ocag040","type":"journal-article","created":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T12:36:43Z","timestamp":1773491803000},"page":"1146-1158","source":"Crossref","is-referenced-by-count":1,"title":["Automating infection indicator extraction in home healthcare through instruction-tuned large language models"],"prefix":"10.1093","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6122-8426","authenticated-orcid":false,"given":"Zidu","family":"Xu","sequence":"first","affiliation":[{"name":"School of Nursing, Columbia University , New York, NY 10032,","place":["United States"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0362-0670","authenticated-orcid":false,"given":"Jiyoun","family":"Song","sequence":"additional","affiliation":[{"name":"Department of Biobehavioral Health Sciences, University of Pennsylvania School of Nursing , Philadelphia, PA 19104,","place":["United States"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shuang","family":"Zhou","sequence":"additional","affiliation":[{"name":"Department of Surgery, University of Minnesota , Minneapolis, MN 55455,","place":["United States"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Danielle","family":"Scharp","sequence":"additional","affiliation":[{"name":"Icahn School of Medicine at Mount Sinai Division of General Internal Medicine, , New York, NY 10029,","place":["United States"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2852-4175","authenticated-orcid":false,"given":"Mollie","family":"Hobensack","sequence":"additional","affiliation":[{"name":"Vanderbilt University Medical Center Department of Biomedical Informatics, , Nashville, TN 37232,","place":["United States"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2413-5918","authenticated-orcid":false,"given":"Yan","family":"Hu","sequence":"additional","affiliation":[{"name":"McWilliams School of Biomedical Informatics, University of Texas Health Science at Houston , Houston, TX 77030,","place":["United States"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jingjing","family":"Shang","sequence":"additional","affiliation":[{"name":"School of Nursing, Columbia University , New York, NY 10032,","place":["United States"]}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2358-9837","authenticated-orcid":false,"given":"Maxim","family":"Topaz","sequence":"additional","affiliation":[{"name":"School of Nursing, Columbia University , New York, NY 10032,","place":["United States"]},{"name":"Center for Home Care Policy & Research, VNS Health , New York, NY 10017,","place":["United States"]},{"name":"Data Science Institute, Columbia University , New York, NY 10027,","place":["United States"]}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"286","published-online":{"date-parts":[[2026,4,13]]},"reference":[{"key":"2026052218575679400_ocag040-B1","author":"National Center for Health Statistics"},{"key":"2026052218575679400_ocag040-B2","author":"Medicare Payment Advisory Commission","year":"2025"},{"key":"2026052218575679400_ocag040-B3","doi-asserted-by":"publisher","first-page":"1388","DOI":"10.1017\/ice.2021.248","article-title":"Infection trends in home health care, 2013\u20132018","volume":"42","author":"Harrison","year":"2021","journal-title":"Infect Control Hosp Epidemiol"},{"key":"2026052218575679400_ocag040-B4","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1097\/01.ASW.0000755928.30524.22","article-title":"Predictive risk models for wound infection-related hospitalization or ED visits in home health care using machine-learning algorithms","volume":"34","author":"Song","year":"2021","journal-title":"Adv Skin Wound Care"},{"key":"2026052218575679400_ocag040-B5","doi-asserted-by":"publisher","first-page":"1015","DOI":"10.1016\/j.jamda.2020.12.010","article-title":"Identifying urinary tract infection-related information in home care nursing notes","volume":"22","author":"Woo","year":"2021","journal-title":"J Am Med Dir Assoc"},{"key":"2026052218575679400_ocag040-B6","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1111\/iwj.13623","article-title":"Exploring prevalence of wound infections and related patient characteristics in homecare using natural language processing","volume":"19","author":"Woo","year":"2022","journal-title":"Int Wound J"},{"key":"2026052218575679400_ocag040-B7","doi-asserted-by":"publisher","first-page":"104039","DOI":"10.1016\/j.jbi.2022.104039","article-title":"Clinical notes: an untapped opportunity for improving risk prediction for hospitalization and emergency department visit during home health care","volume":"128","author":"Song","year":"2022","journal-title":"J Biomed Inform"},{"key":"2026052218575679400_ocag040-B8","doi-asserted-by":"crossref","first-page":"2641","DOI":"10.1093\/jamia\/ocae247","article-title":"Exploring home healthcare clinicians\u2019 needs for using clinical decision support systems for early risk warning","volume":"31","author":"Xu","year":"2024","journal-title":"J Am Med Inform Assoc"},{"key":"2026052218575679400_ocag040-B9","first-page":"583","article-title":"NimbleMiner: an open-source nursing-sensitive natural language processing system based on word embedding","volume":"37","author":"Topaz","year":"2019","journal-title":"Comput Inform Nurs"},{"key":"2026052218575679400_ocag040-B10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"2026052218575679400_ocag040-B11","author":"Huang"},{"key":"2026052218575679400_ocag040-B12","doi-asserted-by":"publisher","DOI":"10.3390\/healthcare13030324","article-title":"Investigation into application of AI and Telemedicine in rural communities: a systematic literature review","author":"Perez","year":"2025","journal-title":"Healthcare (Basel)"},{"key":"2026052218575679400_ocag040-B13","doi-asserted-by":"publisher","first-page":"3020","DOI":"10.1038\/s41467-022-30728-3","article-title":"Challenges in digital medicine applications in under-resourced settings","volume":"13","author":"Curioso","year":"2022","journal-title":"Nat Commun"},{"key":"2026052218575679400_ocag040-B14","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Adv Neural Inf Process Syst"},{"key":"2026052218575679400_ocag040-B15","author":"Radford","year":"2025"},{"key":"2026052218575679400_ocag040-B16","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"2026052218575679400_ocag040-B17","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J Mach Learn Res"},{"key":"2026052218575679400_ocag040-B18","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1038\/s44387-025-00011-z","article-title":"Large language models for disease diagnosis: a scoping review","volume":"1","author":"Zhou","year":"2025","journal-title":"NPJ Artif Intell"},{"key":"2026052218575679400_ocag040-B19","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Adv Neural Inf Process Syst"},{"key":"2026052218575679400_ocag040-B20","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btae163","article-title":"Advancing entity recognition in biomedicine via instruction tuning of large language models","author":"Keloth","year":"2024","journal-title":"Bioinformatics"},{"key":"2026052218575679400_ocag040-B21","doi-asserted-by":"publisher","first-page":"1865","DOI":"10.1093\/jamia\/ocae037","article-title":"Taiyi: a bilingual fine-tuned large language model for diverse biomedical tasks","volume":"31","author":"Luo","year":"2024","journal-title":"J Am Med Inform Assoc"},{"key":"2026052218575679400_ocag040-B22","author":"Zhang","year":"2023"},{"key":"2026052218575679400_ocag040-B23","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1007\/s41666-025-00198-5","article-title":"Zero-shot extraction of seizure outcomes from clinical notes using generative pretrained transformers","volume":"9","author":"Ojemann","year":"2025","journal-title":"J Healthc Inform Res"},{"key":"2026052218575679400_ocag040-B24","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1038\/s41746-024-01029-4","article-title":"Prompt engineering in consistency and reliability with the evidence-based guideline for LLMs","volume":"7","author":"Wang","year":"2024","journal-title":"NPJ Digit Med"},{"key":"2026052218575679400_ocag040-B25","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1038\/s43856-024-00717-2","article-title":"Current applications and challenges in large language models for patient care: a systematic review","volume":"5","author":"Busch","year":"2025","journal-title":"Commun Med (Lond)"},{"key":"2026052218575679400_ocag040-B26","doi-asserted-by":"publisher","first-page":"553","author":"Hu","DOI":"10.1093\/jamia\/ocaf213"},{"key":"2026052218575679400_ocag040-B27","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1038\/s41746-025-01533-1","article-title":"Medical foundation large language models for comprehensive text analysis and beyond","volume":"8","author":"Xie","year":"2025","journal-title":"NPJ Digit Med"},{"key":"2026052218575679400_ocag040-B28","doi-asserted-by":"publisher","first-page":"e57828","DOI":"10.2196\/57828","article-title":"Harnessing moderate-sized language models for reliable patient data deidentification in emergency department records: algorithm development, validation, and implementation study","volume":"4","author":"Dor\u00e9mus","year":"2025","journal-title":"JMIR AI"},{"key":"2026052218575679400_ocag040-B29","author":"Dettmers"},{"key":"2026052218575679400_ocag040-B30","doi-asserted-by":"crossref","first-page":"10088","DOI":"10.52202\/075280-0441","article-title":"QLoRA: efficient finetuning of quantized LLMs","volume":"36","author":"Dettmers","year":"2023","journal-title":"Adv Neural Inf Process Syst"},{"key":"2026052218575679400_ocag040-B31","first-page":"3","article-title":"LoRA: low-rank adaptation of large language models","volume":"1","author":"Hu","year":"2022","journal-title":"ICLR"},{"key":"2026052218575679400_ocag040-B32","doi-asserted-by":"publisher","article-title":".","author":"Ding","DOI":"10.18653\/v1\/2024.findings-acl.97"},{"key":"2026052218575679400_ocag040-B33","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocaf045","article-title":"Evaluating the effectiveness of biomedical fine-tuning for large language models on clinical tasks","volume":"32","author":"Dorfner","year":"2025","journal-title":"J Am Med Inform Assoc"},{"key":"2026052218575679400_ocag040-B34","first-page":"70638","article-title":"Data augmentations for improved (large) language model generalization","volume":"36","author":"Feder","year":"2023","journal-title":"Adv Neural Inf Process Syst"},{"key":"2026052218575679400_ocag040-B35","author":"Wang","year":"2024"},{"key":"2026052218575679400_ocag040-B36","doi-asserted-by":"publisher","article-title":".","author":"Yue","DOI":"10.18653\/v1\/2020.clinicalnlp-1.23"},{"key":"2026052218575679400_ocag040-B37","doi-asserted-by":"publisher","first-page":"110022","DOI":"10.1016\/j.compbiomed.2025.110022","article-title":"DALL-M: context-aware clinical data augmentation with large language models","volume":"190","author":"Hsieh","year":"2025","journal-title":"Comput Biol Med"},{"key":"2026052218575679400_ocag040-B38","author":"Qin"},{"key":"2026052218575679400_ocag040-B39","doi-asserted-by":"publisher","first-page":"105417","DOI":"10.1016\/j.jamda.2024.105417","article-title":"Building a time-series model to predict hospitalization risks in home health care: insights into development, accuracy, and fairness","volume":"26","author":"Topaz","year":"2025","journal-title":"J Am Med Dir Assoc"},{"key":"2026052218575679400_ocag040-B40","doi-asserted-by":"publisher","first-page":"2011","DOI":"10.1093\/jamia\/ocaa088","article-title":"MINIMAR (MINimum Information for Medical AI Reporting): developing reporting standards for artificial intelligence in health care","volume":"27","author":"Hernandez-Boussard","year":"2020","journal-title":"J Am Med Inform Assoc"},{"key":"2026052218575679400_ocag040-B41","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1093\/jamia\/ocae278","article-title":"Machine learning-based infection diagnostic and prognostic models in post-acute care settings: a systematic review","volume":"32","author":"Xu","year":"2025","journal-title":"J Am Med Inform Assoc"},{"key":"2026052218575679400_ocag040-B42","article-title":".","author":"Henning"},{"key":"2026052218575679400_ocag040-B43","author":"Zhang T, Kishore V, Wu F, Weinberger KQ, Artzi Y. BERTScore: Evaluating Text Generation with BERT"},{"key":"2026052218575679400_ocag040-B44","doi-asserted-by":"publisher","first-page":"3280","DOI":"10.1038\/s41467-025-56989-2","article-title":"Benchmarking large language models for biomedical natural language processing applications and recommendations","volume":"16","author":"Chen","year":"2025","journal-title":"Nat Commun"},{"key":"2026052218575679400_ocag040-B45","author":"Abdin","year":"2024"},{"key":"2026052218575679400_ocag040-B46","doi-asserted-by":"publisher","first-page":"ooaf109","DOI":"10.1093\/jamiaopen\/ooaf109","article-title":"Leveraging open-source large language models for clinical information extraction in resource-constrained settings","volume":"8","author":"Builtjes","year":"2025","journal-title":"JAMIA Open"},{"key":"2026052218575679400_ocag040-B47","year":"2024"},{"key":"2026052218575679400_ocag040-B48"},{"key":"2026052218575679400_ocag040-B49","author":"Yang"},{"key":"2026052218575679400_ocag040-B50","author":"Zhao"},{"key":"2026052218575679400_ocag040-B51","author":"Taori R, Gulrajani I, Zhang T, et al","year":"2023"},{"key":"2026052218575679400_ocag040-B52","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1093\/jamia\/ocae260","article-title":"Extracting social support and social isolation information from clinical psychiatry notes: comparing a rule-based natural language processing system and a large language model","volume":"32","author":"Patra","year":"2025","journal-title":"J Am Med Inform Assoc"},{"key":"2026052218575679400_ocag040-B53","doi-asserted-by":"publisher","first-page":"1134","DOI":"10.1038\/s41591-024-02855-5","article-title":"Adapted large language models can outperform medical experts in clinical text summarization","volume":"30","author":"Van Veen","year":"2024","journal-title":"Nat Med"},{"key":"2026052218575679400_ocag040-B54","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1038\/s41746-023-00970-0","article-title":"Large language models to identify social determinants of health in electronic health records","volume":"7","author":"Guevara","year":"2024","journal-title":"NPJ Digit Med"},{"key":"2026052218575679400_ocag040-B55","author":"Wang","year":"2023"},{"key":"2026052218575679400_ocag040-B56","doi-asserted-by":"publisher","first-page":"2010","DOI":"10.1093\/jamia\/ocae147","article-title":"LEAP: LLM instruction-example adaptive prompting framework for biomedical relation extraction","volume":"31","author":"Zhou","year":"2024","journal-title":"J Am Med Inform Assoc"},{"key":"2026052218575679400_ocag040-B57","author":"Wei"},{"key":"2026052218575679400_ocag040-B58","doi-asserted-by":"publisher","first-page":"1696","DOI":"10.5555\/3716662.3716809","author":"Yang","year":"2025"},{"key":"2026052218575679400_ocag040-B59","author":"Han","year":"2024"},{"key":"2026052218575679400_ocag040-B60","doi-asserted-by":"publisher","first-page":"e080749","DOI":"10.1136\/bmj-2024-080749","article-title":"Uncertainty of risk estimates from clinical prediction models: rationale, challenges, and approaches","volume":"388","author":"Riley","year":"2025","journal-title":"BMJ"},{"key":"2026052218575679400_ocag040-B61","doi-asserted-by":"publisher","author":"Kobayashi","DOI":"10.18653\/v1\/N18-2072"},{"key":"2026052218575679400_ocag040-B62","author":"Huang"},{"key":"2026052218575679400_ocag040-B63","doi-asserted-by":"publisher","author":"Jiang","DOI":"10.18653\/v1\/2024.emnlp-main.272"}],"container-title":["Journal of the American Medical Informatics Association"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/jamia\/advance-article-pdf\/doi\/10.1093\/jamia\/ocag040\/68070863\/ocag040.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/jamia\/article-pdf\/33\/6\/1146\/68070863\/ocag040.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/jamia\/article-pdf\/33\/6\/1146\/68070863\/ocag040.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T22:58:07Z","timestamp":1779490687000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/jamia\/article\/33\/6\/1146\/8653298"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,13]]},"references-count":63,"journal-issue":{"issue":"6","published-online":{"date-parts":[[2026,4,13]]},"published-print":{"date-parts":[[2026,6,1]]}},"URL":"https:\/\/doi.org\/10.1093\/jamia\/ocag040","relation":{},"ISSN":["1067-5027","1527-974X"],"issn-type":[{"value":"1067-5027","type":"print"},{"value":"1527-974X","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2026,6]]},"published":{"date-parts":[[2026,4,13]]}}}