{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T16:56:19Z","timestamp":1779382579651,"version":"3.53.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T00:00:00Z","timestamp":1754179200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T00:00:00Z","timestamp":1754179200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000050","name":"National Heart, Lung, and Blood Institute","doi-asserted-by":"publisher","award":["R01HL168473"],"award-info":[{"award-number":["R01HL168473"]}],"id":[{"id":"10.13039\/100000050","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Healthc Inform Res"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s41666-025-00210-y","type":"journal-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:26:36Z","timestamp":1754256396000},"page":"587-605","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Instruction-Tuned Large Language Models for Clinical Data Extraction: Creating an Aortic Measurement Database from CT Radiology Reports"],"prefix":"10.1007","volume":"9","author":[{"given":"Ely","family":"Erez","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sedem","family":"Dankwa","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"McKenzie","family":"Tuttle","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Afsheen","family":"Nasir","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Prashanth","family":"Vallabhajosyula","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Eric B.","family":"Schneider","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Roland","family":"Assi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chin Siang","family":"Ong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"210_CR1","doi-asserted-by":"publisher","first-page":"e334","DOI":"10.1161\/CIR.0000000000001106","volume":"146","author":"EM Isselbacher","year":"2022","unstructured":"Isselbacher EM, Preventza O, Hamilton Black J et al (2022) 2022 ACC\/AHA Guideline for the Diagnosis and Management of Aortic Disease: A Report of the American Heart Association\/American College of Cardiology Joint Committee on Clinical Practice Guidelines. Circulation 146:e334\u2013e482. https:\/\/doi.org\/10.1161\/CIR.0000000000001106","journal-title":"Circulation"},{"key":"210_CR2","doi-asserted-by":"publisher","DOI":"10.2196\/60164","volume":"12","author":"M Nunes","year":"2024","unstructured":"Nunes M, Bone J, Ferreira JC, Elvas LB (2024) Health care language models and their fine-tuning for information extraction: scoping review. JMIR Med Inform 12:e60164. https:\/\/doi.org\/10.2196\/60164","journal-title":"JMIR Med Inform"},{"key":"210_CR3","doi-asserted-by":"publisher","unstructured":"Hu M, Qian J, Pan S, et al (2024) Advancing medical imaging with language models: featuring a spotlight on ChatGPT. Phys Med Biol 69:10TR01. https:\/\/doi.org\/10.1088\/1361-6560\/ad387d","DOI":"10.1088\/1361-6560\/ad387d"},{"key":"210_CR4","unstructured":"Munnangi M (2024) A Brief History of Named Entity Recognition"},{"key":"210_CR5","doi-asserted-by":"crossref","unstructured":"Tsuruoka Y, Tsujii J (2003) Boosting precision and recall of dictionary-based protein name recognition. In: Proceedings of the ACL 2003 workshop on Natural language processing in biomedicine - Volume 13. Association for Computational Linguistics, USA, pp 41\u201348","DOI":"10.3115\/1118958.1118964"},{"key":"210_CR6","doi-asserted-by":"publisher","unstructured":"Hanisch D, Fundel K, Mevissen H-T, et al (2005) ProMiner: rule-based protein and gene entity recognition. BMC Bioinformatics 6:S14. https:\/\/doi.org\/10.1186\/1471-2105-6-S1-S14","DOI":"10.1186\/1471-2105-6-S1-S14"},{"key":"210_CR7","doi-asserted-by":"crossref","unstructured":"Collier N, Nobata C, Tsujii J (2000) Extracting the names of genes and gene products with a hidden Markov model. In: Proceedings of the 18th conference on Computational linguistics - Volume 1. Association for Computational Linguistics, USA, pp 201\u2013207","DOI":"10.3115\/990820.990850"},{"key":"210_CR8","doi-asserted-by":"crossref","unstructured":"Kazama J, Makino T, Ohta Y, Tsujii J (2002) Tuning support vector machines for biomedical named entity recognition. In: Proceedings of the ACL-02 workshop on Natural language processing in the biomedical domain - Volume 3. Association for Computational Linguistics, USA, pp 1\u20138","DOI":"10.3115\/1118149.1118150"},{"key":"210_CR9","doi-asserted-by":"crossref","unstructured":"Settles B (2004) Biomedical named entity recognition using conditional random fields and rich feature sets. In: Proceedings of the International Joint Workshop on Natural Language Processing in Biomedicine and its Applications. Association for Computational Linguistics, USA, pp 104\u2013107","DOI":"10.3115\/1567594.1567618"},{"key":"210_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3458754","volume":"3","author":"Y Gu","year":"2022","unstructured":"Gu Y, Tinn R, Cheng H et al (2022) Domain-specific language model pretraining for biomedical natural language processing. ACM Trans Comput Healthc 3:1\u201323. https:\/\/doi.org\/10.1145\/3458754","journal-title":"ACM Trans Comput Healthc"},{"key":"210_CR11","doi-asserted-by":"publisher","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","volume":"36","author":"J Lee","year":"2020","unstructured":"Lee J, Yoon W, Kim S et al (2020) BioBERT: a pre-trained biomedical language representation model for biomedical text mining. Bioinformatics 36:1234\u20131240. https:\/\/doi.org\/10.1093\/bioinformatics\/btz682","journal-title":"Bioinformatics"},{"key":"210_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41746-022-00590-0","volume":"5","author":"S Khurshid","year":"2022","unstructured":"Khurshid S, Reeder C, Harrington LX et al (2022) Cohort design and natural language processing to reduce bias in electronic health records research. Npj Digit Med 5:1\u201314. https:\/\/doi.org\/10.1038\/s41746-022-00590-0","journal-title":"Npj Digit Med"},{"key":"210_CR13","doi-asserted-by":"publisher","DOI":"10.2196\/38178","volume":"10","author":"P Singh","year":"2022","unstructured":"Singh P, Haimovich J, Reeder C et al (2022) One clinician is all you need\u2013cardiac magnetic resonance imaging measurement extraction: deep learning algorithm development. JMIR Med Inform 10:e38178. https:\/\/doi.org\/10.2196\/38178","journal-title":"JMIR Med Inform"},{"key":"210_CR14","unstructured":"OpenAI, Achiam J, Adler S, et al (2024) GPT-4 Technical Report"},{"key":"210_CR15","unstructured":"Touvron H, Lavril T, Izacard G, et al (2023) LLaMA: open and efficient foundation language models"},{"key":"210_CR16","unstructured":"Kojima T, Gu S (Shane), Reid M, et al (2022) Large language models are zero-shot reasoners. Adv Neural Inf Process Syst 35:22199\u201322213"},{"key":"210_CR17","unstructured":"Brown TB, Mann B, Ryder N, et al (2020) Language models are few-shot learners"},{"key":"210_CR18","doi-asserted-by":"crossref","unstructured":"Agrawal M, Hegselmann S, Lang H, et al (2022) Large language models are few-shot clinical information extractors","DOI":"10.18653\/v1\/2022.emnlp-main.130"},{"key":"210_CR19","doi-asserted-by":"publisher","unstructured":"Hu Y, Chen Q, Du J, et al (2024) Improving large language models for clinical named entity recognition via prompt engineering. J Am Med Inform Assoc ocad259. https:\/\/doi.org\/10.1093\/jamia\/ocad259","DOI":"10.1093\/jamia\/ocad259"},{"key":"210_CR20","unstructured":"Wang S, Sun X, Li X, et al (2023) GPT-NER: named entity recognition via large language models"},{"key":"210_CR21","doi-asserted-by":"crossref","unstructured":"Xie T, Li Q, Zhang J, et al (2023) Empirical study of zero-shot NER with ChatGPT","DOI":"10.18653\/v1\/2023.emnlp-main.493"},{"key":"210_CR22","doi-asserted-by":"crossref","unstructured":"Chen Q, Hu Y, Peng X, et al (2024) A systematic evaluation of large language models for biomedical natural language processing: benchmarks, baselines, and recommendations","DOI":"10.18653\/v1\/2024.emnlp-main.1247"},{"key":"210_CR23","doi-asserted-by":"crossref","unstructured":"Xu D, Chen W, Peng W, et al (2024) Large language models for generative information extraction: a survey","DOI":"10.1007\/s11704-024-40555-y"},{"key":"210_CR24","unstructured":"Wei J, Bosma M, Zhao VY, et al (2022) Finetuned language models are zero-shot learners"},{"key":"210_CR25","doi-asserted-by":"publisher","unstructured":"Keloth VK, Hu Y, Xie Q, et al (2024) Advancing entity recognition in biomedicine via instruction tuning of large language models. Bioinformatics 40:btae163. https:\/\/doi.org\/10.1093\/bioinformatics\/btae163","DOI":"10.1093\/bioinformatics\/btae163"},{"key":"210_CR26","doi-asserted-by":"crossref","unstructured":"Biana J, Zhai W, Huang X, et al (2024) VANER: leveraging large language model for versatile and adaptive biomedical named entity recognition","DOI":"10.3233\/FAIA240664"},{"key":"210_CR27","unstructured":"Tkachenko M, Malyuk M, Holmanyuk A, Liubimov N (2020) Label studio: data labeling software"},{"key":"210_CR28","unstructured":"Dubey A, Jauhri A, Pandey A, et al (2024) The Llama 3 Herd of Models"},{"key":"210_CR29","unstructured":"Introducing Llama 3.1: our most capable models to date. In: Meta AI. https:\/\/ai.meta.com\/blog\/meta-llama-3-1\/. Accessed 18 Nov 2024"},{"key":"210_CR30","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2019) BERT: pre-training of deep bidirectional transformers for language understanding"},{"key":"210_CR31","unstructured":"Lim, David (2024) dslim\/bert-base-NER \u00b7 hugging face. https:\/\/huggingface.co\/dslim\/bert-base-NER. Accessed 18 Nov 2024"},{"key":"210_CR32","unstructured":"Sang EFTK, Meulder FD (2003) Introduction to the CoNLL-2003 shared task: language-independent named entity recognition"},{"key":"210_CR33","doi-asserted-by":"crossref","unstructured":"Alsentzer E, Murphy JR, Boag W, et al (2019) Publicly available clinical BERT embeddings","DOI":"10.18653\/v1\/W19-1909"},{"key":"210_CR34","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2016.35","volume":"3","author":"AEW Johnson","year":"2016","unstructured":"Johnson AEW, Pollard TJ, Shen L et al (2016) MIMIC-III, a freely accessible critical care database. Sci Data 3:160035. https:\/\/doi.org\/10.1038\/sdata.2016.35","journal-title":"Sci Data"},{"key":"210_CR35","doi-asserted-by":"crossref","unstructured":"Loukas L, Fergadiotis M, Chalkidis I, et al (2022) FiNER: financial numeric entity recognition for XBRL tagging","DOI":"10.18653\/v1\/2022.acl-long.303"},{"key":"210_CR36","doi-asserted-by":"crossref","unstructured":"Wolf T, Debut L, Sanh V, et al (2020) HuggingFace\u2019s transformers: state-of-the-art natural language processing","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"210_CR37","unstructured":"Mosbach M, Andriushchenko M, Klakow D (2021) On the stability of fine-tuning BERT: misconceptions, explanations, and strong baselines"},{"key":"210_CR38","unstructured":"Loshchilov I, Hutter F (2019) Decoupled weight decay regularization"},{"key":"210_CR39","first-page":"10088","volume":"36","author":"T Dettmers","year":"2023","unstructured":"Dettmers T, Pagnoni A, Holtzman A, Zettlemoyer L (2023) QLoRA: efficient finetuning of quantized LLMs. Adv Neural Inf Process Syst 36:10088\u201310115","journal-title":"Adv Neural Inf Process Syst"},{"key":"210_CR40","unstructured":"Hu EJ, Shen Y, Wallis P, et al (2021) LoRA: low-rank adaptation of large language models"},{"key":"210_CR41","doi-asserted-by":"crossref","unstructured":"Daniel Han and Michael Han (2024) unslothai\/unsloth","DOI":"10.1155\/adce\/3403677"},{"key":"210_CR42","doi-asserted-by":"publisher","DOI":"10.1016\/j.clinimag.2023.110021","volume":"105","author":"S Zamirpour","year":"2024","unstructured":"Zamirpour S, Boskovski MT, Pirruccello JP et al (2024) Sex differences in ascending aortic size reporting and growth on chest computed tomography and magnetic resonance imaging. Clin Imaging 105:110021. https:\/\/doi.org\/10.1016\/j.clinimag.2023.110021","journal-title":"Clin Imaging"},{"key":"210_CR43","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1097\/RCT.0000000000000167","volume":"39","author":"N Benedetti","year":"2015","unstructured":"Benedetti N, Hope MD (2015) Prevalence and significance of incidentally noted dilation of the ascending aorta on routine chest computed tomography in older patients. J Comput Assist Tomogr 39:109. https:\/\/doi.org\/10.1097\/RCT.0000000000000167","journal-title":"J Comput Assist Tomogr"},{"key":"210_CR44","doi-asserted-by":"publisher","first-page":"892","DOI":"10.1016\/j.cjca.2019.03.023","volume":"35","author":"M Mori","year":"2019","unstructured":"Mori M, Bin Mahmood SU, Yousef S et al (2019) Prevalence of incidentally identified thoracic aortic dilations: insights for screening criteria. Can J Cardiol 35:892\u2013898. https:\/\/doi.org\/10.1016\/j.cjca.2019.03.023","journal-title":"Can J Cardiol"},{"key":"210_CR45","unstructured":"(2020) Software as a Medical Device (SAMD): clinical evaluation. https:\/\/www.fda.gov\/regulatory-information\/search-fda-guidance-documents\/software-medical-device-samd-clinical-evaluation. Accessed 8 May 2025"},{"key":"210_CR46","unstructured":"Lu Q, Li R, Wen A, et al (2024) Large language models struggle in token-level clinical named entity recognition"},{"key":"210_CR47","unstructured":"Dodge J, Ilharco G, Schwartz R, et al (2020) Fine-tuning pretrained language models: weight initializations, data orders, and early stopping"},{"key":"210_CR48","doi-asserted-by":"publisher","first-page":"808","DOI":"10.1055\/s-0041-1735184","volume":"12","author":"LL Guo","year":"2021","unstructured":"Guo LL, Pfohl SR, Fries J et al (2021) Systematic review of approaches to preserve machine learning performance in the presence of temporal dataset shift in clinical medicine. Appl Clin Inform 12:808\u2013815. https:\/\/doi.org\/10.1055\/s-0041-1735184","journal-title":"Appl Clin Inform"}],"container-title":["Journal of Healthcare Informatics Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41666-025-00210-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41666-025-00210-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41666-025-00210-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,10]],"date-time":"2025-11-10T14:56:54Z","timestamp":1762786614000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s41666-025-00210-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":48,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["210"],"URL":"https:\/\/doi.org\/10.1007\/s41666-025-00210-y","relation":{},"ISSN":["2509-4971","2509-498X"],"issn-type":[{"value":"2509-4971","type":"print"},{"value":"2509-498X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"18 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 May 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 July 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 August 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This study was determined to be exempt from review by Yale University\u2019s Institutional Review Board (IRB) under protocol number 2000037866 on May 3, 2024.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"\u00a0Ethics Approval"}},{"value":"The authors declare no competing interests.\u00a0","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}]}}