{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T20:23:23Z","timestamp":1776457403390,"version":"3.51.2"},"reference-count":84,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,8,13]],"date-time":"2025-08-13T00:00:00Z","timestamp":1755043200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,8,13]],"date-time":"2025-08-13T00:00:00Z","timestamp":1755043200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Australian National Health and Medical Research Council","award":["GNT1192469"],"award-info":[{"award-number":["GNT1192469"]}]},{"name":"MRFF","award":["MRF2036251"],"award-info":[{"award-number":["MRF2036251"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-025-01921-7","type":"journal-article","created":{"date-parts":[[2025,8,13]],"date-time":"2025-08-13T06:11:49Z","timestamp":1755065509000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Leveraging large language models for the deidentification and temporal normalization of sensitive health information in electronic health records"],"prefix":"10.1038","volume":"8","author":[{"given":"Hong-Jie","family":"Dai","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tatheer Hussain","family":"Mir","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ching-Tai","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chien-Chang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao-Ping","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chung-Hong","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi-Yun","family":"Chou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu-Chin","family":"Teng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shalini","family":"Gupta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Omkar","family":"Panchal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Divyabharathy Ramesh","family":"Nadar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei-Hsiang","family":"Liao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu-Chuan","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zi-Rui","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Richard Tzong-Han","family":"Tsai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yung-Chun","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jitendra","family":"Jonnagaddala","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,8,13]]},"reference":[{"key":"1921_CR1","doi-asserted-by":"publisher","first-page":"547","DOI":"10.1111\/joim.12119","volume":"274","author":"P Coorevits","year":"2013","unstructured":"Coorevits, P. Electronic health records: new opportunities for clinical research. J. Intern. Med. 274, 547\u2013560 (2013).","journal-title":"J. Intern. Med."},{"key":"1921_CR2","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1146\/annurev-publhealth-031914-122747","volume":"36","author":"GS Birkhead","year":"2015","unstructured":"Birkhead, G. S., Klompas, M. & Shah, N. R. Uses of electronic health records for public health surveillance to advance public health. Annu. Rev. Public Health 36, 345\u2013359 (2015).","journal-title":"Annu. Rev. Public Health"},{"key":"1921_CR3","doi-asserted-by":"publisher","first-page":"1133","DOI":"10.1213\/ANE.0000000000004489","volume":"130","author":"DA Colquhoun","year":"2020","unstructured":"Colquhoun, D. A. et al. Considerations for integration of perioperative electronic health records across institutions for research and quality improvement: the approach taken by the Multicenter Perioperative Outcomes Group. Anesthesia Analgesia 130, 1133\u20131146 (2020).","journal-title":"Anesthesia Analgesia"},{"key":"1921_CR4","doi-asserted-by":"publisher","first-page":"1448","DOI":"10.1093\/jamia\/ocad071","volume":"30","author":"B Romanowski","year":"2023","unstructured":"Romanowski, B., Ben Abacha, A. & Fan, Y. Extracting social determinants of health from clinical note text with classification and sequence-to-sequence approaches. J. Am. Med. Inform. Assoc. 30, 1448\u20131455 (2023).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"1921_CR5","doi-asserted-by":"publisher","first-page":"1103","DOI":"10.1377\/hlthaff.24.5.1103","volume":"24","author":"R Hillestad","year":"2005","unstructured":"Hillestad, R. et al. Can electronic medical record systems transform health care? Potential health benefits, savings, and costs. Health Aff. 24, 1103\u20131117 (2005).","journal-title":"Health Aff."},{"key":"1921_CR6","doi-asserted-by":"publisher","first-page":"136947","DOI":"10.1109\/ACCESS.2020.3011099","volume":"8","author":"SM Shah","year":"2020","unstructured":"Shah, S. M. & Khan, R. A. Secondary use of electronic health record: opportunities and challenges. IEEE Access 8, 136947\u2013136965 (2020).","journal-title":"IEEE Access"},{"key":"1921_CR7","unstructured":"Congress, t. Health Insurance Portability and Accountability Act of 1996. (ASPE, 1996)."},{"key":"1921_CR8","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1038\/s41746-025-01429-0","volume":"8","author":"J Jonnagaddala","year":"2025","unstructured":"Jonnagaddala, J. & Wong, Z. S. Privacy preserving strategies for electronic health records in the era of large language models. NPJ Digit Med. 8, 34 (2025).","journal-title":"NPJ Digit Med"},{"key":"1921_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2666468","volume":"61","author":"JC Duchi","year":"2014","unstructured":"Duchi, J. C., Jordan, M. I. & Wainwright, M. J. Privacy aware learning. J. ACM 61, 1\u201357 (2014).","journal-title":"J. ACM"},{"key":"1921_CR10","doi-asserted-by":"crossref","unstructured":"Dwork, C., McSherry, F., Nissim, K. & Smith, A. In Theory of Cryptography: Third Theory of Cryptography Conference, 265\u2013284 (Springer, 2006).","DOI":"10.1007\/11681878_14"},{"key":"1921_CR11","doi-asserted-by":"publisher","first-page":"550","DOI":"10.1197\/jamia.M2444","volume":"14","author":"\u00d6 Uzuner","year":"2007","unstructured":"Uzuner, \u00d6, Luo, Y. & Szolovits, P. Evaluating the state-of-the-art in automatic de-identification. J. Am. Med. Inform. Assoc. 14, 550\u2013563 (2007).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"1921_CR12","doi-asserted-by":"publisher","first-page":"S4","DOI":"10.1016\/j.jbi.2017.06.011","volume":"75","author":"A Stubbs","year":"2017","unstructured":"Stubbs, A., Filannino, M. & Uzuner, \u00d6 De-identification of psychiatric intake records: overview of 2016 CEGS N-GRID shared tasks Track 1. J. Biomed. Inform. 75, S4\u2013S18 (2017).","journal-title":"J. Biomed. Inform."},{"key":"1921_CR13","doi-asserted-by":"publisher","first-page":"22875","DOI":"10.1109\/ACCESS.2022.3148396","volume":"10","author":"C-K Wang","year":"2022","unstructured":"Wang, C.-K. et al. Principle-based approach for the de-identification of code-mixed electronic health records. IEEE Access 10, 22875\u201322885 (2022).","journal-title":"IEEE Access"},{"key":"1921_CR14","unstructured":"Associates, E. R. (EKOS Research Associates, 2007)."},{"key":"1921_CR15","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/j.ijmedinf.2012.01.005","volume":"81","author":"T King","year":"2012","unstructured":"King, T., Brankovic, L. & Gillard, P. Perspectives of Australian adults about protecting the privacy of their health information in statistical databases. Int. J. Med. Inform. 81, 279\u2013289 (2012).","journal-title":"Int. J. Med. Inform."},{"key":"1921_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41746-025-01721-z","volume":"8","author":"M Tran","year":"2025","unstructured":"Tran, M. et al. Situating governance and regulatory concerns for generative artificial intelligence and large language models in medical education. npj Digit. Med. 8, 1\u201310 (2025).","journal-title":"npj Digit. Med."},{"key":"1921_CR17","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1016\/j.jbi.2014.01.014","volume":"50","author":"L Deleger","year":"2014","unstructured":"Deleger, L. et al. Preparing an annotated gold standard corpus to share with extramural investigators for de-identification research. J. Biomed. Inform. 50, 173\u2013183 (2014).","journal-title":"J. Biomed. Inform."},{"key":"1921_CR18","unstructured":"Saeed, M., Lieu, C., Raber, G. & Mark, R. G. In Computers in Cardiology. 641\u2013644 (IEEE)."},{"key":"1921_CR19","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1007\/s10278-011-9422-x","volume":"25","author":"JB Freymann","year":"2012","unstructured":"Freymann, J. B., Kirby, J. S., Perry, J. H., Clunie, D. A. & Jaffe, C. C. Image data sharing for biomedical research\u2014meeting HIPAA requirements for de-identification. J. Digit. Imaging 25, 14\u201324 (2012).","journal-title":"J. Digit. Imaging"},{"key":"1921_CR20","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10916-019-1451-x","volume":"44","author":"S Kundu","year":"2020","unstructured":"Kundu, S. et al. De-identification of Radiomics data retaining longitudinal temporal information. J. Med. Syst. 44, 1\u201315 (2020).","journal-title":"J. Med. Syst."},{"key":"1921_CR21","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1186\/s44247-023-00035-y","volume":"1","author":"S Raza","year":"2023","unstructured":"Raza, S., Dolatabadi, E., Ondrusek, N., Rosella, L. & Schwartz, B. Discovering social determinants of health from case reports using natural language processing: algorithmic development and validation. BMC Digit. Health 1, 35 (2023).","journal-title":"BMC Digit. Health"},{"key":"1921_CR22","doi-asserted-by":"publisher","first-page":"1040","DOI":"10.1093\/jamia\/ocw001","volume":"23","author":"G Hripcsak","year":"2016","unstructured":"Hripcsak, G., Mirhaji, P., Low, A. F. & Malin, B. A. Preserving temporal relations in clinical data while maintaining privacy. J. Am. Med. Inform. Assoc. 23, 1040\u20131045 (2016).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"1921_CR23","unstructured":"Chen, A., Jonnagaddala, J., Nekkantti, C. & Liaw, S.-T. In MEDINFO 2019: Health and Wellbeing e-Networks for All 70-73 (IOS Press, 2019)."},{"key":"1921_CR24","unstructured":"Houston, G. 1993)."},{"key":"1921_CR25","unstructured":"Mir, T. H. et al. Proc. International Workshop on Deidentification of Electronic Medical Record Notes (Springer Nature, 2024)."},{"key":"1921_CR26","unstructured":"Biderman, S. et al. In International Conference on Machine Learning. 2397-2430 (PMLR)."},{"key":"1921_CR27","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T. et al. Language models are few-shot learners. Adv. Neural Inf. Process. Syst. 33, 1877\u20131901 (2020).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1921_CR28","unstructured":"Hu, E. J. et al. In International Conference on Learning Representations."},{"key":"1921_CR29","unstructured":"Peters, M. E., Ruder, S. & Smith, N. A. In Proceedings of the 4th Workshop on Representation Learning for NLP (RepL4NLP-2019). 7-14."},{"key":"1921_CR30","unstructured":"McKenzie, I. R. et al. Inverse scaling: when bigger isn\u2019t better. Trans. Mach. Learn. Res. (2023)."},{"key":"1921_CR31","doi-asserted-by":"publisher","first-page":"e63445","DOI":"10.2196\/63445","volume":"26","author":"A Ralevski","year":"2024","unstructured":"Ralevski, A. et al. Using large language models to abstract complex social determinants of health from original and deidentified medical notes: development and validation study. J. Med. Internet Res. 26, e63445 (2024).","journal-title":"J. Med. Internet Res."},{"key":"1921_CR32","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpbup.2021.100024","volume":"1","author":"NLV ALLA","year":"2021","unstructured":"ALLA, N. L. V. et al. Cohort selection for construction of a clinical natural language processing corpus. Comp. Methods Prog. Biomed. Update 1, 100024 (2021).","journal-title":"Comp. Methods Prog. Biomed. Update"},{"key":"1921_CR33","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-021-99554-9","volume":"11","author":"J Jonnagaddala","year":"2021","unstructured":"Jonnagaddala, J., Chen, A., Batongbacal, S. & Nekkantti, C. The OpenDeID corpus for patient de-identification. Sci. Rep. 11, 19973 (2021).","journal-title":"Sci. Rep."},{"key":"1921_CR34","doi-asserted-by":"crossref","unstructured":"Quinn, C. M. et al. Moving with the times: the health science alliance (HSA) Biobank, Pathway to Sustainability. Biomark Insights 16, 11772719211005745 (2021 Mar).","DOI":"10.1177\/11772719211005745"},{"key":"1921_CR35","unstructured":"Dai, H.-J. & Jonnagaddala, J. HSA Study PHI Corpus - Annotation Guidelines (HSA, 2023)."},{"key":"1921_CR36","doi-asserted-by":"publisher","first-page":"e48443","DOI":"10.2196\/48443","volume":"26","author":"Y-Q Lee","year":"2024","unstructured":"Lee, Y.-Q. et al. Unlocking the secrets behind advanced artificial intelligence language models in deidentifying chinese-english mixed clinical text: development and validation study. J. Med. Internet Res. 26, e48443 (2024).","journal-title":"J. Med. Internet Res."},{"key":"1921_CR37","unstructured":"Liu, J. et al. In Proceedings of Deep Learning Inside Out (DeeLIO 2022): the 3rd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures. 100-114."},{"key":"1921_CR38","doi-asserted-by":"crossref","unstructured":"Shi, W., Michael, J., Gururangan, S. & Zettlemoyer, L. kNN-Prompt: Nearest Neighbor Zero-Shot Inference. arXiv preprint arXiv:2205.13792 (2022).","DOI":"10.18653\/v1\/2022.emnlp-main.214"},{"key":"1921_CR39","unstructured":"Houlsby, N. et al. In International conference on machine learning. 2790-2799 (PMLR)."},{"key":"1921_CR40","unstructured":"Dutt, R., Ericsson, L., Sanchez, P., Tsaftaris, S. A. & Hospedales, T. M. In Medical Imaging with Deep Learning. 1-20."},{"key":"1921_CR41","unstructured":"Biderman, D. et al. Lora learns less and forgets less. arXiv preprint arXiv:2405.09673 (2024)."},{"key":"1921_CR42","first-page":"1","volume":"24","author":"A Pavao","year":"2023","unstructured":"Pavao, A. et al. Codalab competitions: an open source platform to organize scientific challenges. J. Mach. Learn. Res. 24, 1\u20136 (2023).","journal-title":"J. Mach. Learn. Res."},{"key":"1921_CR43","unstructured":"Huang, C.-L., Rianto, B., Sun, J.-T., Fu, Z.-X. & Lee, C.-H. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes (Springer Nature, 2024)."},{"key":"1921_CR44","unstructured":"Bai, J. et al. Qwen technical report. arXiv preprint arXiv:2309.16609 (2023)."},{"key":"1921_CR45","unstructured":"Zhao, Z.-R., Chou, P.-C., Mir, T. H. & Dai, H.-J. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 27-38 (Springer Nature, 2024)."},{"key":"1921_CR46","unstructured":"Anil, R. et al. Palm 2 technical report. arXiv preprint arXiv:2305.10403 (2023)."},{"key":"1921_CR47","unstructured":"Huang, P.-W. & Liu, T.-E. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 80-99 (Springer Nature, 2024)."},{"key":"1921_CR48","unstructured":"Li, Y., Wehbe, R. M., Ahmad, F. S., Wang, H. & Luo, Y. Clinical-longformer and clinical-bigbird: Transformers for long clinical sequences. arXiv preprint arXiv:2201.11838 (2022)."},{"key":"1921_CR49","unstructured":"Beltagy, I., Peters, M. E. & Cohan, A. Longformer: The long-document transformer. arXiv preprint arXiv:2004.05150 (2020)."},{"key":"1921_CR50","unstructured":"Wei, J. et al. In International Conference on Learning Representations."},{"key":"1921_CR51","unstructured":"Tseng, F.-P. et al. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 143-156 (Springer Nature, 2024)."},{"key":"1921_CR52","unstructured":"Beltagy, I., Peters, M. E. & Cohan, A. Longformer: The Long-Document Transformer. arXiv e-prints, arXiv: 2004.05150 (2020)."},{"key":"1921_CR53","unstructured":"Chao, C.-Y. & Lin, C.-W. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 39-50 (Springer Nature, 2024)."},{"key":"1921_CR54","unstructured":"Chiu, P.-S., Hou, B.-W., Chen, Y.-T. & Huang, S.-H. In International Workshop on Deidentification of Electronic Medical Record Notes. 202-212 (Springer)."},{"key":"1921_CR55","unstructured":"Gupta, S., Alla, N. L. V., Pan-chal, O., Witowski, J. & Jonnagaddala, J. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 100-113 (Springer Nature, 2024)."},{"key":"1921_CR56","doi-asserted-by":"publisher","first-page":"S11","DOI":"10.1016\/j.jbi.2015.06.007","volume":"58","author":"A Stubbs","year":"2015","unstructured":"Stubbs, A., Kotfila, C. & Uzuner, \u00d6 Automated systems for the de-identification of longitudinal clinical narratives: overview of 2014 i2b2\/UTHealth shared task Track 1. J. Biomed. Inform. 58, S11\u2013S19 (2015).","journal-title":"J. Biomed. Inform."},{"key":"1921_CR57","doi-asserted-by":"publisher","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","volume":"36","author":"J Lee","year":"2020","unstructured":"Lee, J. et al. BioBERT: a pre-trained biomedical language representation model for biomedical text mining. Bioinformatics 36, 1234\u20131240 (2020).","journal-title":"Bioinformatics"},{"key":"1921_CR58","unstructured":"Ru, Z.-J. et al. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 169-182 (Springer Nature, 2024)."},{"key":"1921_CR59","unstructured":"Chiu, P.-S., Hou, B.-W., Chen, Y.-T. & Huang, H.-H. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 195\u2013207 (Springer Nature, 2024)."},{"key":"1921_CR60","doi-asserted-by":"publisher","first-page":"875","DOI":"10.1109\/LSP.2024.3377590","volume":"31","author":"X Qiu","year":"2024","unstructured":"Qiu, X., Hao, T., Shi, S., Tan, X. & Xiong, Y. J. Chain-of-LoRA: enhancing the instruction fine-tuning performance of low-rank adaptation on diverse instruction set. IEEE Signal Process. Lett. 31, 875\u2013879 (2024).","journal-title":"IEEE Signal Process. Lett."},{"key":"1921_CR61","unstructured":"Jain, N. et al. In The Twelfth International Conference on Learning Representations."},{"key":"1921_CR62","unstructured":"Akani, E., Favre, B., Bechet, F. & Gemignani, R. In Proceedings of the 16th International Natural Language Generation Conference. 437\u2013442."},{"key":"1921_CR63","unstructured":"Li, Z.-E., Zheng, H.-Y., Mao, K.-C. & Wei, Z.-W. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 157\u2013168 (Springer Nature, 2024)."},{"key":"1921_CR64","unstructured":"Cho, Y.-C., Yang, Y.-J., Liu, Y.-D., Tsao, T.-S. & Li, M.-J. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 183\u2013194 (Springer Nature, 2024)."},{"key":"1921_CR65","unstructured":"Huang, T.-Y., Shih, J.-F., Hsieh, Y.-C. & Feng, H.-H. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 129\u2013142 (Springer Nature, 2024)."},{"key":"1921_CR66","unstructured":"Huang, Y.-Z., Peng, T.-C., Lin, H.-Y., Sy, E. & Chang, Y.-C. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 13\u201326 (Springer Nature, 2024)."},{"key":"1921_CR67","unstructured":"Huang, M.-S., Mau, B.-R., Lin, J.-H. & Chen, Y.-Z. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 114\u2013128 (Springer Nature, 2024)."},{"key":"1921_CR68","unstructured":"Huang, S.-X., Cheng, H.-A. & Li, Z.-H. In Proceedings of the 2024 International Workshop on Deidentification of Electronic Medical Record Notes 63\u201379 (Springer Nature, 2024)."},{"key":"1921_CR69","unstructured":"Wang, T. et al. In International Conference on Machine Learning. 22964-22984 (PMLR)."},{"key":"1921_CR70","doi-asserted-by":"publisher","first-page":"e48145","DOI":"10.2196\/48145","volume":"25","author":"J Liu","year":"2023","unstructured":"Liu, J. et al. OpenDeID pipeline for unstructured electronic health record text notes based on rules and transformers: deidentification algorithm development and validation study. J. Med. Internet Res. 25, e48145 (2023).","journal-title":"J. Med. Internet Res."},{"key":"1921_CR71","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-024-01377-1","volume":"8","author":"I Lopez","year":"2025","unstructured":"Lopez, I. et al. Clinical entity augmented retrieval for clinical information extraction. npj Digit. Med. 8, 45 (2025).","journal-title":"npj Digit. Med."},{"key":"1921_CR72","unstructured":"Vertsel, A. & Rumiantsau, M. Hybrid llm\/rule-based approaches to business insights generation from structured data, (2024). arXiv preprint arXiv:2404.15604."},{"key":"1921_CR73","unstructured":"Liang, X. et al. Controllable text generation for large language models: a survey. arXiv preprint arXiv:2408.12599 (2024)."},{"key":"1921_CR74","unstructured":"Hoffmann, J. et al. Training compute-optimal large language models. Proceedings of the 36th International Conference on Neural Information Processing Systems, 30016-30030 (NIPS, 2022)."},{"key":"1921_CR75","doi-asserted-by":"crossref","unstructured":"Wang, J., Zhang, B., Du, Q., Zhang, J. & Chu, D. A Survey on Data Selection for LLM Instruction Tuning. arXiv preprint arXiv:2402.05123 (2024).","DOI":"10.1613\/jair.1.17625"},{"key":"1921_CR76","unstructured":"Carlini, N. et al. In The Eleventh International Conference on Learning Representations. (OpenReview)."},{"key":"1921_CR77","unstructured":"Kandpal, N., Wallace, E. & Raffel, C. In International Conference on Machine Learning. 10697\u201310707 (PMLR)."},{"key":"1921_CR78","unstructured":"Sajith, A. & Kathala, K. C. R. Is Training Data Quality or Quantity More Impactful to Small Language Model Performance? arXiv preprint arXiv:2411.15821 (2024)."},{"key":"1921_CR79","unstructured":"Lee, K. et al. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 8424\u20138445."},{"key":"1921_CR80","first-page":"79155","volume":"36","author":"G Penedo","year":"2023","unstructured":"Penedo, G. et al. The refinedweb dataset for Falcon LLM: outperforming curated corpora with web data only. Adv. Neural Inf. Process. Syst. 36, 79155\u201379172 (2023).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1921_CR81","unstructured":"Hernandez, D. et al. Scaling laws and interpretability of learning from repeated data. arXiv preprint arXiv:2205.10487 (2022)."},{"key":"1921_CR82","doi-asserted-by":"publisher","first-page":"1249","DOI":"10.1162\/tacl_a_00425","volume":"9","author":"P Czarnowska","year":"2021","unstructured":"Czarnowska, P., Vyas, Y. & Shah, K. Quantifying social biases in NLP: a generalization and empirical comparison of extrinsic fairness metrics. Trans. Assoc. Comput. Linguist. 9, 1249\u20131267 (2021).","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"1921_CR83","doi-asserted-by":"publisher","first-page":"S150","DOI":"10.1016\/j.jbi.2015.09.013","volume":"58","author":"N-W Chang","year":"2015","unstructured":"Chang, N.-W. et al. A context-aware approach for progression tracking of medical concepts in electronic medical records. J. Biomed. Inform. 58, S150\u2013S157 (2015).","journal-title":"J. Biomed. Inform."},{"key":"1921_CR84","doi-asserted-by":"crossref","unstructured":"Rajbhandari, S., Rasley, J., Ruwase, O. & He, Y. In SC20: International Conference for High Performance Computing, Networking, Storage and Analysis. 1-16 (IEEE, 2020).","DOI":"10.1109\/SC41405.2020.00024"}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01921-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01921-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01921-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T21:09:51Z","timestamp":1757452191000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01921-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,13]]},"references-count":84,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1921"],"URL":"https:\/\/doi.org\/10.1038\/s41746-025-01921-7","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,13]]},"assertion":[{"value":"22 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"517"}}