{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T12:09:47Z","timestamp":1775218187964,"version":"3.50.1"},"reference-count":133,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T00:00:00Z","timestamp":1752192000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T00:00:00Z","timestamp":1752192000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"NSF","award":["SCH-2205289"],"award-info":[{"award-number":["SCH-2205289"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-025-01789-7","type":"journal-article","created":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T15:25:03Z","timestamp":1752247503000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["A perspective for adapting generalist AI to specialized medical AI applications and their challenges"],"prefix":"10.1038","volume":"8","author":[{"given":"Zifeng","family":"Wang","sequence":"first","affiliation":[]},{"given":"Hanyin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Benjamin","family":"Danek","sequence":"additional","affiliation":[]},{"given":"Ying","family":"Li","sequence":"additional","affiliation":[]},{"given":"Christina","family":"Mack","sequence":"additional","affiliation":[]},{"given":"Luk","family":"Arbuckle","sequence":"additional","affiliation":[]},{"given":"Devyani","family":"Biswal","sequence":"additional","affiliation":[]},{"given":"Hoifung","family":"Poon","sequence":"additional","affiliation":[]},{"given":"Yajuan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Pranav","family":"Rajpurkar","sequence":"additional","affiliation":[]},{"given":"Cao","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Jimeng","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,11]]},"reference":[{"key":"1789_CR1","unstructured":"Choi, E. et al. Retain: an interpretable predictive model for healthcare using reverse time attention mechanism. In Proc. Advances in Neural Information Processing Systems. Vol. 29 (Curran Associates, Inc., 2016)."},{"key":"1789_CR2","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wu, Z., Agarwal, D. & Sun, J. MedCLIP: Contrastive learning from unpaired medical images and text. In Proc. 2022 Conference on Empirical Methods in Natural Language Processing. 3876\u20133887 (Association for Computational Linguistics, 2022).","DOI":"10.18653\/v1\/2022.emnlp-main.256"},{"key":"1789_CR3","doi-asserted-by":"crossref","unstructured":"Wang, Z. & Sun, J. PromptEHR: conditional electronic healthcare records generation with prompt learning. In Proc. Conference on Empirical Methods in Natural Language Processing. 2873\u20132885. (Association for Computational Linguistics, 2022).","DOI":"10.18653\/v1\/2022.emnlp-main.185"},{"key":"1789_CR4","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1038\/s41586-023-05881-4","volume":"616","author":"M Moor","year":"2023","unstructured":"Moor, M. et al. Foundation models for generalist medical artificial intelligence. Nature 616, 259\u2013265 (2023).","journal-title":"Nature"},{"key":"1789_CR5","unstructured":"Brown, T. et al. Language models are few-shot learners. In Proc. Advances in Neural Information Processing Systems. Vol. 33, 1877\u20131901 (Curran Associates, Inc., 2020)."},{"key":"1789_CR6","doi-asserted-by":"crossref","unstructured":"Wan, P. et al. Outpatient reception via collaboration between nurses and a large language model: a randomized controlled trial. Nat. Med. 30, 2878\u20132885 (2024).","DOI":"10.1038\/s41591-024-03148-7"},{"key":"1789_CR7","doi-asserted-by":"crossref","unstructured":"Yang, J. et al. Harnessing the power of LLMs in practice: a survey on ChatGPT and beyond. ACM Trans. Knowl. Discov. Data 18, 1\u201332 (2024).","DOI":"10.1145\/3649506"},{"key":"1789_CR8","doi-asserted-by":"publisher","first-page":"bbad493","DOI":"10.1093\/bib\/bbad493","volume":"25","author":"S Tian","year":"2024","unstructured":"Tian, S. et al. Opportunities and challenges for ChatGPT and large language models in biomedicine and health. Brief. Bioinform. 25, bbad493 (2024).","journal-title":"Brief. Bioinform."},{"key":"1789_CR9","doi-asserted-by":"publisher","first-page":"1161098","DOI":"10.3389\/fdgth.2023.1161098","volume":"5","author":"J Au Yeung","year":"2023","unstructured":"Au Yeung, J. et al. AI chatbots not yet ready for clinical use. Front. Digit. Health 5, 1161098 (2023).","journal-title":"Front. Digit. Health"},{"key":"1789_CR10","doi-asserted-by":"publisher","first-page":"842","DOI":"10.1001\/jama.2023.1044","volume":"329","author":"A Sarraju","year":"2023","unstructured":"Sarraju, A. et al. Appropriateness of cardiovascular disease prevention recommendations obtained from a popular online chat-based artificial intelligence model. JAMA 329, 842\u2013844 (2023).","journal-title":"JAMA"},{"key":"1789_CR11","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1038\/s41586-023-06291-2","volume":"620","author":"K Singhal","year":"2023","unstructured":"Singhal, K. et al. Large language models encode clinical knowledge. Nature 620, 172\u2013180 (2023).","journal-title":"Nature"},{"key":"1789_CR12","unstructured":"Lewis, P. et al. Retrieval-augmented generation for knowledge-intensive NLP tasks. In Proc. Advances in Neural Information Processing Systems. Vol. 33, 9459\u20139474 (Curran Associates, Inc., 2020)."},{"key":"1789_CR13","doi-asserted-by":"crossref","unstructured":"Xiong, G., Jin, Q., Lu, Z. & Zhang, A. Benchmarking retrieval-augmented generation for medicine. In Proc. Findings of the Association for Computational Linguistics: ACL 2024 (eds Ku, L.-W., Martins, A. & Srikumar, V.) 6233\u20136251. https:\/\/aclanthology.org\/2024.findings-acl.372 (Association for Computational Linguistics, Bangkok, Thailand, 2024).","DOI":"10.18653\/v1\/2024.findings-acl.372"},{"key":"1789_CR14","unstructured":"Nakano, R. et al. WebGPT: Browser-assisted question-answering with human feedback. Preprint at arXiv https:\/\/arxiv.org\/abs\/2112.09332 (2021)."},{"key":"1789_CR15","unstructured":"Jin, Q. et al. AgentMD: empowering language agents for risk prediction with large-scale clinical tool learning. Preprint at arXiv https:\/\/arxiv.org\/abs\/2402.13225 (2024)."},{"key":"1789_CR16","first-page":"77","volume":"84","author":"H Nori","year":"2023","unstructured":"Nori, H. et al. Can generalist foundation models outcompete special-purpose tuning? case study in medicine. Medicine 84, 77\u20133 (2023).","journal-title":"Medicine"},{"key":"1789_CR17","unstructured":"Zaharia, M. et al. The shift from models to compound AI systems. https:\/\/bair.berkeley.edu\/blog\/2024\/02\/18\/compound-ai-systems\/ (2024)."},{"key":"1789_CR18","doi-asserted-by":"crossref","unstructured":"Lin, J., Xu, H., Wang, Z., Wang, S. & Sun, J. Panacea: a foundation model for clinical trial search, summarization, design, and recruitment. Preprint at https:\/\/arxiv.org\/abs\/2407.11007 (2024).","DOI":"10.1101\/2024.06.26.24309548"},{"key":"1789_CR19","unstructured":"Wang, H. et al. Towards adapting open-source large language models for expert-level clinical note generation. Preprint at arXiv https:\/\/arxiv.org\/abs\/2405.00715 (2024)."},{"key":"1789_CR20","unstructured":"Khattab, O. et al. DSPy: compiling declarative language model calls into self-improving pipelines. In Proc. R0-FoMo: Robustness of Few-shot and Zero-shot Learning in Large Foundation Models (2023)."},{"key":"1789_CR21","doi-asserted-by":"publisher","first-page":"570","DOI":"10.1038\/s41586-023-06792-0","volume":"624","author":"DA Boiko","year":"2023","unstructured":"Boiko, D. A., MacKnight, R., Kline, B. & Gomes, G. Autonomous chemical research with large language models. Nature 624, 570\u2013578 (2023).","journal-title":"Nature"},{"key":"1789_CR22","unstructured":"Touvron, H. et al. Llama 2: Open foundation and fine-tuned chat models. Preprint at arXiv https:\/\/arxiv.org\/abs\/2307.09288 (2023)."},{"key":"1789_CR23","unstructured":"Chen, Z. et al. MEDITRON-70b: scaling medical pretraining for large language models. Preprint at arXiv https:\/\/arxiv.org\/abs\/2311.16079 (2023)."},{"key":"1789_CR24","unstructured":"Jiang, A. Q. et al. Mixtral of experts. Preprint at arXiv https:\/\/arxiv.org\/abs\/2401.04088 (2024)."},{"key":"1789_CR25","unstructured":"Wei, J. et al. Finetuned language models are zero-shot learners. In Proc. International Conference on Learning Representations. OpenReview.net (2021)."},{"key":"1789_CR26","doi-asserted-by":"crossref","unstructured":"Li, X. L. & Liang, P. Prefix-Tuning: optimizing continuous prompts for generation. In Proc. 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing. Vol. Long Papers, 4582\u20134597 (Association for Computational Linguistics (ACL), 2021).","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"1789_CR27","unstructured":"Hu, E. J. et al. LoRA: low-rank adaptation of large language models. In Proc. International Conference on Learning Representations. OpenReview.net (2023)."},{"key":"1789_CR28","unstructured":"Ouyang, L. et al. Training language models to follow instructions with human feedback. In Proc. Advances in Neural Information Processing Systems. Vol. 35, 27730\u201327744 (Curran Associates, Inc., 2022)."},{"key":"1789_CR29","unstructured":"Wei, J. et al. Chain-of-thought prompting elicits reasoning in large language models. In Proc. Advances in Neural Information Processing Systems. Vol. 35, 24824\u201324837 (Curran Associates, Inc., 2022)."},{"key":"1789_CR30","unstructured":"Wang, X. et al. Self-consistency improves chain of thought reasoning in language models. In Proc. Eleventh International Conference on Learning Representations. OpenReview.net (2022)."},{"key":"1789_CR31","unstructured":"Chen, L. et al. Are more LLM calls all you need? towards scaling laws of compound inference systems. Preprint at arXiv https:\/\/arxiv.org\/abs\/2403.02419 (2024)."},{"key":"1789_CR32","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-53081-z","volume":"15","author":"Q Jin","year":"2024","unstructured":"Jin, Q. et al. Matching patients to clinical trials with large language models. Nat. Commun. 15, 9074 (2024).","journal-title":"Nat. Commun."},{"key":"1789_CR33","doi-asserted-by":"crossref","unstructured":"Shin, T., Razeghi, Y., Logan IV, R. L., Wallace, E. & Singh, S. AutoPrompt: eliciting knowledge from language models with automatically generated prompts. In Proc. 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP). 4222\u20134235 (Association for Computational Linguistics (ACL), 2020).","DOI":"10.18653\/v1\/2020.emnlp-main.346"},{"key":"1789_CR34","unstructured":"Cheng, J. et al. Black-box prompt optimization: aligning large language models without model training. Preprint at arXiv https:\/\/arxiv.org\/abs\/2311.04155 (2023)."},{"key":"1789_CR35","unstructured":"Wen, Y., Wang, Z. & Sun, J. MindMap: knowledge graph prompting sparks graph of thoughts in large language models. Preprint at arXiv https:\/\/arxiv.org\/abs\/2308.09729 (2023)."},{"key":"1789_CR36","doi-asserted-by":"publisher","first-page":"AIoa2300068","DOI":"10.1056\/AIoa2300068","volume":"1","author":"C Zakka","year":"2024","unstructured":"Zakka, C. et al. Almanac-retrieval-augmented language models for clinical medicine. NEJM AI 1, AIoa2300068 (2024).","journal-title":"NEJM AI"},{"key":"1789_CR37","unstructured":"Arasteh, S. T. et al. RadioRAG: factual large language models for enhanced diagnostics in radiology using dynamic retrieval augmented generation. Preprint at arXiv https:\/\/arxiv.org\/abs\/2407.15621 (2024)."},{"key":"1789_CR38","doi-asserted-by":"crossref","unstructured":"Wang, Z. & Sun, J. Trial2vec: Zero-shot clinical trial document similarity search using self-supervision. In Proc. Findings of the Association for Computational Linguistics: EMNLP 2022 6377\u20136390 (2022).","DOI":"10.18653\/v1\/2022.findings-emnlp.476"},{"key":"1789_CR39","doi-asserted-by":"publisher","first-page":"btad651","DOI":"10.1093\/bioinformatics\/btad651","volume":"39","author":"Q Jin","year":"2023","unstructured":"Jin, Q. et al. MedCPT: contrastive pre-trained transformers with large-scale PubMed search logs for zero-shot biomedical information retrieval. Bioinformatics 39, btad651 (2023).","journal-title":"Bioinformatics"},{"key":"1789_CR40","doi-asserted-by":"crossref","unstructured":"Wu, T., Terry, M. & Cai, C. J. AI chains: Transparent and controllable human-ai interaction by chaining large language model prompts. In Proc. 2022 CHI Conference on Human Factors in Computing Systems 1\u201322 (2022).","DOI":"10.1145\/3491102.3517582"},{"key":"1789_CR41","unstructured":"Yao, S. et al. ReAct: synergizing reasoning and acting in language models. In Proc. Eleventh International Conference on Learning Representations. OpenReview.net (2022)."},{"key":"1789_CR42","unstructured":"Wang, Z. et al. Accelerating clinical evidence synthesis with large language models. https:\/\/arxiv.org\/abs\/2406.17755 (2024)."},{"key":"1789_CR43","unstructured":"Wang, Z., Danek, B., Yang, Z., Chen, Z. & Sun, J. Can large language models replace data scientists in clinical research? Preprint at arXiv https:\/\/arxiv.org\/abs\/2410.21591 (2024)."},{"key":"1789_CR44","unstructured":"Asai, A. et al. OpenScholar: synthesizing scientific literature with retrieval-augmented language models. Preprint at Arxiv https:\/\/arxiv.org\/abs\/2411.14199 (2024)."},{"key":"1789_CR45","unstructured":"Introducing deep research. https:\/\/openai.com\/index\/introducing-deep-research\/ (2025)."},{"key":"1789_CR46","unstructured":"Gravitas, S. Autogpt. https:\/\/agpt.co (2023)."},{"key":"1789_CR47","unstructured":"Wu, Q. et al. AutoGen: enabling next-gen LLM applications via multi-agent conversation framework. Preprint at arXiv https:\/\/arxiv.org\/abs\/2308.08155 (2023)."},{"key":"1789_CR48","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-45879-8","volume":"15","author":"S Tayebi Arasteh","year":"2024","unstructured":"Tayebi Arasteh, S. et al. Large language models streamline automated machine learning for clinical studies. Nat. Commun. 15, 1603 (2024).","journal-title":"Nat. Commun."},{"key":"1789_CR49","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1038\/s41586-023-06747-5","volume":"625","author":"TH Trinh","year":"2024","unstructured":"Trinh, T. H., Wu, Y., Le, Q. V., He, H. & Luong, T. Solving olympiad geometry without human demonstrations. Nature 625, 476\u2013482 (2024).","journal-title":"Nature"},{"key":"1789_CR50","unstructured":"Gu, Y. et al. Middleware for LLMs: tools are instrumental for language agents in complex environments. Preprint at arXiv https:\/\/arxiv.org\/abs\/2402.14672 (2024)."},{"key":"1789_CR51","doi-asserted-by":"crossref","unstructured":"Gao, T., Yen, H., Yu, J. & Chen, D. Enabling large language models to generate text with citations. In Proc. 2023 Conference on Empirical Methods in Natural Language Processing 6465\u20136488 (2023).","DOI":"10.18653\/v1\/2023.emnlp-main.398"},{"key":"1789_CR52","unstructured":"Kim, Y. et al. MDAgents: an adaptive collaboration of LLMs for medical decision-making. In Proc. Advances in Neural Information Processing Systems. Vol. 37, 79410\u201379452 (2025)."},{"key":"1789_CR53","unstructured":"Mukherjee, S. et al. Polaris: a safety-focused llm constellation architecture for healthcare. Preprint at arXiv https:\/\/arxiv.org\/abs\/2403.13313 (2024)."},{"key":"1789_CR54","unstructured":"Huang, K. et al. Automated hypothesis validation with agentic sequential falsifications. Preprint at arXiv https:\/\/arxiv.org\/abs\/2502.09858 (2025)."},{"key":"1789_CR55","doi-asserted-by":"crossref","unstructured":"Swanson, K., Wu, W., Bulaong, N. L., Pak, J. E. & Zou, J. The virtual lab: AI agents design new SARS-CoV-2 nanobodies with experimental validation. Preprint at bioRxiv https:\/\/www.biorxiv.org\/content\/10.1101\/2024.11.11.623004v1 (2024).","DOI":"10.1101\/2024.11.11.623004"},{"key":"1789_CR56","doi-asserted-by":"crossref","unstructured":"Semnani, S., Yao, V., Zhang, H. C. & Lam, M. WikiChat: Stopping the hallucination of large language model chatbots by few-shot grounding on Wikipedia. In Proc. 2023 Conference on Empirical Methods in Natural Language Processing (2023).","DOI":"10.18653\/v1\/2023.findings-emnlp.157"},{"key":"1789_CR57","unstructured":"Chen, L., Zaharia, M. & Zou, J. FrugalGPT: How to use large language models while reducing cost and improving performance. Preprint at arXiv https:\/\/arxiv.org\/abs\/2305.05176 (2023)."},{"key":"1789_CR58","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-023-00896-7","volume":"6","author":"L Tang","year":"2023","unstructured":"Tang, L. et al. Evaluating large language models on medical evidence summarization. NPJ Digit. Med. 6, 158 (2023).","journal-title":"NPJ Digit. Med."},{"key":"1789_CR59","doi-asserted-by":"crossref","unstructured":"Bhaskar, A., Fabbri, A. & Durrett, G. Prompted opinion summarization with GPT-3.5. In Proc. Findings of the Association for Computational Linguistics: ACL 2023 9282\u20139300 (2023).","DOI":"10.18653\/v1\/2023.findings-acl.591"},{"key":"1789_CR60","doi-asserted-by":"publisher","first-page":"104649","DOI":"10.1016\/j.jbi.2024.104649","volume":"154","author":"J Park","year":"2024","unstructured":"Park, J. et al. Criteria2query 3.0: leveraging generative large language models for clinical trial eligibility query generation. J. Biomed. Inform. 154, 104649 (2024).","journal-title":"J. Biomed. Inform."},{"key":"1789_CR61","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1093\/jamia\/ocad218","volume":"31","author":"S Datta","year":"2024","unstructured":"Datta, S. et al. AutoCriteria: a generalizable clinical trial eligibility criteria extraction system powered by large language models. J. Am. Med. Inform. Assoc. 31, 375\u2013385 (2024).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"1789_CR62","doi-asserted-by":"crossref","unstructured":"Agrawal, M., Hegselmann, S., Lang, H., Kim, Y. & Sontag, D. Large language models are few-shot clinical information extractors. In Proc. Conference on Empirical Methods in Natural Language Processing 1998\u20132022 (2022).","DOI":"10.18653\/v1\/2022.emnlp-main.130"},{"key":"1789_CR63","doi-asserted-by":"crossref","unstructured":"Jeong, D., Garg, S., Lipton, Z. C. & Oberst, M. Medical adaptation of large language and vision-language models: Are we making progress? In Proc. 2024 Conference on Empirical Methods in Natural Language Processing, 12143\u201312170 (2024).","DOI":"10.18653\/v1\/2024.emnlp-main.677"},{"key":"1789_CR64","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-024-01239-w","volume":"7","author":"G Zhang","year":"2024","unstructured":"Zhang, G. et al. Closing the gap between open source and commercial large language models for medical evidence summarization. npj Digit. Med. 7, 239 (2024).","journal-title":"npj Digit. Med."},{"key":"1789_CR65","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-024-01315-1","volume":"7","author":"E Klang","year":"2024","unstructured":"Klang, E. et al. A strategy for cost-effective large language model use at health system-scale. npj Digit. Med. 7, 320 (2024).","journal-title":"npj Digit. Med."},{"key":"1789_CR66","unstructured":"OpenAI. Function calling and other api updates. https:\/\/openai.com\/index\/function-calling-and-other-api-updates\/ (2023)."},{"key":"1789_CR67","unstructured":"Chase, H. LangChain. https:\/\/github.com\/langchain-ai\/langchain (2022)."},{"key":"1789_CR68","doi-asserted-by":"publisher","first-page":"btae075","DOI":"10.1093\/bioinformatics\/btae075","volume":"40","author":"Q Jin","year":"2024","unstructured":"Jin, Q., Yang, Y., Chen, Q. & Lu, Z. GeneGPT: augmenting large language models with domain tools for improved access to biomedical information. Bioinformatics 40, btae075 (2024).","journal-title":"Bioinformatics"},{"key":"1789_CR69","doi-asserted-by":"publisher","first-page":"D23","DOI":"10.1093\/nar\/gky1069","volume":"47","author":"EW Sayers","year":"2019","unstructured":"Sayers, E. W. et al. Database resources of the National Center for Biotechnology Information. Nucleic Acids Res. 47, D23 (2019).","journal-title":"Nucleic Acids Res."},{"key":"1789_CR70","unstructured":"Lin, C-Y. ROUGE: a package for automatic evaluation of summaries. In Proc. Text Summarization Branches Out. 74\u201381 (Association for Computational Linguistics, 2004)."},{"key":"1789_CR71","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K. Q. & Artzi, Y. BERTscore: evaluating text generation with BERT. In Proc. International Conference on Learning Representations. OpenReview.net (2019)."},{"key":"1789_CR72","doi-asserted-by":"crossref","unstructured":"Van Veen, D. et al. Adapted large language models can outperform medical experts in clinical text summarization. Nat. Med. 30, 1134-1142 (2024).","DOI":"10.1038\/s41591-024-02855-5"},{"key":"1789_CR73","unstructured":"Abacha, A. B., Yim, W.-w., Adams, G., Snider, N. & Yetisgen-Yildiz, M. Overview of the MEDIQA-Chat 2023 shared tasks on the summarization & generation of doctor-patient conversations. In Proc. 5th Clinical Natural Language Processing Workshop. 503\u2013513 (Association for Computational Linguistics, 2023)."},{"key":"1789_CR74","unstructured":"Nelson, H. Epic announces ambient clinical documentation EHR integration. Accessed 5 September 2024, https:\/\/www.techtarget.com\/searchhealthit\/news\/366564355\/Epic-Announces-Ambient-Clinical-Documentation-EHR-Integration. Accessed on Jun 2025 (2023)."},{"key":"1789_CR75","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-023-02487-3","volume":"10","author":"W-w Yim","year":"2023","unstructured":"Yim, W.-w. et al. Aci-bench: a novel ambient clinical intelligence dataset for benchmarking automatic visit note generation. Sci. Data 10, 586 (2023).","journal-title":"Sci. Data"},{"key":"1789_CR76","doi-asserted-by":"publisher","first-page":"AIdbp2300040","DOI":"10.1056\/AIdbp2300040","volume":"1","author":"A Soroush","year":"2024","unstructured":"Soroush, A. et al. Large language models are poor medical coders-benchmarking of medical code querying. NEJM AI 1, AIdbp2300040 (2024).","journal-title":"NEJM AI"},{"key":"1789_CR77","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-023-00989-3","volume":"7","author":"H Wang","year":"2024","unstructured":"Wang, H., Gao, C., Dantona, C., Hull, B. & Sun, J. Drg-llama: tuning llama model to predict diagnosis-related group for hospitalized patients. npj Digit. Med. 7, 16 (2024).","journal-title":"npj Digit. Med."},{"key":"1789_CR78","unstructured":"Topaz, M., Shafran-Topaz, L. & Bowles, K. H. ICD-9 to ICD-10: evolution, revolution, and current debates in the united states. Perspect. Health Inf. Manag. 10, 1d (2013)."},{"key":"1789_CR79","doi-asserted-by":"crossref","unstructured":"Peggy, D. CPT\u00ae Codes: What Are They, Why Are They Necessary, and How Are They Developed? Advances in Wound Care 2, 583\u2013587 (2013).","DOI":"10.1089\/wound.2013.0483"},{"key":"1789_CR80","unstructured":"ICD-10-CM\/PCS MS-DRG V40.1 Definitions Manual. Accessed June 23, 2025. https:\/\/www.cms.gov\/icd10m\/fy2023-version40.1-fullcode-cms\/fullcode_cms\/P0006.html."},{"key":"1789_CR81","unstructured":"Wornow, M. et al. Zero-shot clinical trial patient matching with LLMs. Preprint at arXiv https:\/\/arxiv.org\/abs\/2402.05125 (2024)."},{"key":"1789_CR82","doi-asserted-by":"crossref","unstructured":"Nievas, M., Basu, A., Wang, Y. & Singh, H. Distilling large language models for matching patients to clinical trials. J. Am. Med. Inform. Assoc. 31, 1953\u20131963 (2024).","DOI":"10.1093\/jamia\/ocae073"},{"key":"1789_CR83","doi-asserted-by":"crossref","unstructured":"Unlu, O. et al. Retrieval-augmented generation\u2013enabled GPT-4 for clinical trial screening. NEJM AI 1, AIoa2400181 (2024).","DOI":"10.1056\/AIoa2400181"},{"key":"1789_CR84","unstructured":"Wong, C. et al. Scaling clinical trial matching using large language models: a case study in oncology. In Proc. Machine Learning for Healthcare Conference. 846\u2013862 (Proceedings of Machine Learning Research (PMLR), 2023)."},{"key":"1789_CR85","doi-asserted-by":"crossref","unstructured":"Shaib, C. et al. Summarizing, simplifying, and synthesizing medical evidence using GPT-3 (with varying success). In Proc. Annual Meeting Of The Association For Computational Linguistics. (Association for Computational Linguistics, 2023).","DOI":"10.18653\/v1\/2023.acl-short.119"},{"key":"1789_CR86","doi-asserted-by":"crossref","unstructured":"Yun, H., Marshall, I., Trikalinos, T. & Wallace, B. Appraising the potential uses and harms of LLMs for medical systematic reviews. In Proc. Conference on Empirical Methods in Natural Language Processing (Association for Computational Linguistics, 2023).","DOI":"10.18653\/v1\/2023.emnlp-main.626"},{"key":"1789_CR87","doi-asserted-by":"crossref","unstructured":"Page, M. J. et al. The PRISMA 2020 Statement: An Updated Guideline for Reporting Systematic Reviews. Syst. Rev. 10, 89 (2021).","DOI":"10.1186\/s13643-021-01626-4"},{"key":"1789_CR88","unstructured":"Wang, Z. et al. A foundation model for human-ai collaboration in medical literature mining. Preprint at arXiv https:\/\/arxiv.org\/abs\/2501.16255 (2025)."},{"key":"1789_CR89","doi-asserted-by":"crossref","unstructured":"Wang, S., Harrisen, S., Bevan, K. & Guido Z. Can ChatGPT Write a Good Boolean Query for Systematic Review Literature Search? In Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval. 1426\u201336 (NewYork, NY, USA: ACM, 2023).","DOI":"10.1145\/3539618.3591703"},{"key":"1789_CR90","unstructured":"National Institute of Standards and Technology. NIST Privacy Framework: A Tool for Improving Privacy through Enterprise Risk Management, Version 1.0. Technical Report (National Institute of Standards and Technology, 2020)."},{"key":"1789_CR91","unstructured":"National Institute of Standards and Technology. The NIST Cybersecurity Framework (CSF) 2.0. Technical Report NIST CSWP 29 (U.S. Department of Commerce, 2024)."},{"key":"1789_CR92","doi-asserted-by":"publisher","unstructured":"Quentin, C., Steinhagen, D., Francis, M. & Streff, K. Towards a Triad for Data Privacy. In Proceedings of the Annual Hawaii International Conference on System Sciences. (Hawaii International Conference on System Sciences (2020) https:\/\/doi.org\/10.24251\/hicss.2020.535.","DOI":"10.24251\/hicss.2020.535"},{"key":"1789_CR93","doi-asserted-by":"crossref","unstructured":"Tabassi, E. Artificial Intelligence Risk Management Framework (AI RMF 1.0): AI RMF (1.0). Technical Report (2023).","DOI":"10.6028\/NIST.AI.100-1"},{"key":"1789_CR94","unstructured":"Nori, H. et al. Can generalist foundation models outcompete special-purpose tuning? Case study in medicine. Preprint at arXiv [cs.CL] https:\/\/arxiv.org\/abs\/2311.16452 (2023)."},{"key":"1789_CR95","unstructured":"Chaves, J. M. Z. et al. Towards a clinically accessible radiology foundation model: open-access and lightweight, with automated evaluation. Preprint at arXiv [cs.CL] https:\/\/arxiv.org\/abs\/2403.08002 (2024)."},{"key":"1789_CR96","unstructured":"Zhang, S. et al. Large-scale domain-specific pretraining for biomedical vision-language processing. Preprint at arXiv https:\/\/www.microsoft.com\/en-us\/research\/publication\/large-scale-domain-specific-pretraining-for-biomedical-vision-language-processing\/ (2023)."},{"key":"1789_CR97","doi-asserted-by":"publisher","first-page":"2307","DOI":"10.1038\/s41591-023-02504-3","volume":"29","author":"Z Huang","year":"2023","unstructured":"Huang, Z., Bianchi, F., Yuksekgonul, M., Montine, T. J. & Zou, J. A visual-language foundation model for pathology image analysis using medical Twitter. Nat. Med. 29, 2307\u20132316 (2023).","journal-title":"Nat. Med."},{"key":"1789_CR98","doi-asserted-by":"publisher","first-page":"863","DOI":"10.1038\/s41591-024-02856-4","volume":"30","author":"MY Lu","year":"2024","unstructured":"Lu, M. Y. et al. A visual-language foundation model for computational pathology. Nat. Med. 30, 863\u2013874 (2024).","journal-title":"Nat. Med."},{"key":"1789_CR99","doi-asserted-by":"crossref","unstructured":"Li, C. et al. LLaVA\u2011Med: Training a Large Language\u2011and\u2011Vision Assistant for Biomedicine in One Day. In Advances in Neural Information Processing Systems 36 (NeurIPS 2023) (Datasets & Benchmarks Spotlight) (2023).","DOI":"10.32388\/VLXB6M"},{"key":"1789_CR100","unstructured":"Gu, Y. et al. BiomedJourney: Counterfactual biomedical image generation by instruction-learning from multimodal patient journeys. Preprint at arXiv [cs.CV] https:\/\/arxiv.org\/abs\/2310.10765 (2023)."},{"key":"1789_CR101","doi-asserted-by":"crossref","unstructured":"Bluethgen, C. et al. A vision-language foundation model for the generation of realistic chest X-ray images. Nat. Biomed. Eng. 9, 494\u2013506 (2025) .","DOI":"10.1038\/s41551-024-01246-y"},{"key":"1789_CR102","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1038\/s41586-024-07441-w","volume":"630","author":"H Xu","year":"2024","unstructured":"Xu, H. et al. A whole-slide foundation model for digital pathology from real-world data. Nature 630, 181\u2013188 (2024).","journal-title":"Nature"},{"key":"1789_CR103","doi-asserted-by":"crossref","unstructured":"Lei, H. et al. A Survey on Hallucination in Large Language Models: Principles, Taxonomy, Challenges, and Open Questions. ACM Transactions on Information Systems. 43, 1\u201355 (2025).","DOI":"10.1145\/3703155"},{"key":"1789_CR104","doi-asserted-by":"publisher","first-page":"1930","DOI":"10.1038\/s41591-023-02448-8","volume":"29","author":"AJ Thirunavukarasu","year":"2023","unstructured":"Thirunavukarasu, A. J. et al. Large language models in medicine. Nat. Med. 29, 1930\u20131940 (2023).","journal-title":"Nat. Med."},{"key":"1789_CR105","doi-asserted-by":"crossref","unstructured":"Kevin, W. et al. An Automated Framework for Assessing How Well LLMs Cite Relevant Medical References. Nat. Commun. 16, 3615 (2025).","DOI":"10.1038\/s41467-025-58551-6"},{"key":"1789_CR106","unstructured":"Sams, C. M., Fanous, A. H. & Daneshjou, R. Human-artificial intelligence interaction research is crucial for medical artificial intelligence implementation. J Investig. Dermatol. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0022202X24019766 (2024)."},{"key":"1789_CR107","unstructured":"Carlini, N. et al. Extracting training data from large language models. In Proc. 30th USENIX Security Symposium (USENIX Security 21). 2633\u20132650 (USENIX Association, 2021)."},{"key":"1789_CR108","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-41093-0","volume":"14","author":"B Theodorou","year":"2023","unstructured":"Theodorou, B., Xiao, C. & Sun, J. Synthesize high-dimensional longitudinal electronic health records via hierarchical autoregressive language model. Nat. Commun. 14, 5305 (2023).","journal-title":"Nat. Commun."},{"key":"1789_CR109","doi-asserted-by":"publisher","unstructured":"Das, T., Wang, Z. & Sun, J. TWIN: Personalized clinical trial digital twin generation. In Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining. 402\u2013413 (Association for Computing Machinery (ACM), 2023).  https:\/\/doi.org\/10.1145\/3580305.3599370.","DOI":"10.1145\/3580305.3599370"},{"key":"1789_CR110","doi-asserted-by":"publisher","first-page":"485","DOI":"10.1016\/j.ins.2021.12.018","volume":"586","author":"A Torfi","year":"2022","unstructured":"Torfi, A., Fox, E. A. & Reddy, C. K. Differentially private synthetic medical data generation using convolutional GANs. Inf. Sci. 586, 485\u2013500 (2022).","journal-title":"Inf. Sci."},{"key":"1789_CR111","unstructured":"Schaeffer, R., Miranda, B. & Koyejo, S. Are emergent abilities of large language models a mirage? In Proc. Advances in Neural Information Processing Systems. Vol. 36 (Curran Associates, Inc., 2024)."},{"key":"1789_CR112","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3529755","volume":"55","author":"JE Zini","year":"2022","unstructured":"Zini, J. E. & Awad, M. On the explainability of natural language processing deep models. ACM Comput. Surv. 55, 1\u201331 (2022).","journal-title":"ACM Comput. Surv."},{"key":"1789_CR113","unstructured":"Chen, W., Ma, X., Wang, X. & Cohen, W. W. Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks. Trans. Mach. Learn. Res. https:\/\/openreview.net\/forum?id=YfZ4ZPt8zd (2023)."},{"key":"1789_CR114","unstructured":"Bereska, L. & Gavves, E. Mechanistic interpretability for AI safety\u2014a review. Preprint at arXiv https:\/\/arxiv.org\/abs\/2404.14082 (2024)."},{"key":"1789_CR115","doi-asserted-by":"crossref","unstructured":"Li, X. & Zhang, T. An exploration on artificial intelligence application: From security, privacy and ethic perspective. In Proc. IEEE International Conference on Cloud Computing and Big Data Analysis 416\u2013420 (IEEE, 2017).","DOI":"10.1109\/ICCCBDA.2017.7951949"},{"key":"1789_CR116","first-page":"AIoa2300030","volume":"1","author":"K Wu","year":"2023","unstructured":"Wu, K. et al. Characterizing the clinical adoption of medical AI devices through us insurance claims. NEJM AI 1, AIoa2300030 (2023).","journal-title":"NEJM AI"},{"key":"1789_CR117","doi-asserted-by":"crossref","unstructured":"Suzanne, B. AI in Health: Keeping the Human in the Loop. Journal of the American Medical Informatics Association: JAMIA 30, 1225\u201326 (2023).","DOI":"10.1093\/jamia\/ocad091"},{"key":"1789_CR118","unstructured":"Singhvi, A. et al. DSPy assertions: Computational constraints for self-refining language model pipelines. Preprint at arXiv https:\/\/arxiv.org\/abs\/2312.13382 (2023)."},{"key":"1789_CR119","unstructured":"Chase, H. Langsmith. https:\/\/www.langchain.com\/langsmith (2024)."},{"key":"1789_CR120","doi-asserted-by":"publisher","unstructured":"Lingjiao, L., Zaharia, M. & Zou, J. How Is ChatGPT\u2019s Behavior Changing Over Time? Harvard Data Science Review. 6 (2024) https:\/\/doi.org\/10.1162\/99608f92.5317da47.","DOI":"10.1162\/99608f92.5317da47"},{"key":"1789_CR121","unstructured":"AI, C. DeepEval. https:\/\/github.com\/confident-ai\/deepeval (2023)."},{"key":"1789_CR122","doi-asserted-by":"crossref","unstructured":"Mert, Y. Optimizing Generative AI by Backpropagating Language Model Feedback. Nature 639, 609\u201316 (2025).","DOI":"10.1038\/s41586-025-08661-4"},{"key":"1789_CR123","doi-asserted-by":"publisher","first-page":"595","DOI":"10.1038\/s41591-023-02766-x","volume":"30","author":"J Habicht","year":"2024","unstructured":"Habicht, J. et al. Closing the accessibility gap to mental health treatment with a personalized self-referral chatbot. Nat. Med. 30, 595\u2013602 (2024).","journal-title":"Nat. Med."},{"key":"1789_CR124","doi-asserted-by":"crossref","unstructured":"Pais, C. et al. Large language models for preventing medication direction errors in online pharmacies. Nat. Med. 30, 1574\u20131582 (2024).","DOI":"10.1038\/s41591-024-02933-8"},{"key":"1789_CR125","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-024-01185-7","volume":"7","author":"Q Jin","year":"2024","unstructured":"Jin, Q. et al. Hidden flaws behind expert-level accuracy of multimodal GPT-4 vision in medicine. npj Digit. Med. 7, 190 (2024).","journal-title":"npj Digit. Med."},{"key":"1789_CR126","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-50043-3","volume":"15","author":"J Zhou","year":"2024","unstructured":"Zhou, J. et al. Pre-trained multimodal large language model enhances dermatological diagnosis using SKINGPT-4. Nat. Commun. 15, 5649 (2024).","journal-title":"Nat. Commun."},{"key":"1789_CR127","unstructured":"Zhang, K. et al. A generalist vision\u2013language foundation model for diverse biomedical tasks. Nat. Med. 1\u201313 (2024)."},{"key":"1789_CR128","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-024-01091-y","volume":"7","author":"S Kresevic","year":"2024","unstructured":"Kresevic, S. et al. Optimization of hepatological clinical guidelines interpretation by large language models: a retrieval augmented generation-based framework. NPJ Digit. Med. 7, 102 (2024).","journal-title":"NPJ Digit. Med."},{"key":"1789_CR129","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1038\/s41586-023-06160-y","volume":"619","author":"LY Jiang","year":"2023","unstructured":"Jiang, L. Y. et al. Health system-scale language models are all-purpose prediction engines. Nature 619, 357\u2013362 (2023).","journal-title":"Nature"},{"key":"1789_CR130","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-46411-8","volume":"15","author":"S Sandmann","year":"2024","unstructured":"Sandmann, S., Riepenhausen, S., Plagwitz, L. & Varghese, J. Systematic analysis of ChatGPT, Google Search and Llama 2 for clinical decision support tasks. Nat. Commun. 15, 2050 (2024).","journal-title":"Nat. Commun."},{"key":"1789_CR131","doi-asserted-by":"publisher","first-page":"btae163","DOI":"10.1093\/bioinformatics\/btae163","volume":"40","author":"VK Keloth","year":"2024","unstructured":"Keloth, V. K. et al. Advancing entity recognition in biomedicine via instruction tuning of large language models. Bioinformatics 40, btae163 (2024).","journal-title":"Bioinformatics"},{"key":"1789_CR132","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-024-01079-8","volume":"7","author":"J Huang","year":"2024","unstructured":"Huang, J. et al. A critical assessment of using ChatGPT for extracting structured data from clinical notes. npj Digit. Med. 7, 106 (2024).","journal-title":"npj Digit. Med."},{"key":"1789_CR133","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-50903-y","volume":"15","author":"H He","year":"2024","unstructured":"He, H. et al. De novo generation of SARS-CoV-2 antibody CDRH3 with a pre-trained generative large language model. Nat. Commun. 15, 6867 (2024).","journal-title":"Nat. Commun."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01789-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01789-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01789-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T05:55:45Z","timestamp":1757224545000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-01789-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,11]]},"references-count":133,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1789"],"URL":"https:\/\/doi.org\/10.1038\/s41746-025-01789-7","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,11]]},"assertion":[{"value":"25 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"429"}}