{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T15:31:01Z","timestamp":1775662261081,"version":"3.50.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T00:00:00Z","timestamp":1737936000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T00:00:00Z","timestamp":1737936000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["18DZ2270700"],"award-info":[{"award-number":["18DZ2270700"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"name":"111 plan","award":["BP0719010"],"award-info":[{"award-number":["BP0719010"]}]},{"name":"State Key Laboratory of UHD Video and Audio Production and Presentation"},{"DOI":"10.13039\/501100012166","name":"National Key R&D Program of China","doi-asserted-by":"crossref","award":["2022ZD0160702"],"award-info":[{"award-number":["2022ZD0160702"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-024-01390-4","type":"journal-article","created":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T03:51:14Z","timestamp":1737949874000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":29,"title":["Towards evaluating and building versatile large language models for medicine"],"prefix":"10.1038","volume":"8","author":[{"given":"Chaoyi","family":"Wu","sequence":"first","affiliation":[]},{"given":"Pengcheng","family":"Qiu","sequence":"additional","affiliation":[]},{"given":"Jinxin","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Hongfei","family":"Gu","sequence":"additional","affiliation":[]},{"given":"Na","family":"Li","sequence":"additional","affiliation":[]},{"given":"Ya","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yanfeng","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Weidi","family":"Xie","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,27]]},"reference":[{"key":"1390_CR1","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1038\/s41586-023-06291-2","volume":"620","author":"K Singhal","year":"2023","unstructured":"Singhal, K. et al. Large language models encode clinical knowledge. Nature 620, 172\u2013180 (2023).","journal-title":"Nature"},{"key":"1390_CR2","unstructured":"Singhal, K. et al. Towards expert-level medical question answering with large language models. arXiv preprint arXiv:2305.09617 (2023)."},{"key":"1390_CR3","doi-asserted-by":"publisher","first-page":"1134","DOI":"10.1038\/s41591-024-02855-5","volume":"30","author":"D Van Veen","year":"2024","unstructured":"Van Veen, D. et al. Adapted large language models can outperform medical experts in clinical text summarization. Nat. Med. 30, 1134\u20131142 (2024).","journal-title":"Nat. 
Med."},{"key":"1390_CR4","doi-asserted-by":"publisher","first-page":"AIdbp2300040","DOI":"10.1056\/AIdbp2300040","volume":"1","author":"A Soroush","year":"2024","unstructured":"Soroush, A. et al. Large language models are poor medical coders\u2014benchmarking of medical code querying. NEJM AI 1, AIdbp2300040 (2024).","journal-title":"NEJM AI"},{"key":"1390_CR5","unstructured":"Hager, P. et al. Evaluation and mitigation of the limitations of large language models in clinical decision-making. Nature Medicine 1\u201310 (2024)."},{"key":"1390_CR6","doi-asserted-by":"crossref","unstructured":"Fleming, S. L. et al. Medalign: A clinician-generated dataset for instruction following with electronic medical records. In Proceedings of the AAAI Conference on Artificial Intelligence, 38, 22021\u201322030 (2024).","DOI":"10.1609\/aaai.v38i20.30205"},{"key":"1390_CR7","doi-asserted-by":"crossref","unstructured":"Wu, C. et al. Pmc-llama: toward building open-source language models for medicine. Journal of the American Medical Informatics Association ocae045 (2024).","DOI":"10.1093\/jamia\/ocae045"},{"key":"1390_CR8","unstructured":"Chen, Z. et al. Meditron-70b: Scaling medical pretraining for large language models. arXiv preprint arXiv:2311.16079 (2023)."},{"key":"1390_CR9","unstructured":"Jiang, A. Q. et al. Mistral 7b. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"1390_CR10","unstructured":"Cai, Z. et al. Internlm2 technical report. arXiv preprint arXiv:2403.17297 (2024)."},{"key":"1390_CR11","unstructured":"Touvron, H. et al. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"1390_CR12","unstructured":"Yang, A. et al. Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)."},{"key":"1390_CR13","unstructured":"Yang, A. et al. Baichuan 2: Open large-scale language models. arXiv preprint arXiv:2309.10305 (2023)."},{"key":"1390_CR14","unstructured":"Christophe, C., Kanithi, P. K., Raha, T., Khan, S. & Pimentel, M. A. Med42-v2: A suite of clinical llms. arXiv preprint arXiv:2408.06142 (2024)."},{"key":"1390_CR15","unstructured":"Kasai, J., Kasai, Y., Sakaguchi, K., Yamada, Y. & Radev, D. Evaluating GPT-4 and ChatGPT on Japanese medical licensing examinations (2023)."},{"key":"1390_CR16","unstructured":"Anthropic Team. Introducing the next generation of claudehttps:\/\/www.anthropic.com\/news\/claude-3-family (2024). Accessed on March 4, 2024."},{"key":"1390_CR17","doi-asserted-by":"crossref","unstructured":"Wang, Y. et al. Super-naturalinstructions: Generalization via declarative instructions on 1600+ nlp tasks. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing (pp. 5085-5109).","DOI":"10.18653\/v1\/2022.emnlp-main.340"},{"key":"1390_CR18","unstructured":"Jin, T. et al. The cost of down-scaling language models: Fact recall deteriorates before in-context learning. arXiv preprint arXiv:2310.04680 (2023)."},{"key":"1390_CR19","doi-asserted-by":"crossref","unstructured":"Qiu, P. et al. Towards building multilingual language model for medicine. Nature Communications 15.1 (2024): 8384.","DOI":"10.1038\/s41467-024-52417-z"},{"key":"1390_CR20","unstructured":"Pal, A., Minervini, P., Motzfeldt, A. G. & Alex, B. openlifescienceai\/open_medical_llm_leaderboardhttps:\/\/huggingface.co\/spaces\/openlifescienceai\/open_medical_llm_leaderboard (2024). Accessed on November 15, 2024."},{"key":"1390_CR21","doi-asserted-by":"crossref","unstructured":"Wang, Y. et al. 
Super-naturalinstructions: Generalization via declarative instructions on 1600+ nlp tasks. In 2022 Conference on Empirical Methods in Natural Language Processing, EMNLP 2022 (2022).","DOI":"10.18653\/v1\/2022.emnlp-main.340"},{"key":"1390_CR22","unstructured":"Longpre, S. et al. The flan collection: Designing data and methods for effective instruction tuning. In International Conference on Machine Learning (pp. 22631-22648). PMLR."},{"key":"1390_CR23","unstructured":"Le Scao, T. et al. Bloom: A 176b-parameter open-access multilingual language model (2023)."},{"key":"1390_CR24","doi-asserted-by":"crossref","unstructured":"Lai, V. D. et al. Okapi: Instruction-tuned large language models in multiple languages with reinforcement learning from human feedback. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations (pp. 318\u2013327).","DOI":"10.18653\/v1\/2023.emnlp-demo.28"},{"key":"1390_CR25","doi-asserted-by":"crossref","unstructured":"Lu, Y., Zhu, W., Li, L., Qiao, Y. & Yuan, F. Llamax: Scaling linguistic horizons of llm by enhancing translation capabilities beyond 100 languages. arXiv preprint arXiv:2407.05975 (2024).","DOI":"10.18653\/v1\/2024.findings-emnlp.631"},{"key":"1390_CR26","unstructured":"Nguyen, T. et al. Culturax: A cleaned, enormous, and multilingual dataset for large language models in 167 languages. arXiv preprint arXiv:2309.09400 (2023)."},{"key":"1390_CR27","unstructured":"Crawl, C. Common crawl maintains a free, open repository of web crawl data that can be used by anyone.https:\/\/commoncrawl.org\/ (Accessed on Apr. 2024)."},{"key":"1390_CR28","unstructured":"Tom, K. et al. Findings of the 2023 conference on machine translation (wmt23): Llms are here but not quite there yet. In WMT23-Eighth Conference on Machine Translation, 198\u2013216 (2023)."},{"key":"1390_CR29","doi-asserted-by":"crossref","unstructured":"Ahuja, K. et al. Mega: Multilingual evaluation of generative ai. In The 2023 Conference on Empirical Methods in Natural Language Processing.","DOI":"10.18653\/v1\/2023.emnlp-main.258"},{"key":"1390_CR30","first-page":"5484","volume":"36","author":"W Zhang","year":"2023","unstructured":"Zhang, W., Aljunied, M., Gao, C., Chia, Y. K. & Bing, L. M3exam: A multilingual, multimodal, multilevel benchmark for examining large language models. Adv. Neural Inf. Process. Syst. 36, 5484\u20135505 (2023).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1390_CR31","doi-asserted-by":"crossref","unstructured":"Labrak, Y. et al. Biomistral: A collection of open-source pretrained large language models for medical domains. arXiv preprint arXiv:2402.10373 (2024).","DOI":"10.18653\/v1\/2024.findings-acl.348"},{"key":"1390_CR32","unstructured":"Wang, X. et al. Apollo: Lightweight multilingual medical llms towards democratizing medical ai to 6b people. arXiv preprint arXiv:2403.03640 (2024)."},{"key":"1390_CR33","unstructured":"Zhou, C. et al. Lima: Less is more for alignment. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"1390_CR34","doi-asserted-by":"crossref","unstructured":"Li, Y., Dong, B., Lin, C. & Guerin, F. Compressing context to enhance inference efficiency of large language models. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing (pp. 6342\u20136353).","DOI":"10.18653\/v1\/2023.emnlp-main.391"},{"key":"1390_CR35","unstructured":"Lu, K. et al. # instag: Instruction tagging for analyzing supervised fine-tuning of large language models. 
In The Twelfth International Conference on Learning Representations (2023)."},{"key":"1390_CR36","unstructured":"Wang, Y. et al. Self-instruct: Aligning language models with self-generated instructions. In The 61st Annual Meeting Of The Association For Computational Linguistics."},{"key":"1390_CR37","unstructured":"Chen, T., Xu, B., Zhang, C. & Guestrin, C. Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174 (2016)."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01390-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01390-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01390-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T15:35:05Z","timestamp":1737992105000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01390-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,27]]},"references-count":37,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1390"],"URL":"https:\/\/doi.org\/10.1038\/s41746-024-01390-4","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,27]]},"assertion":[{"value":"24 September 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"58"}}
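The record above is a standard Crossref REST API "work" response. For readers who want to reproduce or post-process it, the following is a minimal Python sketch, assuming the requests library is installed, that fetches the same work object from the public endpoint and reads the fields shown; the mailto address is a hypothetical placeholder you would replace with your own contact for Crossref's polite pool, not something taken from the record.

import requests

DOI = "10.1038/s41746-024-01390-4"

# Fetch the work record from the public Crossref REST API.
resp = requests.get(
    f"https://api.crossref.org/works/{DOI}",
    params={"mailto": "you@example.org"},  # hypothetical contact address (polite pool)
    timeout=30,
)
resp.raise_for_status()
payload = resp.json()
assert payload["message-type"] == "work"

work = payload["message"]
print(work["title"][0])                # article title
print(work["container-title"][0])      # "npj Digital Medicine"
print(work["is-referenced-by-count"])  # citation count at indexing time
print(work["references-count"])        # 37

# Deposited references mix structured entries (with a DOI) and free-text
# "unstructured" strings, so fall back between the two shapes.
for ref in work.get("reference", []):
    print(ref["key"], "->", ref.get("DOI") or ref.get("unstructured", "n/a"))

Note that entries Crossref could resolve (e.g., 1390_CR1) expose a "DOI" field alongside the free-text string, while arXiv preprints and web resources (e.g., 1390_CR9, 1390_CR27) carry only the "unstructured" text, which is why the loop checks both fields.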