{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T22:58:53Z","timestamp":1776898733603,"version":"3.51.2"},"reference-count":86,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T00:00:00Z","timestamp":1768348800000},"content-version":"vor","delay-in-days":36,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"National Science Foundation, United States","award":["DMS-2054346"],"award-info":[{"award-number":["DMS-2054346"]}]},{"name":"National Science Foundation, United States","award":["DMS-2054346"],"award-info":[{"award-number":["DMS-2054346"]}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01DA051464"],"award-info":[{"award-number":["R01DA051464"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01DA051464"],"award-info":[{"award-number":["R01DA051464"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-025-02219-4","type":"journal-article","created":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T12:48:08Z","timestamp":1765284488000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["MoMA: a mixture-of-multimodal-agents architecture for enhancing clinical prediction modelling"],"prefix":"10.1038","volume":"9","author":[{"given":"Jifan","family":"Gao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mahmudur","family":"Rahman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"John","family":"Caskey","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Madeline","family":"Oguss","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ann","family":"O\u2019Rourke","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Randall","family":"Brown","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anne","family":"Stey","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anoop","family":"Mayampurath","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthew M.","family":"Churpek","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guanhua","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Majid","family":"Afshar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,9]]},"reference":[{"key":"2219_CR1","doi-asserted-by":"publisher","first-page":"133583","DOI":"10.1109\/ACCESS.2019.2941419","volume":"7","author":"Q Cai","year":"2019","unstructured":"Cai, Q., Wang, H., Li, Z. & Liu, X. A survey on multimodal data-driven smart healthcare systems: approaches and applications. IEEE Access 7, 133583\u2013133599 (2019).","journal-title":"IEEE Access"},{"key":"2219_CR2","doi-asserted-by":"publisher","first-page":"2349","DOI":"10.1038\/s41591-024-03019-1","volume":"30.8","author":"B Rohaut","year":"2024","unstructured":"Rohaut, B. et al. Multimodal assessment improves neuroprognosis performance in clinically unresponsive critical-care patients with brain injury. Nat. Med 30.8, 2349\u20132355 (2024).","journal-title":"Nat. Med"},{"key":"2219_CR3","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1038\/s41746-022-00689-4","volume":"5","author":"LR Soenksen","year":"2022","unstructured":"Soenksen, L. R. et al. Integrated multimodal artificial intelligence framework for healthcare applications. NPJ Digit. Med 5, 149 (2022).","journal-title":"NPJ Digit. Med"},{"key":"2219_CR4","doi-asserted-by":"crossref","unstructured":"Winston, C. et al. Multimodal clinical prediction with unified prompts and pretrained large-language models. In 2024 IEEE 12th International Conference on Healthcare Informatics (ICHI), 679\u2013683 (IEEE, 2024).","DOI":"10.1109\/ICHI61247.2024.00108"},{"key":"2219_CR5","doi-asserted-by":"publisher","first-page":"1291","DOI":"10.1093\/jamia\/ocae071","volume":"31","author":"J Gao","year":"2024","unstructured":"Gao, J. et al. Automated stratification of trauma injury severity across multiple body regions using multi-modal, multi-class machine learning models. J. Am. Med. Inform. Assoc. 31, 1291\u20131302 (2024).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"2219_CR6","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1038\/s41746-022-00712-8","volume":"5","author":"A Kline","year":"2022","unstructured":"Kline, A. et al. Multimodal machine learning in precision health: a scoping review. NPJ Digit. Med 5, 171 (2022).","journal-title":"NPJ Digit. Med"},{"key":"2219_CR7","doi-asserted-by":"publisher","first-page":"1773","DOI":"10.1038\/s41591-022-01981-2","volume":"28","author":"JN Acosta","year":"2022","unstructured":"Acosta, J. N., Falcone, G. J., Rajpurkar, P. & Topol, E. J. Multimodal biomedical AI. Nat. Med. 28, 1773\u20131784 (2022).","journal-title":"Nat. Med."},{"key":"2219_CR8","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-020-00341-z","volume":"3","author":"S-C Huang","year":"2020","unstructured":"Huang, S.-C., Pareek, A., Seyyedi, S., Banerjee, I. & Lungren, M. P. Fusion of medical imaging and electronic health records using deep learning: a systematic review and implementation guidelines. NPJ Digit. Med. 3, 136 (2020).","journal-title":"NPJ Digit. Med."},{"key":"2219_CR9","unstructured":"Li, J., Li, D., Xiong, C. & Hoi, S. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International conference on machine learning, 12888\u201312900 (PMLR, 2022)."},{"key":"2219_CR10","doi-asserted-by":"crossref","unstructured":"Han X. et al. Fusemoe: Mixture-of-experts transformers for fleximodal fusion. In Advances in Neural Information Processing Systems 37 67850\u201367900 (2024).","DOI":"10.52202\/079017-2167"},{"key":"2219_CR11","unstructured":"Radford, A. et al. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning, 8748\u20138763 (2021)."},{"key":"2219_CR12","doi-asserted-by":"crossref","unstructured":"Bannur, S. et al. Learning to exploit temporal structure for biomedical vision-language processing. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 15016\u201315027 (2023).","DOI":"10.1109\/CVPR52729.2023.01442"},{"key":"2219_CR13","doi-asserted-by":"publisher","first-page":"631","DOI":"10.1038\/s42256-023-00663-z","volume":"5","author":"N Cohen Kalafut","year":"2023","unstructured":"Cohen Kalafut, N., Huang, X. & Wang, D. Joint variational autoencoders for multimodal imputation and embedding. Nat. Mach. Intell. 5, 631\u2013642 (2023).","journal-title":"Nat. Mach. Intell."},{"key":"2219_CR14","doi-asserted-by":"crossref","unstructured":"Guarrasi, V. et al. A systematic review of intermediate fusion in multimodal deep learning for biomedical applications. Image and Vision Computing 105509 (2025).","DOI":"10.1016\/j.imavis.2025.105509"},{"key":"2219_CR15","doi-asserted-by":"publisher","first-page":"bbab569","DOI":"10.1093\/bib\/bbab569","volume":"23","author":"SR Stahlschmidt","year":"2022","unstructured":"Stahlschmidt, S. R., Ulfenborg, B. & Synnergren, J. Multimodal deep learning for biomedical data fusion: a review. Brief. Bioinforma. 23, bbab569 (2022).","journal-title":"Brief. Bioinforma."},{"key":"2219_CR16","unstructured":"Hayat, N., Geras, K. J. & Shamout, F. E. Medfuse: multi-modal fusion with clinical time-series data and chest x-ray images. In Machine Learning for Healthcare Conference, 479\u2013503 (PMLR, 2022)."},{"key":"2219_CR17","unstructured":"Li, J., Li, D., Savarese, S. & Hoi, S. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning, 19730\u201319742 (PMLR, 2023)."},{"key":"2219_CR18","unstructured":"Alayrac, J.-B. et al. Flamingo: a visual language model for few-shot learning. In Advances in Neural Information Processing Systems. 35, 23716\u201323736 (2022)."},{"key":"2219_CR19","unstructured":"Peng, Z. et al. Kosmos-2: Grounding multimodal large language models to the world. In International Conference on Learning Representations (2024)."},{"key":"2219_CR20","unstructured":"Driess, D. et al. Palm-e: an embodied multimodal language model. In International Conference on Machine Learning (2023)."},{"key":"2219_CR21","doi-asserted-by":"crossref","unstructured":"Girdhar, R. et al. Imagebind: One embedding space to bind them all. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition 15180\u201315190 (2023).","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"2219_CR22","doi-asserted-by":"crossref","unstructured":"Han, J. et al. Onellm: One framework to align all modalities with language. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition 26584\u201326595 (2024).","DOI":"10.1109\/CVPR52733.2024.02510"},{"key":"2219_CR23","unstructured":"Li, C. et al. Llava-med: Training a large language-and-vision assistant for biomedicine in one day. In Advances in Neural Information Processing Systems. 36 (2023), pp. 28541\u201328564."},{"key":"2219_CR24","doi-asserted-by":"crossref","unstructured":"Nath, V. et al. Vila-m3: enhancing vision-language models with medical expert knowledge. In Proceedings of the Computer Vision and Pattern Recognition Conference, 14788\u201314798 (2025).","DOI":"10.1109\/CVPR52734.2025.01378"},{"key":"2219_CR25","unstructured":"He, S. et al. Gsco: Towards generalizable ai in medicine via generalist-specialist collaboration. arXiv preprint arXiv:2404.15127 (2024)."},{"key":"2219_CR26","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1038\/s41568-021-00408-3","volume":"22","author":"KM Boehm","year":"2022","unstructured":"Boehm, K. M., Khosravi, P., Vanguri, R., Gao, J. & Shah, S. P. Harnessing multimodal data integration to advance precision oncology. Nat. Rev. Cancer 22, 114\u2013126 (2022).","journal-title":"Nat. Rev. Cancer"},{"key":"2219_CR27","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1038\/s41746-025-01508-2","volume":"8","author":"JS Chang","year":"2025","unstructured":"Chang, J. S. et al. Continuous multimodal data supply chain and expandable clinical decision support for oncology. NPJ Digit Med 8, 128 (2025).","journal-title":"NPJ Digit Med"},{"key":"2219_CR28","unstructured":"Zhao, W. X. et al. A survey of large language models. arXiv preprint arXiv:2303.18223 (2023)."},{"key":"2219_CR29","first-page":"1","volume":"2022","author":"J Wei","year":"2022","unstructured":"Wei, J. et al. Emergent abilities of large language models. Trans. Mach. Learn. Res 2022, 1\u201330 (2022).","journal-title":"Trans. Mach. Learn. Res"},{"key":"2219_CR30","doi-asserted-by":"publisher","first-page":"1930","DOI":"10.1038\/s41591-023-02448-8","volume":"29","author":"AJ Thirunavukarasu","year":"2023","unstructured":"Thirunavukarasu, A. J. et al. Large language models in medicine. Nat. Med. 29, 1930\u20131940 (2023).","journal-title":"Nat. Med."},{"key":"2219_CR31","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1038\/s41746-023-00989-3","volume":"7","author":"H Wang","year":"2024","unstructured":"Wang, H., Gao, C., Dantona, C., Hull, B. & Sun, J. Drg-llama: tuning llama model to predict diagnosis-related group for hospitalized patients. NPJ Digit. Med 7, 16 (2024).","journal-title":"NPJ Digit. Med"},{"key":"2219_CR32","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-023-00952-2","volume":"6","author":"F Liu","year":"2023","unstructured":"Liu, F. et al. A medical multimodal large language model for future pandemics. NPJ Digit. Med. 6, 226 (2023).","journal-title":"NPJ Digit. Med."},{"key":"2219_CR33","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1038\/s41746-024-01366-4","volume":"7.1","author":"B Gu","year":"2024","unstructured":"Gu, B., Desai, R. J., Lin, K. J. & Yang, J. Probabilistic medical predictions of large language models. NPJ Digit. Med 7.1, 367 (2024).","journal-title":"NPJ Digit. Med"},{"key":"2219_CR34","doi-asserted-by":"crossref","unstructured":"Lee, S., Youn, J., Kim, M. & Yoon, S. H. Cxr-llava: Multimodal large language model for interpreting chest x-ray images. European Radiology, pp. 1\u201313 (2025).","DOI":"10.1007\/s00330-024-11339-6"},{"key":"2219_CR35","unstructured":"Zhu, Y. et al. Prompting large language models for zero-shot clinical prediction with structured longitudinal electronic health record data. arXiv preprint arXiv:2402.01713 (2024)."},{"key":"2219_CR36","doi-asserted-by":"crossref","unstructured":"Gao, Y. et al. When raw data prevails: are large language model embeddings effective in numerical data representation for medical machine learning applications? In Findings of the Association for Computational Linguistics: EMNLP, 5414\u20135428 (2024).","DOI":"10.18653\/v1\/2024.findings-emnlp.311"},{"key":"2219_CR37","doi-asserted-by":"publisher","first-page":"516","DOI":"10.1111\/lang.12155","volume":"66","author":"G Lupyan","year":"2016","unstructured":"Lupyan, G. The centrality of language in human cognition. Lang. Learn. 66, 516\u2013553 (2016).","journal-title":"Lang. Learn."},{"key":"2219_CR38","unstructured":"Wang, J., Wang, J., Athiwaratkun, B., Zhang, C. & Zou, J. Mixture-of-agents enhances large language model capabilities. In International Conference on Learning Representations (2025)."},{"key":"2219_CR39","doi-asserted-by":"publisher","first-page":"e333","DOI":"10.1016\/S2589-7500(23)00083-3","volume":"5","author":"H Li","year":"2023","unstructured":"Li, H. et al. Ethics of large language models in medicine and medical research. Lancet Digit. Health 5, e333\u2013e335 (2023).","journal-title":"Lancet Digit. Health"},{"key":"2219_CR40","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1038\/s41746-024-01157-x","volume":"7","author":"J Haltaufderheide","year":"2024","unstructured":"Haltaufderheide, J. & Ranisch, R. The ethics of chatGPT in medicine and healthcare: a systematic review on large language models (LLMS). NPJ Digit Med 7, 183 (2024).","journal-title":"NPJ Digit Med"},{"key":"2219_CR41","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1056\/NEJMp2205162","volume":"387","author":"JP Herrera-Escobar","year":"2022","unstructured":"Herrera-Escobar, J. P. & Schneider, J. C. From survival to survivorship-framing traumatic injury as a chronic condition. N. Engl. J. Med. 387, 581 (2022).","journal-title":"N. Engl. J. Med."},{"key":"2219_CR42","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s00068-011-0168-4","volume":"38","author":"R Lefering","year":"2012","unstructured":"Lefering, R. et al. Epidemiology of in-hospital trauma deaths. Eur. J. trauma Emerg. Surg. 38, 3\u20139 (2012).","journal-title":"Eur. J. trauma Emerg. Surg."},{"key":"2219_CR43","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1097\/MEJ.0000000000000397","volume":"25","author":"A Granstr\u00f6m","year":"2018","unstructured":"Granstr\u00f6m, A., Str\u00f6mmer, L., Schandl, A. & \u00d6stlund, A. A criteria-directed protocol for in-hospital triage of trauma patients. Eur. J. Emerg. Med. 25, 25\u201331 (2018).","journal-title":"Eur. J. Emerg. Med."},{"key":"2219_CR44","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1016\/j.jamcollsurg.2009.07.001","volume":"209","author":"S Shafi","year":"2009","unstructured":"Shafi, S. et al. The trauma quality improvement program of the American College of Surgeons Committee on Trauma. J. Am. Coll. Surg. 209, 521\u2013530e1 (2009).","journal-title":"J. Am. Coll. Surg."},{"key":"2219_CR45","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/j.injury.2015.07.003","volume":"47","author":"CS Palmer","year":"2016","unstructured":"Palmer, C. S., Gabbe, B. J. & Cameron, P. A. Defining major trauma using the 2008 abbreviated injury scale. Injury 47, 109\u2013115 (2016).","journal-title":"Injury"},{"key":"2219_CR46","doi-asserted-by":"crossref","unstructured":"Jinyi, W., Zhang, Y., Wang, K. & Peng, P. Global, regional, and national mortality of tuberculosis attributable to alcohol and tobacco from 1990 to 2019: A modelling study based on the global burden of disease study 2019. J. Global Health14 (2024). p. 04023.","DOI":"10.7189\/jogh.14.04023"},{"key":"2219_CR47","unstructured":"Coulton, S. Alcohol misuse. BMJ Clin. Evid., p. 1017 (2011)."},{"key":"2219_CR48","doi-asserted-by":"publisher","first-page":"690","DOI":"10.7326\/M16-0317","volume":"165","author":"J McNeely","year":"2016","unstructured":"McNeely, J. et al. Performance of the tobacco, alcohol, prescription medication, and other substance use (taps) tool for substance use screening in primary care patients. Ann. Intern. Med. 165, 690\u2013699 (2016).","journal-title":"Ann. Intern. Med."},{"key":"2219_CR49","doi-asserted-by":"crossref","unstructured":"Alsentzer, E. et al. Publicly available clinical bert embeddings. In Proceedings of the 2nd clinical natural language processing workshop, pp. 72\u201378 (2019).","DOI":"10.18653\/v1\/W19-1909"},{"key":"2219_CR50","doi-asserted-by":"publisher","first-page":"e426","DOI":"10.1016\/S2589-7500(22)00041-3","volume":"4","author":"M Afshar","year":"2022","unstructured":"Afshar, M. et al. Development and multimodal validation of a substance misuse algorithm for referral to treatment using artificial intelligence (smart-ai): a retrospective deep learning study. Lancet Digit. Health 4, e426\u2013e435 (2022).","journal-title":"Lancet Digit. Health"},{"key":"2219_CR51","doi-asserted-by":"crossref","unstructured":"Guo, D. & Terzopoulos, D. Prompting medical large vision-language models to diagnose pathologies by visual question answering. In Machine Learning for Biomedical Imaging (MELBA) 3, 59\u201371 (2025).","DOI":"10.59275\/j.melba.2025-1a8b"},{"key":"2219_CR52","doi-asserted-by":"crossref","unstructured":"Zhu, K. et al. Guiding medical vision-language models with explicit visual prompts: Framework design and comprehensive exploration of prompt variations. In Proc. 2025 Conf. of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT 2025), 11726\u201311739 (2025).","DOI":"10.18653\/v1\/2025.naacl-long.587"},{"key":"2219_CR53","doi-asserted-by":"crossref","unstructured":"Yang, X. et al. Medical large vision language models with multi-image visual ability. In International Conference on Medical Image Computing and Computer-Assisted Intervention. 402\u2013412 (2025).","DOI":"10.1007\/978-3-032-04971-1_38"},{"key":"2219_CR54","unstructured":"Jian, L. et al. Rethinking cross-attention for infrared and visible image fusion. arXiv preprint arXiv:2401.11675 (2024)."},{"key":"2219_CR55","doi-asserted-by":"crossref","unstructured":"Zheng, W. et al. Multimodal clinical trial outcome prediction with large language models. In Findings of the Association for Computational Linguistics: EMNLP 2025, 7503\u20137517 (2025).","DOI":"10.18653\/v1\/2025.findings-emnlp.396"},{"key":"2219_CR56","doi-asserted-by":"crossref","unstructured":"Gao, Y., Mahajan, D., Uzuner, \u00d6. & Yetisgen, M. Clinical natural language processing for secondary uses. 150:104596 (2024).","DOI":"10.1016\/j.jbi.2024.104596"},{"key":"2219_CR57","doi-asserted-by":"publisher","first-page":"1254","DOI":"10.1038\/s41592-019-0658-6","volume":"16","author":"W Ouyang","year":"2019","unstructured":"Ouyang, W. et al. Analysis of the Human Protein Atlas image classification competition. Nat. Methods 16, 1254\u20131261 (2019).","journal-title":"Nat. Methods"},{"key":"2219_CR58","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1093\/jamia\/ocad159","volume":"31","author":"T Bergquist","year":"2023","unstructured":"Bergquist, T. et al. Evaluation of crowdsourced mortality prediction models as a framework for assessing artificial intelligence in medicine. J. Am. Med. Inform. Assoc. 31, 35\u201344 (2023).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"2219_CR59","doi-asserted-by":"publisher","first-page":"e175","DOI":"10.1017\/cts.2023.549","volume":"7","author":"T Bergquist","year":"2023","unstructured":"Bergquist, T. et al. A framework for future national pediatric pandemic respiratory disease severity triage: the HHS pediatric COVID-19 data challenge. J. Clin. Transl. Sci. 7, e175 (2023).","journal-title":"J. Clin. Transl. Sci."},{"key":"2219_CR60","unstructured":"Team, Q. Qwen2 technical report. arXiv preprint arXiv:2407.10671 (2024)."},{"key":"2219_CR61","unstructured":"Team, Q. Qwen2.5: A party of foundation models. https:\/\/qwenlm.github.io\/blog\/qwen2.5\/ (2024)."},{"key":"2219_CR62","unstructured":"Jiang, B. et al. Multi-modal and multi-agent systems meet rationality: A survey. In ICML 2024 Workshop on LLMs and Cognition (2024)."},{"key":"2219_CR63","unstructured":"Du, Y., Li, S., Torralba, A., Tenenbaum, J. B. & Mordatch, I. Improving factuality and reasoning in language models through multiagent debate. In International Conference on Machine Learning (2023)."},{"key":"2219_CR64","doi-asserted-by":"crossref","unstructured":"Tang, X. et al. Medagents: Large language models as collaborators for zero-shot medical reasoning. In Findings of the Association for Computational Linguistics: ACL 2024, pp. 599\u2013621 (2024).","DOI":"10.18653\/v1\/2024.findings-acl.33"},{"key":"2219_CR65","unstructured":"Li, J., Zhang, Q., Yu, Y., Fu, Q. & Ye, D. More agents is all you need. Trans. Mach. Learn. Res. (2024)."},{"key":"2219_CR66","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-025-57426-0","volume":"16","author":"C-Y Li","year":"2025","unstructured":"Li, C.-Y. et al. Towards a holistic framework for multimodal LLM in 3D brain ct radiology report generation. Nat. Commun. 16, 2258 (2025).","journal-title":"Nat. Commun."},{"key":"2219_CR67","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-025-60014-x","volume":"16","author":"M Tran","year":"2025","unstructured":"Tran, M. et al. Generating dermatopathology reports from gigapixel whole slide images with histogpt. Nat. Commun. 16, 4886 (2025).","journal-title":"Nat. Commun."},{"key":"2219_CR68","doi-asserted-by":"crossref","unstructured":"Lu, Y.-C. et al. scchat: A large language model-powered co-pilot for contextualized single-cell RNA sequencing analysis. bioRxiv 2024.10.01.616063 (2024).","DOI":"10.1101\/2024.10.01.616063"},{"key":"2219_CR69","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01899-x","volume":"10","author":"AE Johnson","year":"2023","unstructured":"Johnson, A. E. et al. Mimic-iv, a freely accessible electronic health record dataset. Sci. Data 10, 1 (2023).","journal-title":"Sci. Data"},{"key":"2219_CR70","doi-asserted-by":"crossref","unstructured":"Johnson, A. E. et al. Mimic-cxr-jpg, a large publicly available database of labeled chest radiographs. arXiv preprint arXiv:1901.07042 (2019).","DOI":"10.1038\/s41597-019-0322-0"},{"key":"2219_CR71","first-page":"1","volume":"43","author":"L Huang","year":"2025","unstructured":"Huang, L. et al. A survey on hallucination in large language models: Principles, taxonomy, challenges, and open questions. ACM Trans. Inf. Syst. 43, 1\u201355 (2025).","journal-title":"ACM Trans. Inf. Syst."},{"key":"2219_CR72","unstructured":"Fourney, A. et al. Magentic-one: A generalist multi-agent system for solving complex tasks. arXiv preprint arXiv:2411.04468 (2024)."},{"key":"2219_CR73","doi-asserted-by":"crossref","unstructured":"Caffagni, D. et al. The revolution of multimodal large language models: a survey. In Findings of the Association for Computational Linguistics: ACL 2024, pp. 13590\u201313618 (2024).","DOI":"10.18653\/v1\/2024.findings-acl.807"},{"key":"2219_CR74","unstructured":"Ben Abacha, A., Hasan, S. A., Datla, V. V., Demner-Fushman, D. & M\u00fcller, H. Vqa-med: Overview of the medical visual question answering task at imageclef 2019. In Proceedings of CLEF (Conference and Labs of the Evaluation Forum) 2019 Working Notes (9-12 September 2019, 2019)."},{"key":"2219_CR75","doi-asserted-by":"crossref","unstructured":"Gallifant, J. et al. The tripod-llm reporting guideline for studies using large language models. Nat. Med. 31.1, pp. 60\u201369 (2025).","DOI":"10.1038\/s41591-024-03425-5"},{"key":"2219_CR76","unstructured":"Shazeer, N. et al. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. In International Conference on Learning Representations (2017)"},{"key":"2219_CR77","doi-asserted-by":"crossref","unstructured":"Muennighoff, N., Tazi, N., Magne, L. & Reimers, N. Mteb: Massive text embedding benchmark. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics. 2023.","DOI":"10.18653\/v1\/2023.eacl-main.148"},{"key":"2219_CR78","doi-asserted-by":"crossref","unstructured":"Shi, W. et al. Ehragent: Code empowers large language models for few-shot complex tabular reasoning on electronic health records. In ICLR 2024 Workshop on Large Language Model (LLM) Agents (2024).","DOI":"10.18653\/v1\/2024.emnlp-main.1245"},{"key":"2219_CR79","unstructured":"Dubey, A. et al. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"2219_CR80","unstructured":"Chen, C. et al. Clinicalbench: Can llms beat traditional ml models in clinical prediction? arXiv preprint arXiv:2411.06469 (2024)."},{"key":"2219_CR81","unstructured":"Jiang, A. Q. et al. Mistral 7b. https:\/\/arxiv.org\/abs\/2310.06825. 2310.06825 (2023)."},{"key":"2219_CR82","unstructured":"Liu, H., Li, C., Wu, Q. & Lee, Y. J. Visual instruction tuning. In Advances in Neural Information Processing Systems 36, 34892\u201334916 (2023)."},{"key":"2219_CR83","doi-asserted-by":"crossref","unstructured":"Xia, P. et al. Mmed-rag: Versatile multimodal rag system for medical vision language models. In International Conference on Learning Representations (2025).","DOI":"10.18653\/v1\/2024.emnlp-main.62"},{"key":"2219_CR84","doi-asserted-by":"crossref","unstructured":"Xia, P. et al. Rule: Reliable multimodal rag for factuality in medical vision language models. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, 1081\u20131093 (2024).","DOI":"10.18653\/v1\/2024.emnlp-main.62"},{"key":"2219_CR85","doi-asserted-by":"crossref","unstructured":"Thawkar, O. et al. Xraygpt: Chest radiographs summarization using medical vision-language models. In Proc. 23rd Workshop on Biomedical Natural Language Processing (BioNLP 2024) 440\u2013448 (2024).","DOI":"10.18653\/v1\/2024.bionlp-1.35"},{"key":"2219_CR86","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1136\/jamia.2009.001560","volume":"17","author":"GK Savova","year":"2010","unstructured":"Savova, G. K. et al. Mayo clinical text analysis and knowledge extraction system (CTakes): architecture, component evaluation and applications. J. Am. Med. Inform. Assoc. 17, 507\u2013513 (2010).","journal-title":"J. Am. Med. Inform. Assoc."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-02219-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-02219-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-02219-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T14:23:15Z","timestamp":1768400595000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-025-02219-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,9]]},"references-count":86,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2026,12]]}},"alternative-id":["2219"],"URL":"https:\/\/doi.org\/10.1038\/s41746-025-02219-4","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,9]]},"assertion":[{"value":"5 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"46"}}