{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T16:07:36Z","timestamp":1777392456196,"version":"3.51.4"},"reference-count":86,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T00:00:00Z","timestamp":1738800000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T00:00:00Z","timestamp":1738800000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100000287","name":"Royal Academy of Engineering","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000287","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000266","name":"RCUK | Engineering and Physical Sciences Research Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["npj Digit. Med."],"DOI":"10.1038\/s41746-024-01339-7","type":"journal-article","created":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T06:41:56Z","timestamp":1738824116000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["A multimodal multidomain multilingual medical foundation model for zero shot clinical diagnosis"],"prefix":"10.1038","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7715-5228","authenticated-orcid":false,"given":"Fenglin","family":"Liu","sequence":"first","affiliation":[]},{"given":"Zheng","family":"Li","sequence":"additional","affiliation":[]},{"given":"Qingyu","family":"Yin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0081-4106","authenticated-orcid":false,"given":"Jinfa","family":"Huang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1118-9710","authenticated-orcid":false,"given":"Jiebo","family":"Luo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7006-1947","authenticated-orcid":false,"given":"Anshul","family":"Thakur","sequence":"additional","affiliation":[]},{"given":"Kim","family":"Branson","sequence":"additional","affiliation":[]},{"given":"Patrick","family":"Schwab","sequence":"additional","affiliation":[]},{"given":"Bing","family":"Yin","sequence":"additional","affiliation":[]},{"given":"Xian","family":"Wu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4516-9729","authenticated-orcid":false,"given":"Yefeng","family":"Zheng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9848-8555","authenticated-orcid":false,"given":"David A.","family":"Clifton","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,6]]},"reference":[{"key":"1339_CR1","doi-asserted-by":"publisher","first-page":"1459","DOI":"10.1002\/emp2.12297","volume":"1","author":"M Carlile","year":"2020","unstructured":"Carlile, M. et al. Deployment of artificial intelligence for radiographic diagnosis of Covid-19 pneumonia in the emergency department. J. Am. Coll. Emerg. Phys. Open 1, 1459\u20131464 (2020).","journal-title":"J. Am. Coll. Emerg. Phys. Open"},{"key":"1339_CR2","doi-asserted-by":"crossref","unstructured":"Wang, X., Peng, Y., Lu, L., Lu, Z. & Summers, R. M. Tienet: Text-image embedding network for common thorax disease classification and reporting in chest x-rays. In IEEE Conference on Computer Vision and Pattern Recognition (IEEE, 2018).","DOI":"10.1109\/CVPR.2018.00943"},{"key":"1339_CR3","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W. & Zou, Y. Exploring and distilling posterior and prior knowledge for radiology report generation. In IEEE Conference on Computer Vision and Pattern Recognition (IEEE, 2021).","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"1339_CR4","doi-asserted-by":"crossref","unstructured":"Jing, B., Xie, P. & Xing, E. P. On the automatic generation of medical imaging reports. In Annual Meeting of the Association for Computational Linguistics (Association for Computational Linguistics, 2018).","DOI":"10.18653\/v1\/P18-1240"},{"key":"1339_CR5","first-page":"3\u20139","volume":"81","author":"A Brady","year":"2012","unstructured":"Brady, A., Laoide, R. \u00d3., Mccarthy, P. & Mcdermott, R. Discrepancy and error in radiology: concepts, causes and consequences. Ulst. Med. J. 81, 3\u20139 (2012).","journal-title":"Ulst. Med. J."},{"key":"1339_CR6","unstructured":"Liu, F. et al. Auto-encoding knowledge graph for unsupervised medical report generation. In Annual Conference on Neural Information Processing Systems (NeurIPS, 2021)."},{"key":"1339_CR7","doi-asserted-by":"publisher","first-page":"753","DOI":"10.7326\/M16-0961","volume":"165","author":"C Sinsky","year":"2016","unstructured":"Sinsky, C. et al. Allocation of physician time in ambulatory practice: a time and motion study in 4 specialties. Ann. Intern. Med. 165, 753\u2013760 (2016).","journal-title":"Ann. Intern. Med."},{"key":"1339_CR8","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1111\/j.1525-1497.2006.00307.x","volume":"21","author":"M Weiner","year":"2006","unstructured":"Weiner, M. & Biondich, P. The influence of information technology on patient-physician relationships. J. Gen. Intern. Med. 21, 35\u201339 (2006).","journal-title":"J. Gen. Intern. Med."},{"key":"1339_CR9","doi-asserted-by":"crossref","unstructured":"Tawfik, D. S. et al. Physician burnout, well-being, and work unit safety grades in relationship to reported medical errors. In Mayo Clinic Proceedings. 1571\u20131580 (Elsevier, 2018).","DOI":"10.1016\/j.mayocp.2018.05.014"},{"key":"1339_CR10","doi-asserted-by":"publisher","first-page":"516","DOI":"10.1111\/joim.12752","volume":"283","author":"CP West","year":"2018","unstructured":"West, C. P., Dyrbye, L. N. & Shanafelt, T. D. Physician burnout: contributors, consequences and solutions. J. Intern. Med. 283, 516\u2013529 (2018).","journal-title":"J. Intern. Med."},{"key":"1339_CR11","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1038\/s42256-021-00307-0","volume":"3","author":"M Roberts","year":"2021","unstructured":"Roberts, M. et al. Common pitfalls and recommendations for using machine learning to detect and prognosticate for Covid-19 using chest radiographs and CT scans. Nat. Mach. Intell. 3, 199\u2013217 (2021).","journal-title":"Nat. Mach. Intell."},{"key":"1339_CR12","doi-asserted-by":"publisher","first-page":"e210011","DOI":"10.1148\/ryai.2021210011","volume":"3","author":"D Driggs","year":"2021","unstructured":"Driggs, D. et al. Machine learning for covid-19 diagnosis and prognostication: lessons for amplifying the signal while reducing the noise. Radiol. Artif. Intell. 3, e210011 (2021).","journal-title":"Radiol. Artif. Intell."},{"key":"1339_CR13","doi-asserted-by":"publisher","first-page":"820","DOI":"10.1109\/JPROC.2021.3054390","volume":"109","author":"SK Zhou","year":"2021","unstructured":"Zhou, S. K. et al. A review of deep learning in medical imaging: Imaging traits, technology trends, case studies with progress highlights, and future promises. Proc. IEEE 109, 820\u2013838 (2021).","journal-title":"Proc. IEEE"},{"key":"1339_CR14","doi-asserted-by":"crossref","unstructured":"Jing, B., Wang, Z. & Xing, E. P. Show, describe and conclude: On exploiting the structure information of chest x-ray reports. In Annual Meeting of the Association for Computational Linguistics (Association for Computational Linguistics, 2019).","DOI":"10.18653\/v1\/P19-1657"},{"key":"1339_CR15","doi-asserted-by":"crossref","unstructured":"Wang, J., Bhalerao, A. & He, Y. Cross-modal prototype driven network for radiology report generation. In European Conference on Computer Vision (IEEE, 2022).","DOI":"10.1007\/978-3-031-19833-5_33"},{"key":"1339_CR16","doi-asserted-by":"publisher","first-page":"102589","DOI":"10.1016\/j.scs.2020.102589","volume":"65","author":"S Bhattacharya","year":"2021","unstructured":"Bhattacharya, S. et al. Deep learning and medical image processing for coronavirus (Covid-19) pandemic: a survey. Sustain. Cities Soc. 65, 102589 (2021).","journal-title":"Sustain. Cities Soc."},{"key":"1339_CR17","doi-asserted-by":"publisher","first-page":"1409","DOI":"10.1007\/s10462-021-09985-z","volume":"55","author":"TA Soomro","year":"2022","unstructured":"Soomro, T. A. et al. Artificial intelligence (AI) for medical imaging to combat coronavirus disease (Covid-19): a detailed review with direction for future research. Artif. Intell. Rev. 55, 1409\u20131439 (2022).","journal-title":"Artif. Intell. Rev."},{"key":"1339_CR18","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1038\/s41746-023-00952-2","volume":"6","author":"F Liu","year":"2023","unstructured":"Liu, F. et al. A medical multimodal large language model for future pandemics. NPJ Digit. Med. 6, 226 (2023).","journal-title":"NPJ Digit. Med."},{"key":"1339_CR19","first-page":"64","volume":"5","author":"RM Galimova","year":"2019","unstructured":"Galimova, R. M., Buzaev, I. V., Ramilevich, K. A., Yuldybaev, L. K. & Shaykhulova, A. F. Artificial intelligence-developments in medicine in the last two years. Chronic Dis. Transl. Med. 5, 64\u201368 (2019).","journal-title":"Chronic Dis. Transl. Med."},{"key":"1339_CR20","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1001\/jamapediatrics.2022.3828","volume":"177","author":"A Chen","year":"2023","unstructured":"Chen, A. et al. Inclusion of non\u2013English-speaking participants in pediatric health research: a review. JAMA Pediatr. 177, 81\u201388 (2023).","journal-title":"JAMA Pediatr."},{"key":"1339_CR21","doi-asserted-by":"crossref","unstructured":"Budennyy, S. et al. Eco2AI: carbon emissions tracking of machine learning models as the first step towards sustainable AI. In Doklady Mathematics. 1\u201311 (Springer, 2023).","DOI":"10.1134\/S1064562422060230"},{"key":"1339_CR22","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1038\/s41597-019-0322-0","volume":"6","author":"AEW Johnson","year":"2019","unstructured":"Johnson, A. E. W. et al. MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports. Sci. Data 6, 317 (2019).","journal-title":"Sci. Data"},{"key":"1339_CR23","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1109\/TBDATA.2020.3035935","volume":"7","author":"Y Peng","year":"2021","unstructured":"Peng, Y. et al. COVID-19-CT-CXR: a freely accessible and weakly labeled chest x-ray and CT image collection on COVID-19 from biomedical literature. IEEE Trans. Big Data 7, 3\u201312 (2021).","journal-title":"IEEE Trans. Big Data"},{"key":"1339_CR24","first-page":"339","volume":"5","author":"Y Wu","year":"2016","unstructured":"Wu, Y. et al. Google\u2019s multilingual neural machine translation system: Enabling zero-shot translation. Trans. Assoc Comput Linguist 5, 339\u2013351 (2016).","journal-title":"Trans. Assoc Comput Linguist"},{"key":"1339_CR25","doi-asserted-by":"publisher","first-page":"304","DOI":"10.1093\/jamia\/ocv080","volume":"23","author":"D Demner-Fushman","year":"2016","unstructured":"Demner-Fushman, D. et al. Preparing a collection of radiology examinations for distribution and retrieval. J. Am. Med. Inform. Assoc. 23, 304\u2013310 (2016).","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"1339_CR26","doi-asserted-by":"crossref","unstructured":"Li, C. Y., Liang, X., Hu, Z. & Xing, E. P. Knowledge-driven encode, retrieve, paraphrase for medical image report generation. In AAAI Conference on Artificial Intelligence (AAAI, 2019).","DOI":"10.1609\/aaai.v33i01.33016666"},{"key":"1339_CR27","unstructured":"Li, Y., Liang, X., Hu, Z. & Xing, E. P. Hybrid retrieval-generation reinforced agent for medical image report generation. In Annual Conference on Neural Information Processing Systems (NeurIPS, 2018)."},{"key":"1339_CR28","doi-asserted-by":"crossref","unstructured":"Chen, Z., Song, Y., Chang, T. & Wan, X. Generating radiology reports via memory-driven transformer. In Conference on Empirical Methods in Natural Language Processing (Association for Computational Linguistics, 2020).","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"1339_CR29","doi-asserted-by":"publisher","first-page":"3786","DOI":"10.1109\/TNNLS.2021.3099165","volume":"32","author":"G Liu","year":"2021","unstructured":"Liu, G. et al. Medical-vlbert: Medical visual language BERT for COVID-19 CT report generation with alternate learning. IEEE Trans. Neural Netw. Learn. Syst. 32, 3786\u20133797 (2021).","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"1339_CR30","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/s11280-022-01013-6","volume":"26","author":"M Li","year":"2023","unstructured":"Li, M., Liu, R., Wang, F., Chang, X. & Liang, X. Auxiliary signal-guided knowledge encoder-decoder for medical report generation. World Wide Web 26, 253\u2013270 (2023).","journal-title":"World Wide Web"},{"key":"1339_CR31","first-page":"475","volume":"4","author":"S Jaeger","year":"2014","unstructured":"Jaeger, S. et al. Two public chest x-ray datasets for computer-aided screening of pulmonary diseases. Quant. imaging Med. Surg. 4, 475 (2014).","journal-title":"Quant. imaging Med. Surg."},{"key":"1339_CR32","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1038\/s42256-021-00425-9","volume":"4","author":"H Zhou","year":"2022","unstructured":"Zhou, H. et al. Generalized radiograph representation learning via cross-supervision between images and free-text radiology reports. Nat. Mach. Intell. 4, 32\u201340 (2022).","journal-title":"Nat. Mach. Intell."},{"key":"1339_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.59275\/j.melba.2020-48g7","volume":"1","author":"JP Cohen","year":"2020","unstructured":"Cohen, J. P., Morrison, P. & Dao, L. Covid-19 image data collection: Prospective predictions are the future. Mach Learn Biomed Imaging 1, 1\u201338 (2020).","journal-title":"Mach Learn Biomed Imaging"},{"key":"1339_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.59275\/j.melba.2020-48g7","volume":"1","author":"JP Cohen","year":"2020","unstructured":"Cohen, J. P. et al. Covid-19 image data collection: Prospective predictions are the future. Mach. Learn. Biomed. Imaging 1, 1\u201310 (2020).","journal-title":"Mach. Learn. Biomed. Imaging"},{"key":"1339_CR35","doi-asserted-by":"crossref","unstructured":"Wang, X. et al. ChestX-ray8: Hospital-scale chest x-ray database and benchmarks on weakly-supervised classification and localization of common thorax diseases. In IEEE Conference on Computer Vision and Pattern Recognition (IEEE, 2017).","DOI":"10.1109\/CVPR.2017.369"},{"key":"1339_CR36","doi-asserted-by":"crossref","unstructured":"Wu, C., Zhang, X., Zhang, Y., Wang, Y. & Xie, W. Medklip: Medical knowledge enhanced language-image pre-training for x-ray diagnosis. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 21372\u201321383 (IEEE, 2023).","DOI":"10.1109\/ICCV51070.2023.01954"},{"key":"1339_CR37","unstructured":"Zhou, H.-Y., Lian, C., Wang, L. & Yu, Y. Advancing radiograph representation learning with masked record modeling. In The Eleventh International Conference on Learning Representations (ICLR, 2023)."},{"key":"1339_CR38","doi-asserted-by":"crossref","unstructured":"Irvin, J. et al. Chexpert: A large chest radiograph dataset with uncertainty labels and expert comparison. In AAAI Conference on Artificial Intelligence (AAAI, 2019).","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"1339_CR39","doi-asserted-by":"crossref","unstructured":"Huang, S., Shen, L., Lungren, M. P. & Yeung, S. Gloria: A multimodal global-local representation learning framework for label-efficient medical image recognition. In International Conference on Computer Vision. 3922\u20133931 (IEEE, 2021).","DOI":"10.1109\/ICCV48922.2021.00391"},{"key":"1339_CR40","unstructured":"Zhang, Y., Jiang, H., Miura, Y., Manning, C. D. & Langlotz, C. P. Contrastive learning of medical visual representations from paired images and text. In Proceedings of Machine Learning for Healthcare (PMLR, 2022)."},{"key":"1339_CR41","doi-asserted-by":"publisher","first-page":"e180041","DOI":"10.1148\/ryai.2019180041","volume":"1","author":"G Shih","year":"2019","unstructured":"Shih, G. et al. Augmenting the national institutes of health chest radiograph dataset with expert annotations of possible pneumonia. Radiol. Artif. Intell. 1, e180041 (2019).","journal-title":"Radiol. Artif. Intell."},{"key":"1339_CR42","unstructured":"Society for Imaging Informatics in Medicine (SIIM). Siim-acr pneumothorax segmentation. In Kaggle (https:\/\/www.kaggle.com\/c\/siim-acr-pneumothorax-segmentation) (2019)."},{"key":"1339_CR43","unstructured":"Chen, X. et al. Microsoft COCO captions: Data collection and evaluation server. Preprint at https:\/\/arxiv.org\/abs\/1504.00325 (2015)."},{"key":"1339_CR44","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T. & Zhu, W. BLEU: a method for automatic evaluation of machine translation. In Annual Meeting of the Association for Computational Linguistics (Association for Computational Linguistics, 2002).","DOI":"10.3115\/1073083.1073135"},{"key":"1339_CR45","unstructured":"Lin, C.-Y. ROUGE: A package for automatic evaluation of summaries. In Annual Meeting of the Association for Computational Linguistics (Association for Computational Linguistics, 2004)."},{"key":"1339_CR46","unstructured":"Banerjee, S. & Lavie, A. METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In IEEvaluation@ACL (Association for Computational Linguistics, 2005)."},{"key":"1339_CR47","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Zitnick, C. L. & Parikh, D. Cider: Consensus-based image description evaluation. In IEEE Conference on Computer Vision and Pattern Recognition (IEEE, 2015).","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"1339_CR48","doi-asserted-by":"crossref","unstructured":"Jin, H., Che, H., Lin, Y. & Chen, H. Promptmrg: Diagnosis-driven prompts for medical report generation. In Proceedings of the AAAI Conference on Artificial Intelligence. 2607\u20132615 (AAAI, 2024).","DOI":"10.1609\/aaai.v38i3.28038"},{"key":"1339_CR49","first-page":"2579","volume":"9","author":"L van der Maaten","year":"2008","unstructured":"van der Maaten, L. & Hinton, G. Visualizing data using t-SNE. JMLR 9, 2579\u20132605 (2008).","journal-title":"JMLR"},{"key":"1339_CR50","unstructured":"OpenAI. Gpt-4 technical report. Preprint at https:\/\/arxiv.org\/abs\/2303.08774 (2023)."},{"key":"1339_CR51","unstructured":"Costa-juss\u00e0, M. R. et al. No language left behind: Scaling human-centered machine translation. Preprint at https:\/\/arxiv.org\/abs\/2207.04672 (2022)."},{"key":"1339_CR52","doi-asserted-by":"crossref","unstructured":"Song, Y., Chen, S., Zhao, Y. & Jin, Q. Unpaired cross-lingual image caption generation with self-supervised rewards. In Proceedings of the 27th ACM International Conference on Multimedia. 784\u2013792 (ACM, 2019).","DOI":"10.1145\/3343031.3350996"},{"key":"1339_CR53","doi-asserted-by":"publisher","first-page":"9255","DOI":"10.1109\/TPAMI.2021.3132229","volume":"44","author":"F Liu","year":"2021","unstructured":"Liu, F. et al. Aligning source visual and target language domains for unpaired video captioning. IEEE Trans. Pattern Anal. Mach. Intell. 44, 9255\u20139268 (2021).","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1339_CR54","unstructured":"Chen, T., Kornblith, S., Norouzi, M. & Hinton, G. E. A simple framework for contrastive learning of visual representations. In International Conference on Machine Learning (PMLR, 2020)."},{"key":"1339_CR55","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S. & Girshick, R. B. Momentum contrast for unsupervised visual representation learning. In IEEE Conference on Computer Vision and Pattern Recognition (IEEE, 2020).","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"1339_CR56","unstructured":"Chen, X., Fan, H., Girshick, R. B. & He, K. Improved baselines with momentum contrastive learning. Preprint at https:\/\/arxiv.org\/abs\/2003.04297 (2020)."},{"key":"1339_CR57","unstructured":"Radford, A. et al. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning (PMLR, 2021)."},{"key":"1339_CR58","unstructured":"Jia, C. et al. Scaling up visual and vision-language representation learning with noisy text supervision. In International Conference on Machine Learning (PMLR, 2021)."},{"key":"1339_CR59","unstructured":"Oord, A. V. D., Li, Y. & Vinyals, O. Representation learning with contrastive predictive coding. Preprint at https:\/\/arxiv.org\/abs\/1807.03748 (2018)."},{"key":"1339_CR60","doi-asserted-by":"crossref","unstructured":"Reimers, N. & Gurevych, I. Making monolingual sentence embeddings multilingual using knowledge distillation. In Conference on Empirical Methods in Natural Language Processing (Association for Computational Linguistics, 2020).","DOI":"10.18653\/v1\/2020.emnlp-main.365"},{"key":"1339_CR61","doi-asserted-by":"publisher","first-page":"5712","DOI":"10.1109\/TPAMI.2024.3371376","volume":"46","author":"B Yang","year":"2024","unstructured":"Yang, B. et al. Zeronlg: Aligning and autoencoding domains for zero-shot multimodal and multilingual natural language generation. IEEE Trans. Pattern Anal. Mach. Intell. 46, 5712\u20135724 (2024).","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1339_CR62","unstructured":"Tang, Y. et al. Multilingual translation with extensible multilingual pretraining and finetuning. Preprint at https:\/\/arxiv.org\/abs\/2008.00401 (2020)."},{"key":"1339_CR63","first-page":"4839","volume":"22","author":"A Fan","year":"2021","unstructured":"Fan, A. et al. Beyond english-centric multilingual machine translation. J. Mach. Learn. Res. 22, 4839\u20134886 (2021).","journal-title":"J. Mach. Learn. Res."},{"key":"1339_CR64","unstructured":"National Institutes of Health. PubMed Corpora (https:\/\/pubmed.ncbi.nlm.nih.gov\/download\/). (National Library of Medicine, 2022)."},{"key":"1339_CR65","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/sdata.2016.35","volume":"3","author":"AEW Johnson","year":"2016","unstructured":"Johnson, A. E. W. et al. MIMIC-III, a freely accessible critical care database. Sci. Data 3, 1\u20139 (2016).","journal-title":"Sci. Data"},{"key":"1339_CR66","doi-asserted-by":"publisher","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","volume":"36","author":"J Lee","year":"2020","unstructured":"Lee, J. et al. Biobert: a pre-trained biomedical language representation model for biomedical text mining. Bioinform 36, 1234\u20131240 (2020).","journal-title":"Bioinform"},{"key":"1339_CR67","first-page":"2:1\u20132:23","volume":"3","author":"Y Gu","year":"2022","unstructured":"Gu, Y. et al. Domain-specific language model pretraining for biomedical natural language processing. ACM Trans. Comput. Heal. 3, 2:1\u20132:23 (2022).","journal-title":"ACM Trans. Comput. Heal."},{"key":"1339_CR68","doi-asserted-by":"crossref","unstructured":"Alsentzer, E. et al. Publicly available clinical BERT embeddings. In Proceedings of the 2nd Clinical Natural Language Processing Workshop (Association for Computational Linguistics, 2019).","DOI":"10.18653\/v1\/W19-1909"},{"key":"1339_CR69","doi-asserted-by":"crossref","unstructured":"Peng, Y., Yan, S. & Lu, Z. Transfer learning in biomedical natural language processing: an evaluation of BERT and ELMO on ten benchmarking datasets. In BioNLP@ACL. 58\u201365 (Association for Computational Linguistics, 2019).","DOI":"10.18653\/v1\/W19-5006"},{"key":"1339_CR70","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wu, Z., Agarwal, D. & Sun, J. Medclip: Contrastive learning from unpaired medical images and text. In Conference on Empirical Methods in Natural Language Processing. 3876\u20133887 (Association for Computational Linguistics, 2022).","DOI":"10.18653\/v1\/2022.emnlp-main.256"},{"key":"1339_CR71","unstructured":"Dosovitskiy, A. et al. An image is worth 16x16 words: Transformers for image recognition at scale. In International Conference on Learning Representations (PMLR, 2021)."},{"key":"1339_CR72","unstructured":"Vaswani, A. et al. Attention is all you need. In Annual Conference on Neural Information Processing Systems (NeurIPS, 2017)."},{"key":"1339_CR73","unstructured":"Loshchilov, I. & Hutter, F. Decoupled weight decay regularization. In International Conference on Learning Representations (ICLR, 2019)."},{"key":"1339_CR74","unstructured":"Paszke, A. et al. Pytorch: An imperative style, high-performance deep learning library. In Annual Conference on Neural Information Processing Systems (NeurIPS, 2019)."},{"key":"1339_CR75","unstructured":"Micikevicius, P. et al. Mixed precision training. In International Conference on Learning Representations (ICLR, 2018)."},{"key":"1339_CR76","doi-asserted-by":"crossref","unstructured":"Liu, F. et al. Contrastive attention for automatic chest x-ray report generation. In Findings of the Association for Computational Linguistics (Association for Computational Linguistics, 2021).","DOI":"10.18653\/v1\/2021.findings-acl.23"},{"key":"1339_CR77","doi-asserted-by":"crossref","unstructured":"You, D. et al. Aligntransformer: Hierarchical alignment of visual regions and disease tags for medical report generation. In International Conference on Medical Image Computing and Computer Assisted Intervention (Springer, 2021).","DOI":"10.1007\/978-3-030-87199-4_7"},{"key":"1339_CR78","unstructured":"Xu, K. et al. Show, attend and tell: Neural image caption generation with visual attention. In International Conference on Machine Learning (PMLR, 2015)."},{"key":"1339_CR79","unstructured":"Zhou, H. et al. A survey of large language models in medicine: Progress, application, and challenge. Preprint at https:\/\/arxiv.org\/abs\/2311.05112 (2023)."},{"key":"1339_CR80","doi-asserted-by":"crossref","unstructured":"Tang, W. et al. Generating Chinese radiology reports from X-ray images: a public dataset and an X-ray-to-reports generation method. In International Conference on Medical Image Computing and Computer-Assisted Intervention. 79\u201388 (Springer, 2023).","DOI":"10.1007\/978-3-031-47401-9_8"},{"key":"1339_CR81","doi-asserted-by":"crossref","unstructured":"Boecking, B. et al. Making the most of text semantics to improve biomedical vision\u2013language processing. In European Conference on Computer Vision, 1\u201321 (Springer, 2022).","DOI":"10.1007\/978-3-031-20059-5_1"},{"key":"1339_CR82","doi-asserted-by":"crossref","unstructured":"Chen, Z., Shen, Y., Song, Y. & Wan, X. Cross-modal memory networks for radiology report generation. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Association for Computational Linguistics, 2021).","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"1339_CR83","doi-asserted-by":"publisher","first-page":"101840","DOI":"10.1016\/j.media.2020.101840","volume":"67","author":"Z Zhou","year":"2021","unstructured":"Zhou, Z., Sodha, V., Pang, J., Gotway, M. B. & Liang, J. Models genesis. Med. Image Anal. 67, 101840 (2021).","journal-title":"Med. Image Anal."},{"key":"1339_CR84","doi-asserted-by":"crossref","unstructured":"Jia, X. et al. Few-shot radiology report generation via knowledge transfer and multi-modal alignment. In 2022 IEEE International Conference on Bioinformatics and Biomedicine (BIBM), 1574\u20131579 (IEEE, 2022).","DOI":"10.1109\/BIBM55620.2022.9995533"},{"key":"1339_CR85","unstructured":"Wu, X. et al. Deltanet: Conditional medical report generation for COVID-19 diagnosis. In International Conference on Computational Linguistics (COLING, 2022)."},{"key":"1339_CR86","unstructured":"Chen, Z., Luo, L., Bie, Y. & Chen, H. Dia-LLaMA: Towards large language model-driven ct report generation. Preprint at https:\/\/arxiv.org\/abs\/2403.16386 (2024)."}],"container-title":["npj Digital Medicine"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01339-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01339-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01339-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T06:42:32Z","timestamp":1738824152000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41746-024-01339-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,6]]},"references-count":86,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1339"],"URL":"https:\/\/doi.org\/10.1038\/s41746-024-01339-7","relation":{},"ISSN":["2398-6352"],"issn-type":[{"value":"2398-6352","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,6]]},"assertion":[{"value":"18 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 November 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"86"}}