{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,11]],"date-time":"2025-12-11T09:14:50Z","timestamp":1765444490556,"version":"3.46.0"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T00:00:00Z","timestamp":1761782400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T00:00:00Z","timestamp":1761782400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Health Inf Sci Syst"],"DOI":"10.1007\/s13755-025-00389-9","type":"journal-article","created":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T19:29:21Z","timestamp":1761852561000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Observe, align, and enhance: a hierarchical retrieval-augmented vision-language model for generating radiology reports"],"prefix":"10.1007","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6108-8283","authenticated-orcid":false,"given":"Kai","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiwen","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wentai","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuoran","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingcai","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,30]]},"reference":[{"key":"389_CR1","unstructured":"Banerjee S, Lavie A. METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization, 2005. pp. 65\u201372."},{"key":"389_CR2","doi-asserted-by":"crossref","unstructured":"Chen Z, Shen Y, Song Y, Wan X. Cross-modal memory networks for radiology report generation. arXiv preprint 2022. arXiv:2204.13258.","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"389_CR3","doi-asserted-by":"crossref","unstructured":"Chen Z, Song Y, Chang T-H, Wan X. Generating radiology reports via memory-driven transformer. arXiv preprint 2020. arXiv:2010.16056.","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"389_CR4","doi-asserted-by":"crossref","unstructured":"Cornia M, Stefanini M, Baraldi L, Cucchiara R. Meshed-memory transformer for image captioning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 2020. pp. 10578\u201387.","DOI":"10.1109\/CVPR42600.2020.01059"},{"issue":"2","key":"389_CR5","first-page":"2","volume":"1","author":"DF Dina","year":"2015","unstructured":"Dina DF, Kohli MD, Rosenman MB, Shooshan SE, Laritza R, Sameer A, et al. Preparing a collection of radiology examinations for distribution and retrieval. J Am Med Inform Assoc. 2015;1(2):2.","journal-title":"J Am Med Inform Assoc"},{"key":"389_CR6","doi-asserted-by":"crossref","unstructured":"Dong X, Chen J, Weng H, Chen Z, Wang FL, Hao T. A new multi-level knowledge retrieval model for task-oriented dialogue. In: International conference on neural computing for advanced applications, 2024. Springer; 2024. pp. 46\u201360.","DOI":"10.1007\/978-981-97-7007-6_4"},{"key":"389_CR7","doi-asserted-by":"publisher","first-page":"748725","DOI":"10.3389\/fendo.2021.748725","volume":"12","author":"Y Fang","year":"2021","unstructured":"Fang Y, Wang H, Feng M, Zhang W, Cao L, Ding C, et al. Machine-learning prediction of postoperative pituitary hormonal outcomes in nonfunctioning pituitary adenomas: a multicenter study. Front Endocrinol. 2021;12:748725.","journal-title":"Front Endocrinol"},{"key":"389_CR8","unstructured":"Gao Y, Xiong Y, Gao X, Jia K, Pan J, Bi Y, Jiawei Sun YD, Wang H. Retrieval-augmented generation for large language models: a survey. arXiv preprint 2023. arXiv:2312.10997."},{"key":"389_CR9","doi-asserted-by":"publisher","first-page":"100174","DOI":"10.1016\/j.bdr.2020.100174","volume":"23","author":"F Gong","year":"2021","unstructured":"Gong F, Wang M, Wang H, Wang S, Liu M. SMR: medical knowledge graph embedding for safe medicine recommendation. Big Data Res. 2021;23:100174.","journal-title":"Big Data Res"},{"key":"389_CR10","doi-asserted-by":"crossref","unstructured":"Hartsock I, Rasool G. Vision-language models for medical report generation and visual question answering: a review. arXiv preprint 2024. arXiv:2403.02469.","DOI":"10.3389\/frai.2024.1430984"},{"key":"389_CR11","unstructured":"Hu EJ, Wallis P, Allen-Zhu Z, Li Y, Wang S, Wang L, Chen W, et al. LORA: low-rank adaptation of large language models. In: International conference on learning representations, 2021."},{"key":"389_CR12","doi-asserted-by":"crossref","unstructured":"Jin H, Che H, Lin Y, Chen H. PromptMRG: diagnosis-driven prompts for medical report generation. In: Proceedings of the AAAI conference on artificial intelligence, 2024, vol 38. pp. 2607\u201315.","DOI":"10.1609\/aaai.v38i3.28038"},{"key":"389_CR13","unstructured":"Johnson AEW, Pollard TJ, Greenbaum NR, Lungren MP, Ying Deng C, Peng Y, Lu Z, Mark RG, Berkowitz SJ, Horng S. MIMIC-CXR: a large publicly available database of labeled chest radiographs. CoRR, abs\/1901.07042, 2019."},{"key":"389_CR14","doi-asserted-by":"crossref","unstructured":"Kale K, Bhattacharyya P, Gune M, Shetty A, Lawyer R. KGVL-BART: knowledge graph augmented visual language BART for radiology report generation. In: Proceedings of the 17th conference of the European Chapter of the Association for Computational Linguistics, 2023. pp. 3401\u201311.","DOI":"10.18653\/v1\/2023.eacl-main.246"},{"key":"389_CR15","doi-asserted-by":"crossref","unstructured":"Li M, Lin B, Chen Z, Lin H, Liang X, Chang X. Dynamic graph enhanced contrastive learning for chest X-ray report generation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 2023. pp. 3334\u201343.","DOI":"10.1109\/CVPR52729.2023.00325"},{"key":"389_CR16","doi-asserted-by":"crossref","unstructured":"Li Y, Jiang S, Hu B, Wang L, Zhong W, Luo W, Ma L, Zhang M. Uni-MoE: scaling unified multimodal LLMs with mixture of experts. arXiv preprint 2024. arXiv:2405.11273.","DOI":"10.1109\/TPAMI.2025.3532688"},{"key":"389_CR17","unstructured":"Li Y, Wang L, Hu B, Chen X, Zhong W, Lyu C, Zhang M. A comprehensive evaluation of GPT-4V on knowledge-intensive visual question answering. arXiv preprint 2023. arXiv:2311.07536."},{"key":"389_CR18","unstructured":"Lin C-Y. ROUGE: a package for automatic evaluation of summaries. In: Text summarization branches out. Barcelona: Association for Computational Linguistics; 2004. pp. 74\u201381."},{"key":"389_CR19","doi-asserted-by":"crossref","unstructured":"Liu C, Tian Y, Chen W, Song Y, Zhang Y. Bootstrapping large language models for radiology report generation. In: Proceedings of the AAAI conference on artificial intelligence, 2024, vol 38. pp. 18635\u201343.","DOI":"10.1609\/aaai.v38i17.29826"},{"key":"389_CR20","unstructured":"Liu C, Tian Y, Song Y. A systematic review of deep learning-based research on radiology report generation. arXiv e-prints 2023. p. arXiv-2311."},{"key":"389_CR21","doi-asserted-by":"crossref","unstructured":"Liu F, Wu X, Ge S, Fan W, Zou Y. Exploring and distilling posterior and prior knowledge for radiology report generation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 2021. pp. 13753\u201362.","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"389_CR22","unstructured":"Liu X, Yang G, Luo Y, Mao J, Zhang X, Gao M, Zhang S, Shen J, Wang G. Expert-level vision-language foundation model for real-world radiology and comprehensive evaluation. arXiv preprint 2024. arXiv:2409.16183."},{"key":"389_CR23","doi-asserted-by":"crossref","unstructured":"Lu J, Xiong C, Parikh D, Socher R. Knowing when to look: adaptive attention via a visual sentinel for image captioning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, 2017. pp. 375\u201383.","DOI":"10.1109\/CVPR.2017.345"},{"key":"389_CR24","doi-asserted-by":"crossref","unstructured":"Mai Z, Yu P, Liu C, Luo Q, Wei L, Hao T. An enhanced model based on recurrent convolutional neural network for predicting the stage of chronic obstructive pulmonary diseases. In: International conference on neural computing for advanced applications. Springer, 2023. pp. 179\u201390.","DOI":"10.1007\/978-981-99-5847-4_13"},{"key":"389_CR25","doi-asserted-by":"crossref","unstructured":"Nitin Kapadnis M, Patnaik S, Nandy A, Ray S, Goyal P, Sheet D. SERPENT-VIM: self-refining radiology report generation using vision language models. arXiv preprint 2024. arXiv:2404.17912.","DOI":"10.18653\/v1\/2024.clinicalnlp-1.24"},{"issue":"1","key":"389_CR26","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1186\/s12938-023-01113-y","volume":"22","author":"T Pang","year":"2023","unstructured":"Pang T, Li P, Zhao L. A survey on automatic generation of medical imaging reports based on deep learning. Biomed Eng Online. 2023;22(1):48.","journal-title":"Biomed Eng Online"},{"key":"389_CR27","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, Zhu W-J. BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting of the Association for Computational Linguistics, 2002. pp. 311\u20138.","DOI":"10.3115\/1073083.1073135"},{"key":"389_CR28","unstructured":"Pellegrini C, \u00d6zsoy E, Busam B, Navab N, Keicher M. RaDialog: a large vision-language model for radiology report generation and conversational assistance. p. arXiv-2311. arXiv e-prints 2023."},{"key":"389_CR29","doi-asserted-by":"crossref","unstructured":"Shentu J, Al Moubayed N. CXR-IRGen: an integrated vision and language model for the generation of clinically accurate chest X-ray image\u2013report pairs. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, 2024. pp. 5212\u201321.","DOI":"10.1109\/WACV57701.2024.00513"},{"key":"389_CR30","doi-asserted-by":"publisher","first-page":"368","DOI":"10.1109\/RBME.2024.3408456","volume":"18","author":"P Sloan","year":"2024","unstructured":"Sloan P, Clatworthy P, Simpson E, Mirmehdi M. Automated radiology report generation: a review of recent advances. IEEE Rev Biomed Eng. 2024;18:368\u201387.","journal-title":"IEEE Rev Biomed Eng"},{"key":"389_CR31","doi-asserted-by":"crossref","unstructured":"Vedantam R, Lawrence Zitnick C, Parikh D. CIDEr: consensus-based image description evaluation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, 2015. pp. 4566\u201375.","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"389_CR32","unstructured":"Wang X, Li Y, Wang F, Wang S, Li C, Jiang B. R2GenCSR: retrieving context samples for large language model based X-ray medical report generation. arXiv e-prints. 2024, p. arXiv-2408."},{"key":"389_CR33","doi-asserted-by":"crossref","unstructured":"Wang Z, Liu L, Wang L, Zhou L. METransformer: radiology report generation by transformer with multiple learnable expert tokens. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 2023. pp. 11558\u201367.","DOI":"10.1109\/CVPR52729.2023.01112"},{"issue":"3","key":"389_CR34","doi-asserted-by":"publisher","first-page":"100033","DOI":"10.1016\/j.metrad.2023.100033","volume":"1","author":"Z Wang","year":"2023","unstructured":"Wang Z, Liu L, Wang L, Zhou L. R2GenGPT: radiology report generation with frozen LLMs. Meta-Radiology. 2023;1(3):100033.","journal-title":"Meta-Radiology"},{"key":"389_CR35","unstructured":"Xu K, Ba J, Kiros R, Cho K, Courville A, Salakhudinov R, Zemel R, Bengio Y. Show, attend and tell: neural image caption generation with visual attention. In: International conference on machine learning, 2015. PMLR, 2015. pp. 2048\u201357."},{"key":"389_CR36","doi-asserted-by":"crossref","unstructured":"Yan B, Pei M. Clinical-BERT: vision-language pre-training for radiograph diagnosis and reports generation. In: Proceedings of the AAAI conference on artificial intelligence, 2022, vol 36. pp. 2982\u201390.","DOI":"10.1609\/aaai.v36i3.20204"},{"issue":"11","key":"389_CR37","doi-asserted-by":"publisher","first-page":"4017","DOI":"10.1109\/TMI.2024.3412402","volume":"43","author":"Y Yang","year":"2024","unstructured":"Yang Y, Yu J, Fu Z, Zhang K, Yu T, Wang X, et al. Token-mixer: bind image and text in one embedding space for medical image reporting. IEEE Trans Med Imaging. 2024;43(11):4017\u201328.","journal-title":"IEEE Trans Med Imaging"},{"key":"389_CR38","doi-asserted-by":"crossref","unstructured":"You D, Liu F, Ge S, Xie X, Zhang J, Wu X. AlignTransformer: hierarchical alignment of visual regions and disease tags for medical report generation. In: Medical image computing and computer assisted intervention\u2014MICCAI 2021: 24th international conference, Strasbourg, France, 27 September\u20131 October 2021, Proceedings, Part III 24. Springer; 2021. pp. 72\u201382.","DOI":"10.1007\/978-3-030-87199-4_7"},{"key":"389_CR39","doi-asserted-by":"publisher","first-page":"635795","DOI":"10.3389\/fendo.2021.635795","volume":"12","author":"W Zhang","year":"2021","unstructured":"Zhang W, Sun M, Fan Y, Wang H, Feng M, Zhou S, et al. Machine learning in preoperative prediction of postoperative immediate remission of histology-positive Cushing\u2019s disease. Front Endocrinol. 2021;12:635795.","journal-title":"Front Endocrinol"},{"key":"389_CR40","doi-asserted-by":"crossref","unstructured":"Zhao Y, Li Y, Wu Y, Hu B, Chen Q, Wang X, Ding Y, Zhang M. Medical dialogue response generation with pivotal information recalling. In: Proceedings of the 28th ACM SIGKDD conference on knowledge discovery and data mining, 2022. pp. 4763\u201371.","DOI":"10.1145\/3534678.3542674"}],"container-title":["Health Information Science and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13755-025-00389-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13755-025-00389-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13755-025-00389-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,11]],"date-time":"2025-12-11T09:09:34Z","timestamp":1765444174000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13755-025-00389-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,30]]},"references-count":40,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["389"],"URL":"https:\/\/doi.org\/10.1007\/s13755-025-00389-9","relation":{},"ISSN":["2047-2501"],"issn-type":[{"type":"electronic","value":"2047-2501"}],"subject":[],"published":{"date-parts":[[2025,10,30]]},"assertion":[{"value":"31 March 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there are no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This declaration is not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"72"}}