{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T23:28:55Z","timestamp":1773444535201,"version":"3.50.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032049803","type":"print"},{"value":"9783032049810","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T00:00:00Z","timestamp":1758326400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T00:00:00Z","timestamp":1758326400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04981-0_46","type":"book-chapter","created":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T05:11:26Z","timestamp":1758258686000},"page":"484-494","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["RadAlign: Advancing Radiology Report Generation with Vision-Language Concept Alignment"],"prefix":"10.1007","author":[{"given":"Difei","family":"Gu","sequence":"first","affiliation":[]},{"given":"Yunhe","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Mu","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Dimitris","family":"Metaxas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,20]]},"reference":[{"key":"46_CR1","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6077\u20136086 (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"46_CR2","doi-asserted-by":"crossref","unstructured":"Asif, S., Wenhui, Y., Jin, H., Jinhai, S.: Classification of covid-19 from chest X-ray images using deep convolutional neural network. In: 2020 IEEE 6th International Conference on Computer and Communications (ICCC), pp. 426\u2013433. IEEE (2020)","DOI":"10.1109\/ICCC51575.2020.9344870"},{"key":"46_CR3","doi-asserted-by":"crossref","unstructured":"Boecking, B., et al.: Making the most of text semantics to improve biomedical vision\u2013language processing. In: European Conference on Computer Vision, pp. 1\u201321. Springer (2022)","DOI":"10.1007\/978-3-031-20059-5_1"},{"key":"46_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Z., Shen, Y., Song, Y., Wan, X.: Cross-modal memory networks for radiology report generation. arXiv preprint arXiv:2204.13258 (2022)","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"46_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Z., Song, Y., Chang, T.H., Wan, X.: Generating radiology reports via memory-driven transformer. arXiv preprint arXiv:2010.16056 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"issue":"2","key":"46_CR6","doi-asserted-by":"publisher","first-page":"304","DOI":"10.1093\/jamia\/ocv080","volume":"23","author":"D Demner-Fushman","year":"2016","unstructured":"Demner-Fushman, D., Antani, S., Simpson, M., Thoma, G.R., McDonald, C.J.: Preparing a collection of radiology examinations for distribution and retrieval. J. Am. Med. Inform. Assoc. 23(2), 304\u2013310 (2016)","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"46_CR7","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"46_CR8","doi-asserted-by":"crossref","unstructured":"Gao, Y., Gu, D., Zhou, M., Metaxas, D.: Aligning human knowledge with visual concepts towards explainable medical image classification. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 46\u201356. Springer (2024)","DOI":"10.1007\/978-3-031-72117-5_5"},{"key":"46_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"46_CR10","doi-asserted-by":"crossref","unstructured":"Hodler, J., Kubik-Huch, R.A., von Schulthess, G.K.: Diseases of the chest, breast, heart and vessels 2019-2022: diagnostic and interventional imaging (2019)","DOI":"10.1007\/978-3-030-11149-6"},{"key":"46_CR11","doi-asserted-by":"crossref","unstructured":"Jing, B., Wang, Z., Xing, E.: Show, describe and conclude: on exploiting the structure information of chest X-ray reports. arXiv preprint arXiv:2004.12274 (2020)","DOI":"10.18653\/v1\/P19-1657"},{"key":"46_CR12","doi-asserted-by":"crossref","unstructured":"Johnson, A.E., et al.: MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports. Sci. Data 6(1), 317 (2019)","DOI":"10.1038\/s41597-019-0322-0"},{"key":"46_CR13","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: Blip-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: International Conference on Machine Learning, pp. 19730\u201319742. PMLR (2023)"},{"key":"46_CR14","unstructured":"Li, Y., Liang, X., Hu, Z., Xing, E.P.: Hybrid retrieval-generation reinforced agent for medical image report generation. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"46_CR15","unstructured":"Lin, C.Y.: ROUGE: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, Barcelona, Spain, pp. 74\u201381. Association for Computational Linguistics (2004). https:\/\/aclanthology.org\/W04-1013"},{"key":"46_CR16","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W., Zou, Y.: Exploring and distilling posterior and prior knowledge for radiology report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13753\u201313762 (2021)","DOI":"10.1109\/CVPR46437.2021.01354"},{"issue":"11","key":"46_CR17","doi-asserted-by":"publisher","first-page":"1472","DOI":"10.1016\/j.acra.2018.02.018","volume":"25","author":"MP McBee","year":"2018","unstructured":"McBee, M.P., et al.: Deep learning in radiology. Acad. Radiol. 25(11), 1472\u20131480 (2018)","journal-title":"Acad. Radiol."},{"key":"46_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpb.2022.107141","volume":"226","author":"GI Okolo","year":"2022","unstructured":"Okolo, G.I., Katsigiannis, S., Ramzan, N.: IEViT: an enhanced vision transformer architecture for chest X-ray image classification. Comput. Methods Programs Biomed. 226, 107141 (2022)","journal-title":"Comput. Methods Programs Biomed."},{"key":"46_CR19","doi-asserted-by":"crossref","unstructured":"Ostmeier, S., et al.: Green: generative radiology report evaluation and error notation. arXiv preprint arXiv:2405.03595 (2024)","DOI":"10.18653\/v1\/2024.findings-emnlp.21"},{"key":"46_CR20","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"46_CR21","doi-asserted-by":"crossref","unstructured":"Qin, H., Song, Y.: Reinforced cross-modal alignment for radiology report generation. In: Findings of the Association for Computational Linguistics: ACL 2022, pp. 448\u2013458 (2022)","DOI":"10.18653\/v1\/2022.findings-acl.38"},{"key":"46_CR22","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML, pp. 8748\u20138763. PMLR (2021)"},{"key":"46_CR23","unstructured":"Ramesh, V., Chi, N.A., Rajpurkar, P.: Improving radiology report generation systems by removing hallucinated references to non-existent priors. In: Machine Learning for Health, pp. 456\u2013473. PMLR (2022)"},{"key":"46_CR24","doi-asserted-by":"crossref","unstructured":"Reale-Nosei, G., Amador-Dom\u00ednguez, E., Serrano, E.: From vision to text: a comprehensive review of natural image captioning in medical diagnosis and radiology report generation. Med. Image Anal. 103264 (2024)","DOI":"10.1016\/j.media.2024.103264"},{"key":"46_CR25","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3156\u20133164 (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"46_CR26","doi-asserted-by":"crossref","unstructured":"Wang, S., Zhao, Z., Ouyang, X., Wang, Q., Shen, D.: Chatcad: interactive computer-aided diagnosis on medical image using large language models. arXiv preprint arXiv:2302.07257 (2023)","DOI":"10.1038\/s44172-024-00271-8"},{"key":"46_CR27","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wu, Z., Agarwal, D., Sun, J.: Medclip: contrastive learning from unpaired medical images and text. arXiv preprint arXiv:2210.10163 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.256"},{"key":"46_CR28","doi-asserted-by":"crossref","unstructured":"Wu, C., Zhang, X., Zhang, Y., Wang, Y., Xie, W.: Medklip: medical knowledge enhanced language-image pre-training in radiology. arXiv preprint arXiv:2301.02228 (2023)","DOI":"10.1101\/2023.01.10.23284412"},{"key":"46_CR29","doi-asserted-by":"crossref","unstructured":"Yang, Y., Panagopoulou, A., Zhou, S., Jin, D., Callison-Burch, C., Yatskar, M.: Language in a bottle: language model guided concept bottlenecks for interpretable image classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19187\u201319197 (2023)","DOI":"10.1109\/CVPR52729.2023.01839"},{"key":"46_CR30","unstructured":"Ye, W., Yao, J., Xue, H., Li, Y.: Weakly supervised lesion localization with probabilistic-cam pooling. arXiv preprint arXiv:2005.14480 (2020)"},{"key":"46_CR31","unstructured":"Zhang, S., et al.: Biomedclip: a multimodal biomedical foundation model pretrained from fifteen million scientific image-text pairs. arXiv preprint arXiv:2303.00915 (2023)"},{"key":"46_CR32","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: Bertscore: evaluating text generation with bert. arXiv preprint arXiv:1904.09675 (2019)"},{"key":"46_CR33","unstructured":"Zhang, Y., et al.: Data-centric foundation models in computational healthcare: a survey. arXiv preprint arXiv:2401.02458 (2024)"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04981-0_46","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T05:33:26Z","timestamp":1767418406000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04981-0_46"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,20]]},"ISBN":["9783032049803","9783032049810"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04981-0_46","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,20]]},"assertion":[{"value":"20 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}