{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T07:04:13Z","timestamp":1766127853530,"version":"3.48.0"},"reference-count":78,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T00:00:00Z","timestamp":1761868800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T00:00:00Z","timestamp":1761868800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s00530-025-02040-9","type":"journal-article","created":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T14:44:49Z","timestamp":1761921889000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mwcl: Memory-driven and mapping alignment with weighted contrastive learning for radiology reports"],"prefix":"10.1007","volume":"31","author":[{"given":"Muhammad","family":"Usman","sequence":"first","affiliation":[]},{"given":"Ziwei","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Zhang","family":"Yijia","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,31]]},"reference":[{"key":"2040_CR1","unstructured":"Bannur, S., Bouzid, K., Castro, D.C., Schwaighofer, A., Thieme, A., Bond-Taylor, S., Ilse, M., P\u00e9rez-Garc\u00eda, F., Salvatelli, V., Sharma, H., et al.: Maira-2: Grounded radiology report generation. arXiv preprint arXiv: 2406.04449 (2024)"},{"key":"2040_CR2","doi-asserted-by":"crossref","unstructured":"Sun, Y., Lee, Y.Z., Woodard, G.A., Zhu, H., Lian, C., Liu, M.: R2gen-mamba: a selective state space model for radiology report generation. arXiv preprint arXiv: 2410.18135 (2024)","DOI":"10.1109\/ISBI60581.2025.10980814"},{"key":"2040_CR3","doi-asserted-by":"publisher","first-page":"121260","DOI":"10.1016\/j.eswa.2023.121260","volume":"237","author":"Y Xue","year":"2024","unstructured":"Xue, Y., Tan, Y., Tan, L., Qin, J., Xiang, X.: Generating radiology reports via auxiliary signal guidance and a memory-driven network. Expert Syst. Appl. 237, 121260 (2024). https:\/\/doi.org\/10.1016\/j.eswa.2023.121260","journal-title":"Expert Syst. Appl."},{"key":"2040_CR4","doi-asserted-by":"publisher","first-page":"103377","DOI":"10.1016\/j.media.2024.103377","volume":"99","author":"W Lang","year":"2025","unstructured":"Lang, W., Liu, Z., Zhang, Y.: Dacg: dual attention and context guidance model for radiology report generation. Med. Image Anal. 99, 103377 (2025)","journal-title":"Med. Image Anal."},{"key":"2040_CR5","first-page":"4776","volume":"38","author":"H Shen","year":"2024","unstructured":"Shen, H., Pei, M., Liu, J., Tian, Z.: Automatic radiology reports generation via memory alignment network. Proc. AAAI Conf. Artificial Intell. 38, 4776\u20134783 (2024)","journal-title":"Proc. AAAI Conf. Artificial Intell."},{"key":"2040_CR6","doi-asserted-by":"publisher","first-page":"104764","DOI":"10.1016\/j.jbi.2024.104764","volume":"161","author":"J Chen","year":"2025","unstructured":"Chen, J., Huang, G., Yuan, X., Zhong, G., Tan, Z., Pun, C.-M., Yang, Q.: Visual-linguistic diagnostic semantic enhancement for medical report generation. J. Biomed. Inform. 161, 104764 (2025)","journal-title":"J. Biomed. Inform."},{"key":"2040_CR7","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W., Zou, Y.: Exploring and distilling posterior and prior knowledge for radiology report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13753\u2013 13762 ( 2021)","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"2040_CR8","unstructured":"Chen, Q., Zhao, R., Wang, S., Phan, V.M.H., Hengel, A., Verjans, J., Liao, Z., To, M.-S., Xia, Y., Chen, J., et al.: A survey of medical vision-and-language applications and their techniques. arXiv preprint arXiv: 2411.12195 (2024)"},{"key":"2040_CR9","doi-asserted-by":"publisher","first-page":"105669","DOI":"10.1016\/j.bspc.2023.105669","volume":"88","author":"P Lu","year":"2024","unstructured":"Lu, P., Hu, L., Mitelpunkt, A., Bhatnagar, S., Lu, L., Liang, H.: A hierarchical attention-based multimodal fusion framework for predicting the progression of Alzheimer\u2019s disease. Biomed. Signal Process. Control 88, 105669 (2024)","journal-title":"Biomed. Signal Process. Control"},{"key":"2040_CR10","unstructured":"Subedi, G.: Multimodal learning: generating precise chest x-ray report on thorax abnormality. Master\u2019s thesis, University of South Dakota (2023)"},{"key":"2040_CR11","doi-asserted-by":"crossref","unstructured":"Shin, H.-C., Roberts, K., Lu, L., Demner-Fushman, D., Yao, J., Summers, R.M.: Learning to read chest x-rays: recurrent neural cascade model for automated image annotation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2497\u20132506 (2016)","DOI":"10.1109\/CVPR.2016.274"},{"key":"2040_CR12","doi-asserted-by":"publisher","unstructured":"Ma, X., Liu, F., Yin, C., Wu, X., Ge, S., Zou, Y., Zhang, P., Sun, X.: Contrastive attention for automatic chest x-ray report generation. In: Findings of the Association for Computational Linguistics: ACL IJCNLP 2021, pp. 269\u2013280. Association for Computational Linguistics, ??? (2021). https:\/\/doi.org\/10.18653\/v1\/2021.findings-acl.23","DOI":"10.18653\/v1\/2021.findings-acl.23"},{"key":"2040_CR13","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3156\u20133164 (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"2040_CR14","doi-asserted-by":"publisher","unstructured":"Jing, B., Wang, Z., Xing, E.: Show, describe and conclude: on exploiting the structure information of chest X-ray reports. In: Korhonen, A., Traum, D., M\u00e0rquez, L. (eds.) Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 6570\u20136580. Association for Computational Linguistics, Florence, Italy (2019). https:\/\/doi.org\/10.18653\/v1\/P19-1657. https:\/\/aclanthology.org\/P19-1657\/","DOI":"10.18653\/v1\/P19-1657"},{"key":"2040_CR15","doi-asserted-by":"publisher","unstructured":"Chen, Z., Shen, Y., Song, Y., Wan, X.: Cross-modal memory networks for radiology report generation. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 5904\u20135914 (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.459","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"2040_CR16","unstructured":"Ji, S., Sun, W., Dong, H., Wu, H., Marttinen, P.: A unified review of deep learning for automated medical coding. arXiv preprint arXiv: 2201.02797 (2022)"},{"issue":"12","key":"2040_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3664615","volume":"56","author":"S Ji","year":"2024","unstructured":"Ji, S., Li, X., Sun, W., Dong, H., Taalas, A., Zhang, Y., Wu, H., Pitk\u00e4nen, E., Marttinen, P.: A unified review of deep learning for automated medical coding. ACM Comput. Surv. 56(12), 1\u201341 (2024)","journal-title":"ACM Comput. Surv."},{"key":"2040_CR18","doi-asserted-by":"crossref","unstructured":"Xue, Y., Xu, T., Rodney\u00a0Long, L., Xue, Z., Antani, S., Thoma, G.R., Huang, X.: Multimodal recurrent model with attention for automated radiology report generation. In: Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2018: 21st International Conference, Granada, Spain, September 16-20, 2018, Proceedings, Part I, pp. 457\u2013466. Springer, ??? (2018)","DOI":"10.1007\/978-3-030-00928-1_52"},{"key":"2040_CR19","unstructured":"Li, Y., Liang, X., Hu, Z., Xing, E.P.: Hybrid retrieval-generation reinforced agent for medical image report generation, vol. 31 (2018)"},{"key":"2040_CR20","doi-asserted-by":"crossref","unstructured":"Reale-Nosei, G., Amador-Dom\u00ednguez, E., Serrano, E.: From vision to text: a comprehensive review of natural image captioning in medical diagnosis and radiology report generation. Med. Image Anal. 97, 103264 (2024)","DOI":"10.1016\/j.media.2024.103264"},{"issue":"1","key":"2040_CR21","doi-asserted-by":"publisher","first-page":"4171","DOI":"10.1038\/s41598-023-31223-5","volume":"13","author":"A Selivanov","year":"2023","unstructured":"Selivanov, A., Rogov, O.Y., Chesakov, D., Shelmanov, A., Fedulova, I., Dylov, D.V.: Medical image captioning via generative pretrained transformers. Sci. Rep. 13(1), 4171 (2023)","journal-title":"Sci. Rep."},{"key":"2040_CR22","doi-asserted-by":"crossref","unstructured":"Jamil, A., Mahmood, K., Villar, M.G., Prola, T., Diez, I.D.L.T., Samad, M.A., Ashraf, I., et al.: Deep learning approaches for image captioning: opportunities, challenges and future potential. IEEE Access (2024)","DOI":"10.1109\/ACCESS.2024.3365528"},{"issue":"4","key":"2040_CR23","doi-asserted-by":"publisher","first-page":"1429","DOI":"10.3390\/s22041429","volume":"22","author":"H Lee","year":"2022","unstructured":"Lee, H., Cho, H., Park, J., Chae, J., Kim, J.: Cross encoder-decoder transformer with global-local visual extractor for medical image captioning. Sensors 22(4), 1429 (2022)","journal-title":"Sensors"},{"key":"2040_CR24","unstructured":"Yu, T., Lu, W., Yang, Y., Han, W., Huang, Q., Yu, J., Zhang, K.: Adapter-enhanced hierarchical cross-modal pre-training for lightweight medical report generation. IEEE J. Biomed. Health Inform. 22(4), 1429 (2025)"},{"key":"2040_CR25","doi-asserted-by":"crossref","unstructured":"Ji, W., Chung, A.C.: Unsupervised domain adaptation for medical image segmentation using transformer with meta attention. IEEE Trans. Med. Imaging 43(2), 820-831 (2023)","DOI":"10.1109\/TMI.2023.3322581"},{"key":"2040_CR26","unstructured":"Chen, W., Liu, Y., Wang, C., Li, G., Zhu, J., Lin, L.: Visual-linguistic causal intervention for radiology report generation. arXiv preprint arXiv: 2303.091171(8) (2023)"},{"issue":"4","key":"2040_CR27","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00530-025-01858-7","volume":"31","author":"M Usman","year":"2025","unstructured":"Usman, M., Hou, X., Guo, Y., Liang, Z., Yijia, Z.: Imgef: integrated multimodal graph-enhanced framework for radiology report generation. Multimedia Syst. 31(4), 1\u201315 (2025). https:\/\/doi.org\/10.1007\/s00530-025-01858-7","journal-title":"Multimedia Syst."},{"key":"2040_CR28","doi-asserted-by":"crossref","unstructured":"You, D., Liu, F., Ge, S., Xie, X., Zhang, J., Wu, X.: Aligntransformer: Hierarchical alignment of visual regions and disease tags for medical report generation. In: Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2021: 24th International Conference, Strasbourg, France, September 27\u2013October 1, 2021, Proceedings, Part III, pp. 72\u2013 82. Springer,( 2021)","DOI":"10.1007\/978-3-030-87199-4_7"},{"key":"2040_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Wang, X., Xu, Z., Yu, Q., Yuille, A., Xu, D.: When radiology report generation meets knowledge graph. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 12910\u2013 12917 ( 2020)","DOI":"10.1609\/aaai.v34i07.6989"},{"issue":"1","key":"2040_CR30","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1109\/TTS.2023.3234203","volume":"4","author":"T Dhar","year":"2023","unstructured":"Dhar, T., Dey, N., Borra, S., Sherratt, R.S.: Challenges of deep learning in medical image analysis-improving explainability and trust. IEEE Trans. Technol. Soc. 4(1), 68\u201375 (2023)","journal-title":"IEEE Trans. Technol. Soc."},{"key":"2040_CR31","unstructured":"Endo, M., Krishnan, R., Krishna, V., Ng, A.Y., Rajpurkar, P.: Retrieval-based chest x-ray report generation using a pre-trained contrastive language-image model. In: Machine Learning for Health, pp. 209\u2013 219 (2021). PMLR"},{"key":"2040_CR32","unstructured":"Liu, C., Tian, Y., Song, Y.: A systematic review of deep learning-based research on radiology report generation. arXiv preprint arXiv: 2311.14199 (2023)"},{"key":"2040_CR33","doi-asserted-by":"crossref","unstructured":"Chen, Q., Xie, Y., Wu, B., Chen, X., Ang, J., To, M.-S., Chang, X., Wu, Q.: Act like a radiologist: radiology report generation across anatomical regions. In: Proceedings of the Asian Conference on Computer Vision, pp. 1\u2013 17 (2024)","DOI":"10.1007\/978-981-96-0960-4_3"},{"key":"2040_CR34","doi-asserted-by":"crossref","unstructured":"Wang, Z., Liu, L., Wang, L., Zhou, L.: Metransformer: radiology report generation by transformer with multiple learnable expert tokens. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11558\u2013 11567 (2023)","DOI":"10.1109\/CVPR52729.2023.01112"},{"key":"2040_CR35","doi-asserted-by":"crossref","unstructured":"Pan, L., Zhao, Z., Lu, Y., Tang, K., Fu, L., Liang, Q., Peng, S.: Opportunities and challenges in the application of large artificial intelligence models in radiology. Meta-Radiology, 2(2), 100080 (2024)","DOI":"10.1016\/j.metrad.2024.100080"},{"key":"2040_CR36","doi-asserted-by":"crossref","unstructured":"Zong, Y., Mac\u00a0Aodha, O., Hospedales, T.: Self-supervised multimodal learning: a survey. IEEE Trans. Pattern Anal. Mach. Intell.47(7), 5299--5318 (2024)","DOI":"10.1109\/TPAMI.2024.3429301"},{"issue":"4","key":"2040_CR37","doi-asserted-by":"publisher","first-page":"2152","DOI":"10.1109\/JBHI.2024.3350077","volume":"28","author":"X Yi","year":"2024","unstructured":"Yi, X., Fu, Y., Liu, R., Zhang, H., Hua, R.: Tsget: two-stage global enhanced transformer for automatic radiology report generation. IEEE J. Biomed. Health Inform. 28(4), 2152\u20132162 (2024)","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"2040_CR38","doi-asserted-by":"crossref","unstructured":"Yi, X., Fu, Y., Yu, J., Liu, R., Zhang, H., Hua, R.: Lhr-rfl: linear hybrid-reward based reinforced focal learning for automatic radiology report generation. IEEE Trans. Med. Imaging (2024)","DOI":"10.1109\/TMI.2024.3507073"},{"key":"2040_CR39","unstructured":"Oord, A.v.d., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv preprint arXiv: 1807.03748 (2018)"},{"key":"2040_CR40","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u2013 1607 (2020)"},{"key":"2040_CR41","unstructured":"Chen, X., Fan, H., Girshick, R., He, K.: Improved baselines with momentum contrastive learning. arXiv preprint arXiv: 2003.04297 (2020)"},{"key":"2040_CR42","doi-asserted-by":"publisher","first-page":"104396","DOI":"10.1016\/j.jbi.2023.104396","volume":"143","author":"K Niu","year":"2023","unstructured":"Niu, K., Wu, Y., Li, Y., Li, M.: Retrieve and rerank for automated ICD coding via contrastive learning. J. Biomed. Inform. 143, 104396 (2023)","journal-title":"J. Biomed. Inform."},{"key":"2040_CR43","doi-asserted-by":"crossref","unstructured":"Huang, J., Li, Y., Ping, W., Huang, L.: Large margin neural language model. arXiv preprint arXiv: 1808.08987 (2018)","DOI":"10.18653\/v1\/D18-1150"},{"key":"2040_CR44","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607 (2020). PMLR"},{"key":"2040_CR45","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2040_CR46","unstructured":"Zhong, Q., Huang, M.: A contrastive learning method integrating pathological prior information for effective differentiation of histological categories in lung squamous cell carcinoma. Available at SSRN 4952226"},{"key":"2040_CR47","doi-asserted-by":"crossref","unstructured":"Qayyum, A., Razzak, I., Mazher, M., Khan, T., Ding, W., Niederer, S.: Two-stage self-supervised contrastive learning aided transformer for real-time medical image segmentation. IEEE J. Biomed. Health Inform. 47(7), 5299--5318 (2023)","DOI":"10.1109\/JBHI.2023.3340956"},{"key":"2040_CR48","doi-asserted-by":"crossref","unstructured":"Tian, Y., Pang, G., Liu, Y., Wang, C., Chen, Y., Liu, F., Singh, R., Verjans, J.W., Wang, M., Carneiro, G.: Unsupervised anomaly detection in medical images with a memory-augmented multi-level cross-attentional masked autoencoder. In: International Workshop on Machine Learning in Medical Imaging, pp. 11\u201321 (2023). Springer","DOI":"10.1007\/978-3-031-45676-3_2"},{"key":"2040_CR49","unstructured":"Guo, H., Zhang, Y., Gao, T., Su, J., Lv, P., Xu, M.: Remember and recall: associative-memory-based trajectory prediction. arXiv preprint arXiv: 2410.02201 (2024)"},{"key":"2040_CR50","doi-asserted-by":"crossref","unstructured":"Tao, Y., Ma, L., Yu, J., Zhang, H.: Memory-based cross-modal semantic alignment network for radiology report generation. IEEE J. Biomed. Health Inform. 28(7), 4145-4156  (2024)","DOI":"10.1109\/JBHI.2024.3393018"},{"key":"2040_CR51","doi-asserted-by":"crossref","unstructured":"Shahzadi, I., Madni, T.M., Janjua, U.I., Batool, G., Naz, B., Ali, M.Q.: Csamdt: conditional self attention memory-driven transformers for radiology report generation from chest x-ray. J. Imaging Inform. Med. 37(6), 1\u201313 (2024)","DOI":"10.1007\/s10278-024-01126-6"},{"key":"2040_CR52","doi-asserted-by":"crossref","unstructured":"Yan, A., He, Z., Lu, X., Du, J., Chang, E., Gentili, A., McAuley, J., Hsu, C.-N.: Weakly supervised contrastive learning for chest x-ray report generation. In: Findings of the Association for Computational Linguistics: EMNLP 2021, pp. 4009\u20134015. Association for Computational Linguistics, ??? (2021)","DOI":"10.18653\/v1\/2021.findings-emnlp.336"},{"key":"2040_CR53","doi-asserted-by":"crossref","unstructured":"Zeng, D., Kheir, J.N., Zeng, P., Shi, Y.: Contrastive learning with temporal correlated medical images: a case study using lung segmentation in chest x-rays. In: 2021 IEEE\/ACM International Conference On Computer Aided Design (ICCAD), pp. 1\u2013 7. IEEE, ??? (2021)","DOI":"10.1109\/ICCAD51958.2021.9643443"},{"key":"2040_CR54","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013 778 (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"2040_CR55","doi-asserted-by":"publisher","unstructured":"Chen, Z., Song, Y., Chang, T.-H., Wan, X.: Generating radiology reports via memory-driven transformer. In: Webber, B., Cohn, T., He, Y., Liu, Y. (eds.) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1439\u20131449. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.112. https:\/\/aclanthology.org\/2020.emnlp-main.112\/","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"2040_CR56","unstructured":"Arora, S., Khandeparkar, H., Khodak, M., Plevrakis, O., Saunshi, N.: A theoretical analysis of contrastive unsupervised representation learning. arXiv preprint arXiv: 1902.09229 (2019)"},{"issue":"2","key":"2040_CR57","doi-asserted-by":"publisher","first-page":"304","DOI":"10.1093\/jamia\/ocv080","volume":"23","author":"D Demner-Fushman","year":"2016","unstructured":"Demner-Fushman, D., Antani, S., Simpson, M., Thoma, G.R., McDonald, C.J.: Preparing a collection of radiology examinations for distribution and retrieval. J. Am. Med. Inform. Assoc. 23(2), 304\u2013310 (2016)","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"2040_CR58","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1038\/s41597-019-0322-0","volume":"6","author":"AE Johnson","year":"2019","unstructured":"Johnson, A.E., Pollard, T.J., Berkowitz, S.J., Greenbaum, N.R., Lungren, M.P., Deng, C.-Y., Mark, R.G., Horng, S.: Mimic-cxr, a de-identified publicly available database of chest radiographs with free-text reports. Sci. Data 6, 317 (2019)","journal-title":"Sci. Data"},{"key":"2040_CR59","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7\u20139, 2015, Conference Track Proceedings (2015)"},{"key":"2040_CR60","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.-J.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"2040_CR61","unstructured":"Banerjee, S., Lavie, A.: Meteor: an automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation And\/or Summarization, pp. 65\u201372 (2005)"},{"key":"2040_CR62","unstructured":"Lin, C.-Y.: Rouge: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u2013 81 (2004)"},{"key":"2040_CR63","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Zitnick, C.L., Parikh, D.: Cider: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"2040_CR64","doi-asserted-by":"publisher","unstructured":"Jing, B., Xie, P., Xing, E.: On the automatic generation of medical imaging reports. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, pp. 2577\u20132586 (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1240","DOI":"10.18653\/v1\/P18-1240"},{"key":"2040_CR65","doi-asserted-by":"crossref","unstructured":"Jing, B., Wang, Z., Xing, E.: Show, describe and conclude: on exploiting the structure information of chest x-ray reports. In: Proceedings of the 57th Conference of the Association for Computational Linguistics (ACL), pp. 6570\u20136580 (2019)","DOI":"10.18653\/v1\/P19-1657"},{"key":"2040_CR66","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W., Zou, Y.: Exploring and distilling posterior and prior knowledge for radiology report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13753\u201313762 (2021)","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"2040_CR67","doi-asserted-by":"publisher","unstructured":"Liu, F., Ge, S., Wu, X.: Competence-based multimodal curriculum learning for medical report generation. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, pp. 3001\u20133012 (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.234","DOI":"10.18653\/v1\/2021.acl-long.234"},{"key":"2040_CR68","unstructured":"Liu, F., You, C., Wu, X., Ge, S., Wang, S., Sun, X.: Auto-encoding knowledge graph for unsupervised medical report generation. CoRR (2021) arXiv: 2111.04318"},{"key":"2040_CR69","first-page":"2982","volume":"36","author":"B Yan","year":"2022","unstructured":"Yan, B., Pei, M.: Clinical-bert: vision-language pre-training for radiograph diagnosis and reports generation. Proc. AAAI Conf. Artif. Intell. 36, 2982\u20132990 (2022)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"2040_CR70","doi-asserted-by":"publisher","first-page":"102798","DOI":"10.1016\/j.media.2023.102798","volume":"86","author":"S Yang","year":"2023","unstructured":"Yang, S., Wu, X., Ge, S., Zheng, Z., Zhou, S.K., Xiao, L.: Radiology report generation with a learned knowledge base and multi-modal alignment. Med. Image Anal. 86, 102798 (2023)","journal-title":"Med. Image Anal."},{"key":"2040_CR71","doi-asserted-by":"publisher","first-page":"108482","DOI":"10.1016\/j.cmpb.2024.108482","volume":"258","author":"X Liu","year":"2025","unstructured":"Liu, X., Xin, J., Dai, B., Shen, Q., Huang, Z., Wang, Z.: Label correlated contrastive learning for medical report generation. Comput. Methods Programs Biomed. 258, 108482 (2025)","journal-title":"Comput. Methods Programs Biomed."},{"key":"2040_CR72","doi-asserted-by":"crossref","unstructured":"Sun, Y., Lee, Y.Z., Woodard, G.A., Zhu, H., Lian, C., Liu, M.: R2gen-mamba: a selective state space model for radiology report generation. In: 2025 IEEE 22nd International Symposium on Biomedical Imaging (ISBI), pp. 1\u20134 (2025). IEEE","DOI":"10.1109\/ISBI60581.2025.10980814"},{"key":"2040_CR73","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., He, K.: Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1492\u2013 1500 (2017)","DOI":"10.1109\/CVPR.2017.634"},{"key":"2040_CR74","doi-asserted-by":"publisher","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2261\u20132269 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.243","DOI":"10.1109\/CVPR.2017.243"},{"key":"2040_CR75","doi-asserted-by":"publisher","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 9992\u201310002 ( 2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00986","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2040_CR76","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv: 2010.11929 (2020)"},{"key":"2040_CR77","doi-asserted-by":"crossref","unstructured":"Sufian, A., Leo, M., Distante, C., Ghosh, A., Barman, D.: Can vision transformers with resnet\u2019s global features fairly authenticate demographic faces? In: International Conference on Pattern Recognition, pp. 357\u2013 370 (2024). Springer","DOI":"10.1007\/978-3-031-87657-8_25"},{"key":"2040_CR78","first-page":"012196","volume":"1544","author":"R Zhang","year":"2020","unstructured":"Zhang, R., Du, L., Xiao, Q., Liu, J.: Comparison of backbones for semantic segmentation network. J. Phys.: Conf. Ser. 1544, 012196 (2020). (IOP Publishing)","journal-title":"J. Phys.: Conf. Ser."}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02040-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02040-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02040-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T06:59:46Z","timestamp":1766127586000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02040-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,31]]},"references-count":78,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["2040"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02040-9","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2025,10,31]]},"assertion":[{"value":"3 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The data for this research project is publicly available and does not require additional consent for its use.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval and consent for data"}},{"value":"The authors declare no conflict of interest.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"462"}}