{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T22:11:24Z","timestamp":1774303884810,"version":"3.50.1"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T00:00:00Z","timestamp":1748649600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T00:00:00Z","timestamp":1748649600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62072070"],"award-info":[{"award-number":["62072070"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s00530-025-01858-7","type":"journal-article","created":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T05:58:25Z","timestamp":1748671105000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["IMGEF: integrated multimodal graph-enhanced framework for radiology report generation"],"prefix":"10.1007","volume":"31","author":[{"given":"Muhammad","family":"Usman","sequence":"first","affiliation":[]},{"given":"Xiaodi","family":"Hou","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Zonglin","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Zhang","family":"Yijia","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,31]]},"reference":[{"key":"1858_CR1","unstructured":"Bannur, S., Bouzid, K., Castro, D.C., Schwaighofer, A., Thieme, A., Bond-Taylor, S., Ilse, M., P\u00e9rez-Garc\u00eda, F., Salvatelli, V., Sharma, H., et al.: Maira-2: Grounded radiology report generation. arXiv preprint arXiv:2406.04449 (2024)"},{"key":"1858_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.121260","volume":"237","author":"Y Xue","year":"2024","unstructured":"Xue, Y., Tan, Y., Tan, L., Qin, J., Xiang, X.: Generating radiology reports via auxiliary signal guidance and a memory-driven network. Expert Syst. Appl. 237, 121260 (2024). https:\/\/doi.org\/10.1016\/j.eswa.2023.121260","journal-title":"Expert Syst. Appl."},{"key":"1858_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2024.103377","volume":"99","author":"W Lang","year":"2025","unstructured":"Lang, W., Liu, Z., Zhang, Y.: Dacg: Dual attention and context guidance model for radiology report generation. Med. Image Anal. 99, 103377 (2025)","journal-title":"Med. Image Anal."},{"key":"1858_CR4","doi-asserted-by":"crossref","unstructured":"Sun, Y., Lee, Y.Z., Woodard, G.A., Zhu, H., Lian, C., Liu, M.: R2gen-mamba: A selective state space model for radiology report generation. arXiv preprint arXiv:2410.18135 (2024)","DOI":"10.1109\/ISBI60581.2025.10980814"},{"key":"1858_CR5","first-page":"4776","volume":"38","author":"H Shen","year":"2024","unstructured":"Shen, H., Pei, M., Liu, J., Tian, Z.: Automatic radiology reports generation via memory alignment network. Proc. AAAI Conf. Artif. Intell. 38, 4776\u20134783 (2024)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"1858_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2024.104764","volume":"161","author":"J Chen","year":"2025","unstructured":"Chen, J., Huang, G., Yuan, X., Zhong, G., Tan, Z., Pun, C.-M., Yang, Q.: Visual-linguistic diagnostic semantic enhancement for medical report generation. J. Biomed. Inform. 161, 104764 (2025)","journal-title":"J. Biomed. Inform."},{"key":"1858_CR7","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W., Zou, Y.: Exploring and distilling posterior and prior knowledge for radiology report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13753\u201313762 (2021)","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"1858_CR8","first-page":"2982","volume":"36","author":"B Yan","year":"2022","unstructured":"Yan, B., Pei, M.: Clinical-bert: Vision-language pre-training for radiograph diagnosis and reports generation. Proc. AAAI Conf. Artif. Intell. 36, 2982\u20132990 (2022)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"1858_CR9","unstructured":"Chen, Q., Zhao, R., Wang, S., Phan, V.M.H., Hengel, A.v.d., Verjans, J., Liao, Z., To, M.-S., Xia, Y., Chen, J., et al.: A survey of medical vision-and-language applications and their techniques. arXiv preprint arXiv:2411.12195 (2024)"},{"key":"1858_CR10","doi-asserted-by":"publisher","unstructured":"Liu, F., Ren, X., Liu, Y., Wang, H., Sun, X.: simnet: Stepwise image-topic merging network for generating detailed and comprehensive image captions. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 137\u2013149 (2018). https:\/\/doi.org\/10.18653\/v1\/D18-1013","DOI":"10.18653\/v1\/D18-1013"},{"key":"1858_CR11","unstructured":"Xu, K., Ba, J., Kiros, R., Cho, K., Courville, A.C., Salakhutdinov, R., Zemel, R.S., Bengio, Y.: Show, attend and tell: Neural image caption generation with visual attention. In: Bach, F.R., Blei, D.M. (eds.) Proceedings of the 32nd International Conference on Machine Learning (ICML). JMLR Workshop and Conference Proceedings, vol. 37, pp. 2048\u20132057 (2015). JMLR.org. http:\/\/proceedings.mlr.press\/v37\/xuc15.html"},{"key":"1858_CR12","doi-asserted-by":"publisher","unstructured":"Huang, L., Wang, W., Chen, J., Wei, X.: Attention on attention for image captioning. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 4633\u20134642 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00473","DOI":"10.1109\/ICCV.2019.00473"},{"key":"1858_CR13","doi-asserted-by":"crossref","unstructured":"Lu, J., Xiong, C., Parikh, D., Socher, R.: Knowing when to look: Adaptive attention via a visual sentinel for image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 375\u2013383 (2017)","DOI":"10.1109\/CVPR.2017.345"},{"key":"1858_CR14","doi-asserted-by":"crossref","unstructured":"Rennie, S.J., Marcheret, E., Mroueh, Y., Ross, J., Goel, V.: Self-critical sequence training for image captioning. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7008\u20137024 (2017)","DOI":"10.1109\/CVPR.2017.131"},{"key":"1858_CR15","doi-asserted-by":"publisher","unstructured":"Liu, F., Ren, X., Liu, Y., Lei, K., Sun, X.: Exploring and distilling cross-modal information for image captioning. In: Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, IJCAI-19, pp. 5095\u20135101 (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/708","DOI":"10.24963\/ijcai.2019\/708"},{"key":"1858_CR16","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6077\u20136086 (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"1858_CR17","unstructured":"Huang, X., et al.: Meshed memory transformer for image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10578\u201310587 (2019)"},{"key":"1858_CR18","first-page":"13041","volume":"34","author":"L Zhou","year":"2020","unstructured":"Zhou, L., Palangi, H., Zhang, L., Hu, H., Corso, J., Gao, J.: Unified vision-language pre-training for image captioning and vqa. Proc. AAAI Conf. Artif. Intell. 34, 13041\u201313049 (2020)","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"issue":"4","key":"1858_CR19","doi-asserted-by":"publisher","first-page":"2152","DOI":"10.1109\/JBHI.2024.3350077","volume":"28","author":"X Yi","year":"2024","unstructured":"Yi, X., Fu, Y., Liu, R., Zhang, H., Hua, R.: Tsget: Two-stage global enhanced transformer for automatic radiology report generation. IEEE J. Biomed. Health Inform. 28(4), 2152\u20132162 (2024)","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"1858_CR20","unstructured":"Jing, B., et al.: On the automatic generation of medical imaging reports. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7985\u20137994 (2018)"},{"key":"1858_CR21","unstructured":"Chen, Z., et al.: Cross-modal memory networks for radiology report generation. arXiv preprint (2020) arXiv:2003.12052"},{"key":"1858_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2023.104496","volume":"146","author":"X Hou","year":"2023","unstructured":"Hou, X., Liu, Z., Li, X., Li, X., Sang, S., Zhang, Y.: Mkcl: Medical knowledge with contrastive learning model for radiology report generation. J. Biomed. Inform. 146, 104496 (2023). https:\/\/doi.org\/10.1016\/j.jbi.2023.104496","journal-title":"J. Biomed. Inform."},{"key":"1858_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.102802","volume":"88","author":"F Shamshad","year":"2023","unstructured":"Shamshad, F., Khan, S., Zamir, S.W., Khan, M.H., Hayat, M., Khan, F.S., Fu, H.: Transformers in medical imaging: A survey. Med. Image Anal. 88, 102802 (2023)","journal-title":"Med. Image Anal."},{"key":"1858_CR24","doi-asserted-by":"crossref","unstructured":"Liu, F., et al.: Exploring and distilling posterior and prior knowledge for radiology report generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 13753\u201313762 (2021)","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"1858_CR25","doi-asserted-by":"crossref","unstructured":"Yi, X., Fu, Y., Yu, J., Liu, R., Zhang, H., Hua, R.: Lhr-rfl: Linear hybrid-reward based reinforced focal learning for automatic radiology report generation. IEEE Transactions on Medical Imaging (2024)","DOI":"10.1109\/TMI.2024.3507073"},{"key":"1858_CR26","unstructured":"Atif, J., Hudelot, C., Fouquier, G., Bloch, I., Angelini, E.D.: From generic knowledge to specific reasoning for medical image interpretation using graph based representations. In: IJCAI, pp. 224\u2013229 (2007)"},{"key":"1858_CR27","unstructured":"Wang, X., Wang, S., Ding, Y., Li, Y., Wu, W., Rong, Y., Kong, W., Huang, J., Li, S., Yang, H., et al.: State space model for new-generation network alternative to transformers: A survey. arXiv preprint arXiv:2404.09516 (2024)"},{"key":"1858_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2023.105669","volume":"88","author":"P Lu","year":"2024","unstructured":"Lu, P., Hu, L., Mitelpunkt, A., Bhatnagar, S., Lu, L., Liang, H.: A hierarchical attention-based multimodal fusion framework for predicting the progression of Alzheimer\u2019s disease. Biomed. Signal Process. Control 88, 105669 (2024)","journal-title":"Biomed. Signal Process. Control"},{"key":"1858_CR29","unstructured":"Subedi, G.: Multimodal learning: Generating precise chest x-ray report on thorax abnormality. Master\u2019s thesis, University of South Dakota, United States (2023)"},{"issue":"2","key":"1858_CR30","doi-asserted-by":"publisher","first-page":"304","DOI":"10.1093\/jamia\/ocv080","volume":"23","author":"D Demner-Fushman","year":"2016","unstructured":"Demner-Fushman, D., Antani, S., Simpson, M., Thoma, G.R., McDonald, C.J.: Preparing a collection of radiology examinations for distribution and retrieval. J. Am. Med. Inform. Assoc. 23(2), 304\u2013310 (2016)","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"1858_CR31","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1038\/s41597-019-0322-0","volume":"6","author":"AE Johnson","year":"2019","unstructured":"Johnson, A.E., Pollard, T.J., Berkowitz, S.J., Greenbaum, N.R., Lungren, M.P., Deng, C.-Y., Mark, R.G., Horng, S.: Mimic-cxr, a de-identified publicly available database of chest radiographs with free-text reports. Sci. Data 6, 317 (2019)","journal-title":"Sci. Data"},{"key":"1858_CR32","unstructured":"Li, Y., Liang, X., Hu, Z., Xing, E.P.: Hybrid retrieval-generation reinforced agent for medical image report generation. Advances in neural information processing systems 31 (2018)"},{"key":"1858_CR33","doi-asserted-by":"publisher","unstructured":"Chen, Z., Song, Y., Chang, T.-H., Wan, X.: Generating radiology reports via memory-driven transformer. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1439\u20131449 (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.112","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"1858_CR34","doi-asserted-by":"crossref","unstructured":"Jing, B., Wang, Z., Xing, E.: Show, describe and conclude: On exploiting the structure information of chest x-ray reports. (2020)","DOI":"10.18653\/v1\/P19-1657"},{"key":"1858_CR35","first-page":"6","volume":"5","author":"D Kinga","year":"2015","unstructured":"Kinga, D., Adam, J.B., et al.: A method for stochastic optimization. Int. Conf. Learn. Represent. (ICLR) 5, 6 (2015). (San Diego, California)","journal-title":"Int. Conf. Learn. Represent. (ICLR)"},{"key":"1858_CR36","doi-asserted-by":"crossref","unstructured":"Chen, Z., Song, Y., Chang, T.-H., Wan, X.: Generating radiology reports via memory-driven transformer. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1439\u20131449 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"1858_CR37","unstructured":"Gupta, A.: Analyzing multimodal machine learning model performance and evaluation metrics for medical report generation. PhD thesis, Carnegie Mellon University Pittsburgh, PA (2024)"},{"key":"1858_CR38","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.-J.: Bleu: A method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"1858_CR39","unstructured":"Banerjee, S., Lavie, A.: Meteor: An automatic metric for mt evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation And\/or Summarization, pp. 65\u201372 (2005)"},{"key":"1858_CR40","unstructured":"Lin, C.-Y.: Rouge: A package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381 (2004)"},{"key":"1858_CR41","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Zitnick, C.L., Parikh, D.: Cider: Consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"1858_CR42","doi-asserted-by":"publisher","unstructured":"Chen, Z., Shen, Y., Song, Y., Wan, X.: Cross-modal memory networks for radiology report generation. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 5904\u20135914 (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.459","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"1858_CR43","doi-asserted-by":"publisher","unstructured":"Jing, B., Xie, P., Xing, E.: On the automatic generation of medical imaging reports. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, pp. 2577\u20132586 (2018). https:\/\/doi.org\/10.18653\/v1\/P18-1240","DOI":"10.18653\/v1\/P18-1240"},{"key":"1858_CR44","doi-asserted-by":"crossref","unstructured":"Jing, B., Wang, Z., Xing, E.: Show, describe and conclude: On exploiting the structure information of chest x-ray reports. In: Proceedings of the 57th Conference of the Association for Computational Linguistics (ACL), pp. 6570\u20136580 (2019)","DOI":"10.18653\/v1\/P19-1657"},{"key":"1858_CR45","doi-asserted-by":"publisher","unstructured":"Liu, F., Ge, S., Wu, X.: Competence-based multimodal curriculum learning for medical report generation. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, pp. 3001\u20133012 (2021). https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.234","DOI":"10.18653\/v1\/2021.acl-long.234"},{"key":"1858_CR46","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W., Zou, Y.: Exploring and distilling posterior and prior knowledge for radiology report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13753\u201313762 (2021)","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"1858_CR47","doi-asserted-by":"publisher","unstructured":"Lu, J., Xiong, C., Parikh, D., Socher, R.: Knowing when to look: Adaptive attention via a visual sentinel for image captioning. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3242\u20133250 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.345","DOI":"10.1109\/CVPR.2017.345"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01858-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01858-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01858-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T09:05:33Z","timestamp":1757927133000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01858-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,31]]},"references-count":47,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["1858"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01858-7","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,31]]},"assertion":[{"value":"14 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interest"}},{"value":"The data for this research project is publicly available and does not require additional consent for its use.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval and consent for data"}},{"value":"The authors confirm that no financial interest or personal interaction influenced the development or findings of the IMGEF framework presented in the paper.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Financial interest"}}],"article-number":"275"}}