{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T05:57:54Z","timestamp":1780984674569,"version":"3.54.1"},"reference-count":39,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,12]]},"DOI":"10.1016\/j.eswa.2026.133027","type":"journal-article","created":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T15:25:51Z","timestamp":1780327551000},"page":"133027","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Image mask-guided cross-modal network for radiology report generation"],"prefix":"10.1016","volume":"330","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-3674-0123","authenticated-orcid":false,"given":"Yang","family":"Liu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2569-8967","authenticated-orcid":false,"given":"Xiaodi","family":"Hou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xichao","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5843-4675","authenticated-orcid":false,"given":"Yijia","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2026.133027_bib0001","series-title":"Stud health technol inform.","doi-asserted-by":"crossref","DOI":"10.3233\/SHTI250905","article-title":"Aimr-meditell: Attention-infused mask RNN for medical image interpretation and report generation","author":"Chen","year":"2025"},{"key":"10.1016\/j.eswa.2026.133027_bib0002","series-title":"Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)","first-page":"5904","article-title":"Cross-modal memory networks for radiology report generation","volume":"1","author":"Chen","year":"2021"},{"key":"10.1016\/j.eswa.2026.133027_bib0003","series-title":"Proceedings of the 2020 Conference on empirical methods in natural language processing (EMNLP)","first-page":"1439","article-title":"Generating radiology reports via memory-driven transformer","author":"Chen","year":"2020"},{"key":"10.1016\/j.eswa.2026.133027_bib0004","doi-asserted-by":"crossref","unstructured":"Dandwate, P., Shahane, C., Jagtap, V. S., & Karande, S. C. (2023). Comparative study of transformer and LSTM network with attention mechanism on image captioning. arXiv: 2303.02648.","DOI":"10.1007\/978-981-99-3761-5_47"},{"key":"10.1016\/j.eswa.2026.133027_bib0005","series-title":"Proceedings of the sixth workshop on statistical machine translation","first-page":"85","article-title":"Meteor 1.3: Automatic metric for reliable optimization and evaluation of machine translation systems","author":"Denkowski","year":"2011"},{"issue":"3","key":"10.1016\/j.eswa.2026.133027_bib0006","doi-asserted-by":"crossref","DOI":"10.1016\/S1470-2045(25)00008-7","article-title":"Ai model using clinical images for genomic prediction and tailored treatment in patients with cancer","volume":"26","author":"Guo","year":"2025","journal-title":"The Lancet Oncology"},{"issue":"2","key":"10.1016\/j.eswa.2026.133027_bib0007","doi-asserted-by":"crossref","first-page":"2805","DOI":"10.1097\/JS9.0000000000003668","article-title":"Current concerns and future directions of large language model chatGPT in medicine: A machine-learning-driven global-scale bibliometric analysis","volume":"112","author":"Guo","year":"2025","journal-title":"International Journal of Surgery"},{"key":"10.1016\/j.eswa.2026.133027_bib0008","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.126394","article-title":"Recalibrated cross-modal alignment network for radiology report generation with weakly supervised contrastive learning","volume":"269","author":"Hou","year":"2025","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.133027_bib0009","series-title":"2023 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"19809","article-title":"KiUT: Knowledge-injected u-transformer for radiology report generation","author":"Huang","year":"2023"},{"key":"10.1016\/j.eswa.2026.133027_bib0010","series-title":"2015 IEEE Conference on computer vision and pattern recognition (CVPR)","first-page":"3128","article-title":"Deep visual-semantic alignments for generating image descriptions","author":"Karpathy","year":"2014"},{"key":"10.1016\/j.eswa.2026.133027_bib0011","series-title":"3rd International Conference on Learning Representations (ICLR)","first-page":"1","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2015"},{"key":"10.1016\/j.eswa.2026.133027_bib0012","series-title":"Proceedings of the AAAI Conference on artificial intelligence","first-page":"6666","article-title":"Knowledge-driven encode, retrieve, paraphrase for medical image report generation","volume":"vol. 33","author":"Li","year":"2019"},{"key":"10.1016\/j.eswa.2026.133027_bib0013","series-title":"2023 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"3334","article-title":"Dynamic graph enhanced contrastive learning for chest x-ray report generation","author":"Li","year":"2023"},{"key":"10.1016\/j.eswa.2026.133027_bib0014","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1007\/s11280-022-01013-6","article-title":"Auxiliary signal-guided knowledge encoder-decoder for medical report generation","volume":"26","author":"Li","year":"2020","journal-title":"World Wide Web"},{"key":"10.1016\/j.eswa.2026.133027_bib0015","series-title":"Text summarization branches out","first-page":"74","article-title":"ROUGE: A package for automatic evaluation of summaries","author":"Lin","year":"2004"},{"key":"10.1016\/j.eswa.2026.133027_bib0016","series-title":"Annual meeting of the association for computational linguistics","article-title":"Competence-based multimodal curriculum learning for medical report generation","author":"Liu","year":"2022"},{"key":"10.1016\/j.eswa.2026.133027_bib0017","series-title":"Findings","article-title":"Contrastive attention for automatic chest x-ray report generation","author":"Liu","year":"2021"},{"key":"10.1016\/j.eswa.2026.133027_bib0018","series-title":"2019 IEEE\/CVF International conference on computer vision, ICCV 2019, Seoul, Korea (South), october 27 - November 2, 2019","first-page":"4239","article-title":"Generating diverse and descriptive image captions using visual paraphrases","author":"Liu","year":"2019"},{"key":"10.1016\/j.eswa.2026.133027_bib0019","series-title":"AAAI-25, sponsored by the association for the advancement of artificial intelligence, February 25 - March 4, 2025, Philadelphia, Pa, USA","first-page":"5595","article-title":"HC-LLM: Historical-constrained large language models for radiology report generation","author":"Liu","year":"2025"},{"key":"10.1016\/j.eswa.2026.133027_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.cmpb.2024.108482","article-title":"Label correlated contrastive learning for medical report generation","volume":"258","author":"Liu","year":"2025","journal-title":"Computer Methods and Programs in Biomedicine"},{"key":"10.1016\/j.eswa.2026.133027_bib0021","series-title":"2017 IEEE conference on computer vision and pattern recognition (CVPR)","first-page":"3242","article-title":"Knowing when to look: Adaptive attention via a visual sentinel for image captioning","author":"Lu","year":"2016"},{"key":"10.1016\/j.eswa.2026.133027_bib0022","series-title":"Proceedings of the 23rd Workshop on biomedical natural language processing","first-page":"603","article-title":"AIRI at RRG24: LLaVa with specialised encoder and decoder","author":"Munkhoeva","year":"2024"},{"key":"10.1016\/j.eswa.2026.133027_bib0023","series-title":"2020 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"10968","article-title":"X-linear attention networks for image captioning","author":"Pan","year":"2020"},{"key":"10.1016\/j.eswa.2026.133027_bib0024","series-title":"Proceedings of the 40th Annual meeting of the association for computational linguistics","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","author":"Papineni","year":"2002"},{"key":"10.1016\/j.eswa.2026.133027_bib0025","series-title":"Findings of the association for computational linguistics: ACL 2022","first-page":"448","article-title":"Reinforced cross-modal alignment for radiology report generation","author":"Qin","year":"2022"},{"key":"10.1016\/j.eswa.2026.133027_bib0026","doi-asserted-by":"crossref","unstructured":"Seibold, C., Jaus, A., Fink, M. A., Kim, M. S., Rei\u00df, S., Herrmann, K., Kleesiek, J., & Stiefelhagen, R. (2023). Accurate fine-grained segmentation of human anatomy in radiographs via volumetric pseudo-labeling. arXiv: 2306.03934.","DOI":"10.21203\/rs.3.rs-3687730\/v1"},{"key":"10.1016\/j.eswa.2026.133027_bib0027","unstructured":"Seibold, C., Rei\u00df, S., Sarfraz, S., Fink, M. A., Mayer, V. L., Sellner, J., Kim, M. S., Maier-Hein, K. H., Kleesiek, J., & Stiefelhagen, R. (2022). Detailed annotations of chest X-rays via CT projection for report understanding. arXiv: 2210.03416."},{"key":"10.1016\/j.eswa.2026.133027_bib0028","series-title":"Conference on empirical methods in natural language processing","article-title":"CheXBERT: Combining automatic labelers and expert annotations for accurate radiology report labeling using BERT","author":"Smit","year":"2020"},{"issue":"1","key":"10.1016\/j.eswa.2026.133027_bib0029","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1016\/0720-048X(93)90023-G","article-title":"Image segmentation: Methods and applications in diagnostic radiology and nuclear medicine","volume":"17","author":"Suetens","year":"1993","journal-title":"European Journal of Radiology"},{"key":"10.1016\/j.eswa.2026.133027_bib0030","doi-asserted-by":"crossref","first-page":"18727","DOI":"10.1109\/JSTARS.2024.3471625","article-title":"A lightweight sparse focus transformer for remote sensing image change captioning","volume":"17","author":"Sun","year":"2024","journal-title":"IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing"},{"key":"10.1016\/j.eswa.2026.133027_bib0031","series-title":"2023 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"7433","article-title":"Interactive and explainable region-guided radiology report generation","author":"Tanida","year":"2023"},{"key":"10.1016\/j.eswa.2026.133027_bib0032","doi-asserted-by":"crossref","first-page":"4145","DOI":"10.1109\/JBHI.2024.3393018","article-title":"Memory-based cross-modal semantic alignment network for radiology report generation","volume":"28","author":"Tao","year":"2024","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"10.1016\/j.eswa.2026.133027_bib0033","series-title":"Neural information processing systems","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.eswa.2026.133027_bib0034","series-title":"2015 IEEE Conference on computer vision and pattern recognition (CVPR)","first-page":"3156","article-title":"Show and tell: A neural image caption generator","author":"Vinyals","year":"2014"},{"key":"10.1016\/j.eswa.2026.133027_bib0035","series-title":"European conference on computer vision","first-page":"563","article-title":"Cross-modal prototype driven network for radiology report generation","author":"Wang","year":"2022"},{"key":"10.1016\/j.eswa.2026.133027_bib0036","series-title":"2023 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"11558","article-title":"Metransformer: Radiology report generation by transformer with multiple learnable expert tokens","author":"Wang","year":"2023"},{"key":"10.1016\/j.eswa.2026.133027_bib0037","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.121260","article-title":"Generating radiology reports via auxiliary signal guidance and a memory-driven network","volume":"237","author":"Xue","year":"2024","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.eswa.2026.133027_bib0038","series-title":"Medical image computing and computer assisted intervention \u2013 MICCAI 2025","first-page":"152","article-title":"Diff-RRG: Longitudinal disease-wise patch difference as guidance for LLM-based radiology report generation","author":"Yun","year":"2026"},{"key":"10.1016\/j.eswa.2026.133027_bib0039","doi-asserted-by":"crossref","first-page":"476","DOI":"10.1016\/j.neucom.2018.11.004","article-title":"A multimodal fusion approach for image captioning","volume":"329","author":"Zhao","year":"2019","journal-title":"Neurocomputing"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095741742601938X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095741742601938X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T05:05:37Z","timestamp":1780981537000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S095741742601938X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,12]]},"references-count":39,"alternative-id":["S095741742601938X"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.133027","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Image mask-guided cross-modal network for radiology report generation","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.133027","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"133027"}}