{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T06:55:55Z","timestamp":1781592955417,"version":"3.54.5"},"reference-count":62,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.engappai.2026.115207","type":"journal-article","created":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T09:05:41Z","timestamp":1779959141000},"page":"115207","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"P1","title":["Bridging the image\u2013text gap: Reinforced Cross-modal Abnormality Driven Transformer for automatic chest X-ray report generation"],"prefix":"10.1016","volume":"179","author":[{"given":"Xiulong","family":"Yi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"You","family":"Fu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rui","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Enxu","family":"Bi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jianguo","family":"Liang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1772-2419","authenticated-orcid":false,"given":"Rong","family":"Hua","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2026.115207_b1","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2023.107732","article-title":"NPoSC-A3: A novel part of speech clues-aware adaptive attention mechanism for image captioning","volume":"131","author":"Al-Qatf","year":"2024","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115207_b2","unstructured":"Banerjee, S., Lavie, A., 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the Acl Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/Or Summarization. pp. 65\u201372."},{"issue":"10","key":"10.1016\/j.engappai.2026.115207_b3","doi-asserted-by":"crossref","first-page":"7005","DOI":"10.1109\/TCSVT.2022.3178844","article-title":"Vision-enhanced and consensus-aware transformer for image captioning","volume":"32","author":"Cao","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.engappai.2026.115207_b4","first-page":"277","article-title":"MMTN: multi-modal memory transformer network for image-report consistent medical report generation","volume":"vol. 37, no. 1","author":"Cao","year":"2023"},{"key":"10.1016\/j.engappai.2026.115207_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.compmedimag.2024.102342","article-title":"Medical report generation based on multimodal federated learning","volume":"113","author":"Chen","year":"2024","journal-title":"Comput. Med. Imaging Graph."},{"key":"10.1016\/j.engappai.2026.115207_b6","doi-asserted-by":"crossref","unstructured":"Chen, Z., Shen, Y., Song, Y., Wan, X., 2021. Cross-modal memory networks for radiology report generation. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics. pp. 5904\u20135914.","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"10.1016\/j.engappai.2026.115207_b7","doi-asserted-by":"crossref","unstructured":"Chen, Z., Song, Y., Chang, T.H., Wan, X., 2020. Generating radiology reports via memory-driven transformer. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing.","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"10.1016\/j.engappai.2026.115207_b8","doi-asserted-by":"crossref","unstructured":"Cornia, M., Stefanini, M., Baraldi, L., Cucchiara, R., 2020. Meshed-memory transformer for image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 10578\u201310587.","DOI":"10.1109\/CVPR42600.2020.01059"},{"key":"10.1016\/j.engappai.2026.115207_b9","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L., 2009. Imagenet: A large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition. pp. 248\u2013255.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"10.1016\/j.engappai.2026.115207_b10","doi-asserted-by":"crossref","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K., 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT. pp. 4171\u20134186.","DOI":"10.18653\/v1\/N19-1423"},{"key":"10.1016\/j.engappai.2026.115207_b11","first-page":"304","article-title":"Preparing a collection of radiology examinations for distribution and retrieval","author":"Dina","year":"2016","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"10.1016\/j.engappai.2026.115207_b12","doi-asserted-by":"crossref","DOI":"10.1109\/JBHI.2024.3371894","article-title":"Memory guided transformer with spatio-semantic visual extractor for medical report generation","author":"Divya","year":"2024","journal-title":"IEEE J. Biomed. Health Inform"},{"key":"10.1016\/j.engappai.2026.115207_b13","doi-asserted-by":"crossref","unstructured":"Fang, Z., Wang, J., Hu, X., Liang, L., Gan, Z., Wang, L., Yang, Y., Liu, Z., 2022. Injecting semantic concepts into end-to-end image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 18009\u201318019.","DOI":"10.1109\/CVPR52688.2022.01748"},{"key":"10.1016\/j.engappai.2026.115207_b14","series-title":"Findings of the Association for Computational Linguistics: ACL 2022","first-page":"448","article-title":"Reinforced cross-modal alignment for radiology report generation","author":"Han","year":"2022"},{"key":"10.1016\/j.engappai.2026.115207_b15","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J., 2016. Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"10.1016\/j.engappai.2026.115207_b16","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput."},{"key":"10.1016\/j.engappai.2026.115207_b17","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.109134","article-title":"Attribute-driven filtering: A new attributes predicting approach for fine-grained image captioning","volume":"137","author":"Hossen","year":"2024","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115207_b18","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der Maaten, L., Weinberger, K.Q., 2017. Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 4700\u20134708.","DOI":"10.1109\/CVPR.2017.243"},{"key":"10.1016\/j.engappai.2026.115207_b19","doi-asserted-by":"crossref","unstructured":"Jin, H., Che, H., Lin, Y., Chen, H., 2024. PromptMRG: Diagnosis-Driven Prompts for Medical Report Generation. In: The Thirty-Eighth AAAI Conference on Artificial Intelligence.","DOI":"10.1609\/aaai.v38i3.28038"},{"key":"10.1016\/j.engappai.2026.115207_b20","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.128122","article-title":"Improving radiology report generation with multi-grained abnormality prediction","volume":"600","author":"Jin","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.engappai.2026.115207_b21","doi-asserted-by":"crossref","unstructured":"Jing, B., Wang, Z., Xing, E., 2019. Show, describe and conclude: On exploiting the structure information of chest x-ray reports. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics. pp. 6570\u20136580.","DOI":"10.18653\/v1\/P19-1657"},{"issue":"1","key":"10.1016\/j.engappai.2026.115207_b22","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1038\/s41597-019-0322-0","article-title":"MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports","volume":"6","author":"Johnson","year":"2019","journal-title":"Sci. Data"},{"key":"10.1016\/j.engappai.2026.115207_b23","doi-asserted-by":"crossref","unstructured":"Kang, B., Zhang, Y., Xiong, Y., Jia, X., Jiao, J., Li, J., 2023. Bridging the Gap: Cross-modal Knowledge Driven Network for Radiology Report Generation. In: 2023 IEEE International Conference on Bioinformatics and Biomedicine. BIBM, pp. 1202\u20131209.","DOI":"10.1109\/BIBM58861.2023.10385967"},{"key":"10.1016\/j.engappai.2026.115207_b24","article-title":"Hybrid retrieval-generation reinforced agent for medical image report generation","volume":"31","author":"Li","year":"2018"},{"key":"10.1016\/j.engappai.2026.115207_b25","doi-asserted-by":"crossref","unstructured":"Li, M., Lin, B., Chen, Z., Lin, H., Liang, X., Chang, X., 2023. Dynamic Graph Enhanced Contrastive Learning for Chest X-ray Report Generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 3334\u20133343.","DOI":"10.1109\/CVPR52729.2023.00325"},{"key":"10.1016\/j.engappai.2026.115207_b26","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.110358","article-title":"Dynamic window sampling strategy for image captioning","volume":"148","author":"Li","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115207_b27","doi-asserted-by":"crossref","unstructured":"Li, Y., Yang, B., Cheng, X., Zhu, Z., Li, H., Zou, Y., 2023. Unify, align and refine: Multi-level semantic alignment for radiology report generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 2863\u20132874.","DOI":"10.1109\/ICCV51070.2023.00268"},{"key":"10.1016\/j.engappai.2026.115207_b28","article-title":"Entangled transformer for image captioning","volume":"vol. 36, no. 3","author":"Li","year":"2019"},{"key":"10.1016\/j.engappai.2026.115207_b29","series-title":"Text Summarization Branches Out","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004"},{"key":"10.1016\/j.engappai.2026.115207_b30","doi-asserted-by":"crossref","unstructured":"Liu, C., Tian, Y., Chen, W., Song, Y., Zhang, Y., 2024. Bootstrapping Large Language Models for Radiology Report Generation. In: The Thirty-Eighth AAAI Conference on Artificial Intelligence.","DOI":"10.1609\/aaai.v38i17.29826"},{"key":"10.1016\/j.engappai.2026.115207_b31","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W., Zou, Y., 2021a. Exploring and distilling posterior and prior knowledge for radiology report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 13753\u201313762.","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"10.1016\/j.engappai.2026.115207_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.compmedimag.2024.102486","article-title":"Automatic medical report generation based on deep learning: A state of the art survey","volume":"120","author":"Liu","year":"2025","journal-title":"Comput. Med. Imaging Graph."},{"key":"10.1016\/j.engappai.2026.115207_b33","series-title":"Findings of the Association for Computational Linguistics: ACL-IJCNLP","article-title":"Contrastive attention for automatic chest x-ray report generation","author":"Liu","year":"2021"},{"key":"10.1016\/j.engappai.2026.115207_b34","article-title":"From observation to concept: A flexible multi-view paradigm for medical report generation","author":"Liu","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.engappai.2026.115207_b35","first-page":"2286","article-title":"Dual-level collaborative transformer for image captioning","volume":"vol. 35, no. 3","author":"Luo","year":"2021"},{"issue":"1","key":"10.1016\/j.engappai.2026.115207_b36","doi-asserted-by":"crossref","first-page":"13","DOI":"10.1007\/s00607-022-01098-x","article-title":"AMAE: Adversarial multimodal auto-encoder for crisis-related tweet analysis","volume":"105","author":"Lv","year":"2023","journal-title":"Computing"},{"issue":"5","key":"10.1016\/j.engappai.2026.115207_b37","doi-asserted-by":"crossref","first-page":"2979","DOI":"10.1007\/s00530-022-00916-8","article-title":"TMIF: transformer-based multi-modal interactive fusion for automatic rumor detection","volume":"29","author":"Lv","year":"2023","journal-title":"Multimedia Syst."},{"key":"10.1016\/j.engappai.2026.115207_b38","doi-asserted-by":"crossref","first-page":"3723","DOI":"10.1109\/TMM.2022.3164787","article-title":"Knowing what it is: semantic-enhanced dual attention transformer","volume":"25","author":"Ma","year":"2022","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.engappai.2026.115207_b39","first-page":"449","article-title":"Surface defects of rolled metal products recognised by a deep neural network under different illuminance levels and low-amplitude vibration","author":"Maruschak","year":"2025"},{"key":"10.1016\/j.engappai.2026.115207_b40","doi-asserted-by":"crossref","DOI":"10.1109\/TGRS.2023.3328181","article-title":"Prior knowledge-guided transformer for remote sensing image captioning","author":"Meng","year":"2023","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.engappai.2026.115207_b41","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2022.102603","article-title":"Uncertainty-aware report generation for chest X-rays by variational topic inference","volume":"82","author":"Najdenkoska","year":"2022","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.engappai.2026.115207_b42","doi-asserted-by":"crossref","DOI":"10.1016\/j.artmed.2023.102633","article-title":"Improving chest X-ray report generation by leveraging warm starting","author":"Nicolson","year":"2023"},{"key":"10.1016\/j.engappai.2026.115207_b43","doi-asserted-by":"crossref","DOI":"10.1016\/j.compmedimag.2023.102320","article-title":"Deep learning for report generation on chest X-ray images","volume":"111","author":"Ouis","year":"2024","journal-title":"Comput. Med. Imaging Graph."},{"key":"10.1016\/j.engappai.2026.115207_b44","doi-asserted-by":"crossref","unstructured":"Pan, Y., Yao, T., Li, Y., Mei, T., 2020. X-linear attention networks for image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 10971\u201310980.","DOI":"10.1109\/CVPR42600.2020.01098"},{"key":"10.1016\/j.engappai.2026.115207_b45","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, e.J., 2002. BLEU: A method for automatic evaluation of machine translation. In: Proc. 40th Annu. Meeting Assoc. Comput. Linguistics. pp. 311\u2013318.","DOI":"10.3115\/1073083.1073135"},{"key":"10.1016\/j.engappai.2026.115207_b46","doi-asserted-by":"crossref","unstructured":"Rennie, S.J., Marcheret, E., Mroueh, Y., Ross, J., Goel, V., 2017. Self-critical sequence training for image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 7008\u20137024.","DOI":"10.1109\/CVPR.2017.131"},{"key":"10.1016\/j.engappai.2026.115207_b47","first-page":"4776","article-title":"Automatic radiology reports generation via memory alignment network","volume":"vol. 38, no. 5","author":"Shen","year":"2024"},{"key":"10.1016\/j.engappai.2026.115207_b48","doi-asserted-by":"crossref","unstructured":"Shi, J., Wang, S., Wang, R., Ma, S., 2022. Aimnet: Adaptive image-tag merging network for automatic medical report generation. In: ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing. ICASSP, pp. 7737\u20137741.","DOI":"10.1109\/ICASSP43922.2022.9747702"},{"key":"10.1016\/j.engappai.2026.115207_b49","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., N. Gomez, A., Kaiser, L., 2017. Attention is all you need."},{"key":"10.1016\/j.engappai.2026.115207_b50","doi-asserted-by":"crossref","unstructured":"Wang, J., Abhir, B., He, Y., 2022. Cross-modal prototype driven network for radiology report generation. In: European Conference on Computer Vision. pp. 563\u2013579.","DOI":"10.1007\/978-3-031-19833-5_33"},{"key":"10.1016\/j.engappai.2026.115207_b51","article-title":"CAMANet: class activation map guided attention network for radiology report generation","author":"Wang","year":"2024","journal-title":"IEEE J. Biomed. Health Inform"},{"issue":"10","key":"10.1016\/j.engappai.2026.115207_b52","doi-asserted-by":"crossref","first-page":"2803","DOI":"10.1109\/TMI.2022.3171661","article-title":"Automated radiographic report generation purely on transformer: A multicriteria supervised approach","volume":"41","author":"Wang","year":"2022","journal-title":"IEEE Trans. Med. Imaging"},{"issue":"11","key":"10.1016\/j.engappai.2026.115207_b53","doi-asserted-by":"crossref","first-page":"5631","DOI":"10.1109\/JBHI.2022.3197162","article-title":"Prior guided transformer for accurate radiology reports generation","volume":"26","author":"Yan","year":"2022","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"10.1016\/j.engappai.2026.115207_b54","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2023.102798","article-title":"Radiology report generation with a learned knowledge base and multi-modal alignment","volume":"86","author":"Yang","year":"2023","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.engappai.2026.115207_b55","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2022.102510","article-title":"Knowledge matters: Chest radiology report generation with general and specific knowledge","volume":"80","author":"Yang","year":"2022","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.engappai.2026.115207_b56","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2025.108102","article-title":"Radiology report generation via visual-semantic ambivalence-aware network and focal self-critical sequence training","volume":"194","author":"Yi","year":"2026","journal-title":"Neural Netw."},{"key":"10.1016\/j.engappai.2026.115207_b57","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2023.105742","article-title":"Unsupervised disease tags for automatic radiology report generation","volume":"89","author":"Yi","year":"2024","journal-title":"Biomed. Signal Process. Control."},{"key":"10.1016\/j.engappai.2026.115207_b58","doi-asserted-by":"crossref","DOI":"10.1109\/JBHI.2024.3350077","article-title":"TSGET: Two-stage global enhanced transformer for automatic radiology report generation","author":"Yi","year":"2024","journal-title":"IEEE J. Biomed. Health Inform"},{"key":"10.1016\/j.engappai.2026.115207_b59","doi-asserted-by":"crossref","first-page":"1494","DOI":"10.1109\/TMI.2024.3507073","article-title":"Lhr-rfl: Linear hybrid-reward-based reinforced focal learning for automatic radiology report generation","volume":"44","author":"Yi","year":"2025","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.engappai.2026.115207_b60","series-title":"Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2021: 24th International Conference, Strasbourg, France, September 27\u2013October 1, 2021, Proceedings, Part III 24","first-page":"72","article-title":"Aligntransformer: Hierarchical alignment of visual regions and disease tags for medical report generation","author":"You","year":"2021"},{"key":"10.1016\/j.engappai.2026.115207_b61","doi-asserted-by":"crossref","DOI":"10.1016\/j.compbiomed.2023.107522","article-title":"Visual prior-based cross-modal alignment network for radiology report generation","author":"Zhang","year":"2023","journal-title":"Comput. Biol. Med."},{"key":"10.1016\/j.engappai.2026.115207_b62","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Wang, M., Liu, D., Hu, Z., Zhang, H., 2020. More grounded image captioning by distilling image-text matching model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 4777\u20134786.","DOI":"10.1109\/CVPR42600.2020.00483"}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626014910?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626014910?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T06:43:35Z","timestamp":1781592215000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626014910"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":62,"alternative-id":["S0952197626014910"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.115207","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Bridging the image\u2013text gap: Reinforced Cross-modal Abnormality Driven Transformer for automatic chest X-ray report generation","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.115207","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"115207"}}