{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T15:12:48Z","timestamp":1769181168785,"version":"3.49.0"},"reference-count":50,"publisher":"Elsevier BV","issue":"2","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61966026"],"award-info":[{"award-number":["61966026"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004763","name":"Natural Science Foundation of Inner Mongolia Autonomous Region","doi-asserted-by":"publisher","award":["2020MS06015"],"award-info":[{"award-number":["2020MS06015"]}],"id":[{"id":"10.13039\/501100004763","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Journal of the Franklin Institute"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1016\/j.jfranklin.2025.108328","type":"journal-article","created":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T16:06:48Z","timestamp":1766074008000},"page":"108328","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"title":["Bridging Visual Analysis and Text Generation: A Hierarchical Multi-scale Visual Feature Flow Model for Accessible Radiographic Report Automation"],"prefix":"10.1016","volume":"363","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-4006-2873","authenticated-orcid":false,"given":"Hailong","family":"Zuo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8102-6875","authenticated-orcid":false,"given":"Zhi","family":"Weng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunlu","family":"Duan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zijing","family":"Lv","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Feifan","family":"Bi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiqiang","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.jfranklin.2025.108328_bib0001","doi-asserted-by":"crossref","DOI":"10.1016\/j.jfranklin.2025.107816","article-title":"AnDR-BLIP2: enhanced semantic understanding framework for industrial image anomaly detection and report generation","volume":"362","author":"Gao","year":"2025","journal-title":"Journal of the Franklin Institute"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0002","doi-asserted-by":"crossref","DOI":"10.1016\/j.jfranklin.2024.107285","article-title":"Class incremental learning with analytic learning for hyperspectral image classification","volume":"361","author":"Zhuang","year":"2024","journal-title":"Journal of the Franklin Institute"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0003","doi-asserted-by":"crossref","DOI":"10.1016\/j.jfranklin.2025.107532","article-title":"DCA-Unet: Enhancing small object segmentation in hyperspectral images with Dual Channel Attention Unet","volume":"362","author":"Han","year":"2025","journal-title":"Journal of the Franklin Institute"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0004","article-title":"Deep convolutional sparse dictionary learning for bearing fault diagnosis under variable speed condition","volume":"362","author":"Wang","year":"2025","journal-title":"Journal of the Franklin Institute"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0005","article-title":"Multi-level semantic-aware communication for multi-task image transmission","volume":"362","author":"Zhang","year":"2025","journal-title":"Journal of the Franklin Institute"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0006","doi-asserted-by":"crossref","DOI":"10.1016\/j.jfranklin.2025.107713","article-title":"Multi-scale hierarchical cross fusion network for hyperspectral image and LiDAR classification","volume":"362","author":"Pan","year":"2025","journal-title":"Journal of the Franklin Institute"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0007","doi-asserted-by":"crossref","DOI":"10.1016\/j.jfranklin.2025.107591","article-title":"Selective segmentation of inhomogeneous images based on local clustering and global smoothness","volume":"362","author":"Min","year":"2025","journal-title":"Journal of the Franklin Institute"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0008","doi-asserted-by":"crossref","DOI":"10.1016\/j.jfranklin.2024.106973","article-title":"Transformer-enhanced two-stream complementary convolutional neural network for hyperspectral image classification","volume":"361","author":"Pan","year":"2024","journal-title":"Journal of the Franklin Institute"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0009","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2023.102982","article-title":"Simulating doctors\u2019 thinking logic for chest X-ray report generation via Transformer-based Semantic Query learning","volume":"91","author":"Gao","year":"2024","journal-title":"Medical Image Analysis"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0010","series-title":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"7433","article-title":"Interactive and Explainable Region-guided Radiology Report Generation","author":"Tanida","year":"2023"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0011","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2024.103413","article-title":"Dual-modality visual feature flow for medical report generation","volume":"101","author":"Tang","year":"2025","journal-title":"Medical Image Analysis"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0012","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2022.102510","article-title":"Knowledge matters: Chest radiology report generation with general and specific knowledge","volume":"80","author":"Yang","year":"2022","journal-title":"Medical Image Analysis"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0013","doi-asserted-by":"crossref","first-page":"2657","DOI":"10.1109\/TMI.2024.3372638","article-title":"Multi-Grained Radiology Report Generation With Sentence-Level Image-Language Contrastive Learning","volume":"43","author":"Liu","year":"2024","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0014","series-title":"Applied Machine Learning and Data Analytics","first-page":"52","article-title":"Semantic Segmentation of the Lung to Examine the Effect of COVID-19 Using UNET Model","author":"Akinlade","year":"2023"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0015","doi-asserted-by":"crossref","DOI":"10.1016\/j.atech.2025.100774","article-title":"Vision transformers for automated detection of pig interactions in groups","volume":"10","author":"Taiwo","year":"2025","journal-title":"Smart Agricultural Technology"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0016","doi-asserted-by":"crossref","first-page":"4017","DOI":"10.1109\/TMI.2024.3412402","article-title":"Token-Mixer: Bind Image and Text in One Embedding Space for Medical Image Reporting","volume":"43","author":"Yang","year":"2024","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0017","doi-asserted-by":"crossref","first-page":"2803","DOI":"10.1109\/TMI.2022.3171661","article-title":"Automated Radiographic Report Generation Purely on Transformer: A Multicriteria Supervised Approach","volume":"41","author":"Wang","year":"2022","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0018","doi-asserted-by":"crossref","first-page":"4211","DOI":"10.1109\/TMI.2024.3416190","article-title":"PhraseAug: An Augmented Medical Report Generation Model With Phrasebook","volume":"43","author":"Mei","year":"2024","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0019","series-title":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"14194","article-title":"Instance-level Expert Knowledge and Aggregate Discriminative Attention for Radiology Report Generation","author":"Bu","year":"2024"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2023.102798","article-title":"Radiology report generation with a learned knowledge base and multi-modal alignment","volume":"86","author":"Yang","year":"2023","journal-title":"Medical Image Analysis"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0021","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2022.102603","article-title":"Uncertainty-aware report generation for chest X-rays by variational topic inference","volume":"82","author":"Najdenkoska","year":"2022","journal-title":"Medical Image Analysis"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0022","doi-asserted-by":"crossref","first-page":"2211","DOI":"10.1109\/TMI.2023.3245608","article-title":"Attributed Abnormality Graph Embedding for Clinically Accurate X-Ray Report Generation","volume":"42","author":"Yan","year":"2023","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0023","series-title":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"19809","article-title":"KiUT: Knowledge-injected U-Transformer for Radiology Report Generation","author":"Huang","year":"2023"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0024","series-title":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"3334","article-title":"Dynamic Graph Enhanced Contrastive Learning for Chest X-Ray Report Generation","author":"Li","year":"2023"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0025","series-title":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"13748","article-title":"Exploring and Distilling Posterior and Prior Knowledge for Radiology Report Generation","author":"Liu","year":"2021"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0026","doi-asserted-by":"crossref","first-page":"304","DOI":"10.1093\/jamia\/ocv080","article-title":"Preparing a collection of radiology examinations for distribution and retrieval","volume":"23","author":"Demner-Fushman","year":"2016","journal-title":"Journal of the American Medical Informatics Association"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0027","doi-asserted-by":"crossref","unstructured":"A.E.W. Johnson, T.J. Pollard, N.R. Greenbaum, M.P. Lungren, C. Deng, Y. Peng, Z. Lu, R.G. Mark, S.J. Berkowitz, S. Horng, MIMIC-CXR-JPG, a large publicly available database of labeled chest radiographs, arXiv E-Prints (2019) arXiv:1901.07042. 10.48550\/arXiv.1901.07042.","DOI":"10.1038\/s41597-019-0322-0"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0028","unstructured":"W. Indeewara, M. Hennayake, K. Rathnayake, T. Ambegoda, D. Meedeniya, Chest X-ray Dataset with Lung Segmentation, (n.d.). 10.13026\/9CY4-F535."},{"key":"10.1016\/j.jfranklin.2025.108328_bib0029","series-title":"Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing","first-page":"5904","article-title":"Cross-modal Memory Networks for Radiology Report Generation","volume":"1","author":"Chen","year":"2021"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0030","series-title":"Proceedings of the 40th Annual Meeting on Association for Computational Linguistics - ACL \u201902","first-page":"311","article-title":"BLEU: a method for automatic evaluation of machine translation","author":"Papineni","year":"2001"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0031","series-title":"IEEvaluation@ACL","article-title":"METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments","author":"Banerjee","year":"2005"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0032","series-title":"Annual Meeting of the Association for Computational Linguistics","article-title":"ROUGE: A Package for Automatic Evaluation of Summaries","author":"Lin","year":"2004"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0033","doi-asserted-by":"crossref","unstructured":"S.J. Rennie, E. Marcheret, Y. Mroueh, J. Ross, V. Goel, Self-critical Sequence Training for Image Captioning, arXiv E-Prints (2016) arXiv:1612.00563. 10.48550\/arXiv.1612.00563.","DOI":"10.1109\/CVPR.2017.131"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0034","doi-asserted-by":"crossref","unstructured":"J. Lu, C. Xiong, D. Parikh, R. Socher, Knowing When to Look: Adaptive Attention via A Visual Sentinel for Image Captioning, arXiv E-Prints (2016) arXiv:1612.01887. 10.48550\/arXiv.1612.01887.","DOI":"10.1109\/CVPR.2017.345"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0035","article-title":"Hybrid retrieval-generation reinforced agent for medical image report generation","volume":"31","author":"Li","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0036","series-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)","first-page":"1439","article-title":"Generating Radiology Reports via Memory-driven Transformer","author":"Chen","year":"2020"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0037","series-title":"2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"3156","article-title":"Show and tell: A neural image caption generator","author":"Vinyals","year":"2015"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0038","series-title":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","first-page":"6570","article-title":"Show, Describe and Conclude: On Exploiting the Structure Information of Chest X-ray Reports","author":"Jing","year":"2019"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0039","series-title":"Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics","first-page":"2577","article-title":"On the Automatic Generation of Medical Imaging Reports","volume":"1","author":"Jing","year":"2018"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0040","unstructured":"Y. Wang, Z. Lin, Z. Xu, H. Dong, J. Tian, J. Luo, Z. Shi, Y. Zhang, J. Fan, Z. He, Trust It or Not: Confidence-Guided Automatic Radiology Report Generation, arXiv E-Prints (2021) arXiv:2106.10887. 10.48550\/arXiv.2106.10887."},{"key":"10.1016\/j.jfranklin.2025.108328_bib0041","unstructured":"J. Wang, A. Bhalerao, T. Yin, S. See, Y. He, CAMANet: Class Activation Map Guided Attention Network for Radiology Report Generation, arXiv E-Prints (2022) arXiv:2211.01412. 10.48550\/arXiv.2211.01412."},{"key":"10.1016\/j.jfranklin.2025.108328_bib0042","doi-asserted-by":"crossref","first-page":"3079","DOI":"10.1109\/JBHI.2024.3371894","article-title":"Memory Guided Transformer With Spatio-Semantic Visual Extractor for Medical Report Generation","volume":"28","author":"Divya","year":"2024","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0043","doi-asserted-by":"crossref","unstructured":"J. Lu, C. Xiong, D. Parikh, R. Socher, Knowing when to look: Adaptive attention via a visual sentinel for image captioning, in: 2017: pp. 375\u2013383.","DOI":"10.1109\/CVPR.2017.345"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0044","unstructured":"C.Y. Li, X. Liang, Z. Hu, E.P. Xing, Hybrid Retrieval-Generation Reinforced Agent for Medical Image Report Generation, arXiv E-Prints (2018) arXiv:1805.08298. 10.48550\/arXiv.1805.08298."},{"key":"10.1016\/j.jfranklin.2025.108328_bib0045","doi-asserted-by":"crossref","unstructured":"F. Liu, X. Wu, S. Ge, W. Fan, Y. Zou, Exploring and Distilling Posterior and Prior Knowledge for Radiology Report Generation, arXiv E-Prints (2021) arXiv:2106.06963. 10.48550\/arXiv.2106.06963.","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0046","series-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2018","first-page":"457","article-title":"Multimodal Recurrent Model with Attention for Automated Radiology Report Generation","author":"Xue","year":"2018"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0047","series-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)","first-page":"1439","article-title":"Generating Radiology Reports via Memory-driven Transformer","author":"Chen","year":"2020"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0048","series-title":"Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing","first-page":"5904","article-title":"Cross-modal Memory Networks for Radiology Report Generation","volume":"1","author":"Chen","year":"2021"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0049","series-title":"Findings of the Association for Computational Linguistics: ACL 2022","first-page":"448","article-title":"Reinforced Cross-modal Alignment for Radiology Report Generation","author":"Qin","year":"2022"},{"key":"10.1016\/j.jfranklin.2025.108328_bib0050","unstructured":"S. Yang, X. Wu, S. Ge, S.K. Zhou, L. Xiao, Radiology Report Generation with a Learned Knowledge Base and Multi-modal Alignment, (2021). 10.48550\/ARXIV.2112.15011."}],"container-title":["Journal of the Franklin Institute"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0016003225008208?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0016003225008208?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T14:02:17Z","timestamp":1769176937000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0016003225008208"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":50,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["S0016003225008208"],"URL":"https:\/\/doi.org\/10.1016\/j.jfranklin.2025.108328","relation":{},"ISSN":["0016-0032"],"issn-type":[{"value":"0016-0032","type":"print"}],"subject":[],"published":{"date-parts":[[2026,1]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Bridging Visual Analysis and Text Generation: A Hierarchical Multi-scale Visual Feature Flow Model for Accessible Radiographic Report Automation","name":"articletitle","label":"Article Title"},{"value":"Journal of the Franklin Institute","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.jfranklin.2025.108328","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Published by Elsevier Inc. on behalf of The Franklin Institute.","name":"copyright","label":"Copyright"}],"article-number":"108328"}}