{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T02:45:37Z","timestamp":1778294737989,"version":"3.51.4"},"reference-count":62,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"7","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62406093"],"award-info":[{"award-number":["62406093"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62125201"],"award-info":[{"award-number":["62125201"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U24B20174"],"award-info":[{"award-number":["U24B20174"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62422204"],"award-info":[{"award-number":["62422204"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62286082"],"award-info":[{"award-number":["62286082"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"Zhejiang Provincial Natural Science Foundation of China","doi-asserted-by":"publisher","award":["LQ24F020032"],"award-info":[{"award-number":["LQ24F020032"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"Zhejiang Provincial Natural Science Foundation of China","doi-asserted-by":"publisher","award":["LDT23F02025F02"],"award-info":[{"award-number":["LDT23F02025F02"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Central Government Guiding Fund for Local Science and Technology Development","award":["2024Y01018"],"award-info":[{"award-number":["2024Y01018"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Med. Imaging"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1109\/tmi.2025.3554498","type":"journal-article","created":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T07:23:26Z","timestamp":1742973806000},"page":"2892-2905","source":"Crossref","is-referenced-by-count":8,"title":["Spatio-Temporal and Retrieval-Augmented Modeling for Chest X-Ray Report Generation"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5598-1692","authenticated-orcid":false,"given":"Yan","family":"Yang","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"}]},{"given":"Xiaoxing","family":"You","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9855-003X","authenticated-orcid":false,"given":"Ke","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2950-7190","authenticated-orcid":false,"given":"Zhenqi","family":"Fu","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1230-5772","authenticated-orcid":false,"given":"Xianyun","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Intelligence Science and Engineering, Harbin Institute of Technology (Shenzhen), Shenzhen, China"}]},{"given":"Jiajun","family":"Ding","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"}]},{"given":"Jiamei","family":"Sun","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8407-1137","authenticated-orcid":false,"given":"Zhou","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7542-296X","authenticated-orcid":false,"given":"Qingming","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Computer and Control Engineering, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7227-3671","authenticated-orcid":false,"given":"Weidong","family":"Han","sequence":"additional","affiliation":[{"name":"Department of Colorectal Medical Oncology, Zhejiang Cancer Hospital, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1922-7283","authenticated-orcid":false,"given":"Jun","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Intelligence Science and Engineering, Harbin Institute of Technology (Shenzhen), Shenzhen, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00718"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20204"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01112"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00325"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-019-0322-0"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43904-9_19"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.140"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3342691"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.38"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28279"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3273390"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.451"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2024.3412402"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112630"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.28038"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2022.102510"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2024.3424505"},{"key":"ref20","article-title":"Multi-modal pre-training for medical vision-language understanding and generation: An empirical study with a new benchmark","author":"Xu","year":"2023","journal-title":"arXiv:2306.06494"},{"key":"ref21","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv:2302.13971"},{"key":"ref22","article-title":"CheXagent: Towards a foundation model for chest X-ray interpretation","author":"Chen","year":"2024","journal-title":"arXiv:2401.12208"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.metrad.2023.100033"},{"key":"ref24","article-title":"MAIRA-1: A specialised large multimodal model for radiology report generation","volume-title":"arXiv:2311.13668","author":"Hyland","year":"2023"},{"key":"ref25","first-page":"1","article-title":"LLM-CXR: Instruction-finetuned LLM for CXR image understanding and generation","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Lee"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-demo.49"},{"key":"ref27","first-page":"19730","article-title":"BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"202","author":"Li"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2024.3363014"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2023.3305384"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2024.103130"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01442"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73001-6_11"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681377"},{"key":"ref34","article-title":"Retrieval-augmented generation for AI-generated content: A survey","author":"Zhao","year":"2024","journal-title":"arXiv:2402.19473"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.546"},{"key":"ref36","article-title":"Extracting training data from large language models","author":"Carlini","year":"2020","journal-title":"arXiv:2012.07805"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.83"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3549555.3549585"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00278"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16452-1_58"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref43","first-page":"1","article-title":"Chest ImaGenome dataset for clinical reasoning","volume-title":"Proc. Neural Inf. Process. Syst. Track Datasets Benchmarks","author":"Wu"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.256"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref46","article-title":"The faiss library","author":"Douze","year":"2024","journal-title":"arXiv:2401.08281"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocv080"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref50","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","volume-title":"Proc. Workshop Text Summarization ACL","author":"Lin"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.3115\/1626355.1626389"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.836"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.117"},{"key":"ref54","first-page":"1","article-title":"RadGraph: Extracting clinical entities and relations from radiology reports","volume-title":"Proc. Neural Inf. Process. Syst. Track Datasets Benchmarks","author":"Jain"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2023.100802"},{"key":"ref56","article-title":"TinyLlama: An open-source small language model","author":"Zhang","year":"2024","journal-title":"arXiv:2401.02385"},{"key":"ref57","first-page":"1","article-title":"Decoupled weight decay regularization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Loshchilov"},{"key":"ref58","first-page":"1","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Hu"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref61","first-page":"1","article-title":"An image is worth 16 \u00d7 16 words: Transformers for image recognition at scale","volume-title":"Proc. 9th Int. Conf. Learn. Represent.","author":"Dosovitskiy"},{"key":"ref62","article-title":"Flamingo: A visual language model for few-shot learning","author":"Alayrac","year":"2022","journal-title":"arXiv:2204.14198"}],"container-title":["IEEE Transactions on Medical Imaging"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/42\/11068959\/10938723.pdf?arnumber=10938723","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T17:51:09Z","timestamp":1751651469000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10938723\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":62,"journal-issue":{"issue":"7"},"URL":"https:\/\/doi.org\/10.1109\/tmi.2025.3554498","relation":{},"ISSN":["0278-0062","1558-254X"],"issn-type":[{"value":"0278-0062","type":"print"},{"value":"1558-254X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7]]}}}