{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T01:21:40Z","timestamp":1772932900936,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key Research and Development Project","award":["2022YFB3104005"],"award-info":[{"award-number":["2022YFB3104005"]}]},{"name":"Key Research and Development Program of Zhejiang Province","award":["2024C01025"],"award-info":[{"award-number":["2024C01025"]}]},{"name":"Innovation Foundation for Doctor Dissertation of Northwestern Polytechnical University","award":["CX2023061"],"award-info":[{"award-number":["CX2023061"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100017596","name":"Natural Science Basic Research Program of Shaanxi Province","doi-asserted-by":"publisher","award":["2024JC-YBMS-513"],"award-info":[{"award-number":["2024JC-YBMS-513"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100017596","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62372380, U20B2065 and U22B2036"],"award-info":[{"award-number":["62372380, U20B2065 and U22B2036"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681377","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"4699-4708","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Medical Report Generation via Multimodal Spatio-Temporal Fusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2768-5252","authenticated-orcid":false,"given":"Xin","family":"Mei","sequence":"first","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1082-8755","authenticated-orcid":false,"given":"Rui","family":"Mao","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1406-107X","authenticated-orcid":false,"given":"Xiaoyan","family":"Cai","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5316-7689","authenticated-orcid":false,"given":"Libin","family":"Yang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3030-1280","authenticated-orcid":false,"given":"Erik","family":"Cambria","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imu.2021.100557"},{"key":"e_1_3_2_1_2_1","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments. In Proceedings of the Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization@ACL Jade Goldstein Alon Lavie Chin-Yew Lin and Clare R. Voss (Eds.). Association for Computational Linguistics 65--72. https:\/\/aclanthology.org\/W05-0909\/"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01442"},{"key":"e_1_3_2_1_4_1","first-page":"3","article-title":"Discrepancy and error in radiology: concepts, causes and consequences","volume":"81","author":"Brady Adrian","year":"2012","unstructured":"Adrian Brady, Riste\u00e1rd \u00d3 Laoide, Peter McCarthy, and Ronan McDermott. 2012. Discrepancy and error in radiology: concepts, causes and consequences. The Ulster Medical Journal, Vol. 81, 1 (2012), 3. https:\/\/www.ncbi.nlm.nih.gov\/pmc\/articles\/PMC3609674\/","journal-title":"The Ulster Medical Journal"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/MIS.2023.3329745"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of International Conference on Human-Computer Interaction (HCII). Washington DC, USA.","author":"Cambria Erik","year":"2024","unstructured":"Erik Cambria, Xulang Zhang, Rui Mao, Melvin Chen, and Kenneth Kwok. 2024. SenticNet 8: Fusing emotion AI and commonsense AI for interpretable, trustworthy, and explainable affective computing. In Proceedings of International Conference on Human-Computer Interaction (HCII). Washington DC, USA."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3--540--79942--9_2"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2024.102306"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the 29th International Conference on Computational Linguistics (COLING). International Committee on Computational Linguistics, Gyeongju, Republic of Korea, 94--104","author":"Han Sooji","year":"2022","unstructured":"Sooji Han, Rui Mao, and Erik Cambria. 2022. Hierarchical Attention Network for Explainable Depression Detection on Twitter Aided by Metaphor Concept Mappings. In Proceedings of the 29th International Conference on Computational Linguistics (COLING). International Committee on Computational Linguistics, Gyeongju, Republic of Korea, 94--104. https:\/\/aclanthology.org\/2022.coling-1.9"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.140"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.451"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01897"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021","author":"Jain Saahil","year":"2021","unstructured":"Saahil Jain, Ashwin Agrawal, Adriel Saporta, Steven Q. H. Truong, Du Nguyen Duong, Tan Bui, Pierre J. Chambon, Yuhao Zhang, Matthew P. Lungren, Andrew Y. Ng, Curtis P. Langlotz, and Pranav Rajpurkar. 2021. RadGraph: Extracting Clinical Entities and Relations from Radiology Reports. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021, December 2021, virtual, Joaquin Vanschoren and Sai-Kit Yeung (Eds.). https:\/\/datasets-benchmarks-proceedings.neurips.cc\/paper\/2021\/hash\/c8ffe9a587b126f152ed3d89a146b445-Abstract-round1.html"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1657"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_20_1","volume-title":"MIMIC-CXR: A large publicly available database of labeled chest radiographs. CoRR","author":"Johnson Alistair E. W.","year":"2019","unstructured":"Alistair E. W. Johnson, Tom J. Pollard, Seth J. Berkowitz, Nathaniel R. Greenbaum, Matthew P. Lungren, Chih-ying Deng, Roger G. Mark, and Steven Horng. 2019. MIMIC-CXR: A large publicly available database of labeled chest radiographs. CoRR, Vol. abs\/1901.07042 (2019). http:\/\/arxiv.org\/abs\/1901.07042"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1097\/01.cpm.0000156704.33941.e2"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016666"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00325"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/S11280-022-01013--6"},{"key":"e_1_3_2_1_25_1","volume-title":"Xing","author":"Li Yuan","year":"2018","unstructured":"Yuan Li, Xiaodan Liang, Zhiting Hu, and Eric P. Xing. 2018. Hybrid Retrieval-Generation Reinforced Agent for Medical Image Report Generation. In Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018. 1537--1547. https:\/\/proceedings.neurips.cc\/paper\/2018\/hash\/e07413354875be01a996dc560274708e-Abstract.html"},{"key":"e_1_3_2_1_26_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Association for Computational Linguistics, Barcelona, Spain, 74--81. https:\/\/aclanthology.org\/W04--1013"},{"key":"e_1_3_2_1_27_1","volume-title":"Has Multimodal Learning Delivered Universal Intelligence in Healthcare? A Comprehensive Survey. arXiv preprint arXiv:2408.12880","author":"Lin Qika","year":"2024","unstructured":"Qika Lin, Yifan Zhu, Xin Mei, Ling Huang, Jingying Ma, Kai He, Zhen Peng, Erik Cambria, and Mengling Feng. 2024. Has Multimodal Learning Delivered Universal Intelligence in Healthcare? A Comprehensive Survey. arXiv preprint arXiv:2408.12880 (2024)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12960-017-0187--2"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING). Torino, Italia. https:\/\/aclanthology.org\/2024","author":"Mao Rui","year":"2024","unstructured":"Rui Mao, Guanyi Chen, Xulang Zhang, Frank Guerin, and Erik Cambria. 2024. GPTEval: A Survey on Assessments of ChatGPT and GPT-4. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING). Torino, Italia. https:\/\/aclanthology.org\/2024.lrec-main.693"},{"key":"e_1_3_2_1_32_1","volume-title":"Qian Liu, and Erik Cambria.","author":"Mao Rui","year":"2024","unstructured":"Rui Mao, Kai He, Claudia Beth Ong, Qian Liu, and Erik Cambria. 2024. MetaPro 2.0: Computational Metaphor Processing on the Effectiveness of Anomalous Language Modeling. In Findings of the Association for Computational Linguistics: ACL. Association for Computational Linguistics, Bangkok, Thailand."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101988"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-demo.12"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.acra.2015.05.007"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1"},{"key":"e_1_3_2_1_38_1","volume-title":"Improving Chest X-ray Report Generation by Leveraging Warm-Starting. CoRR","author":"Nicolson Aaron","year":"2022","unstructured":"Aaron Nicolson, Jason Dowling, and Bevan Koopman. 2022. Improving Chest X-ray Report Generation by Leveraging Warm-Starting. CoRR, Vol. abs\/2201.09405 (2022). https:\/\/arxiv.org\/abs\/2201.09405"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2307.09758"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.38"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.7861\/clinmedicine.13--4--349"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.274"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2306.07971"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","unstructured":"Tao Tu Shekoofeh Azizi Danny Driess Mike Schaekermann Mohamed Amin Pi-Chuan Chang Andrew Carroll Chuck Lau Ryutaro Tanno Ira Ktena Basil Mustafa Aakanksha Chowdhery Yun Liu Simon Kornblith David J. Fleet Philip Andrew Mansfield Sushant Prakash Renee Wong Sunny Virmani Christopher Semturs S. Sara Mahdavi Bradley Green Ewa Dominowska Blaise Ag\u00fcera y Arcas Joelle K. Barral Dale R. Webster Gregory S. Corrado Yossi Matias Karan Singhal Pete Florence Alan Karthikesalingam and Vivek Natarajan. 2023. Towards Generalist Biomedical AI. CoRR Vol. abs\/2307.14334 (2023). https:\/\/doi.org\/10.48550\/ARXIV.2307.14334","DOI":"10.48550\/ARXIV.2307.14334"},{"key":"e_1_3_2_1_47_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in Neural Information Processing Systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01954"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101939"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.102798"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2022.102510"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87199-4_7"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32226-7_80"},{"key":"e_1_3_2_1_54_1","volume-title":"Transfr: Transferable federated recommendation with pre-trained language models. arXiv preprint arXiv:2402.01124","author":"Zhang Honglei","year":"2024","unstructured":"Honglei Zhang, He Liu, Haoxuan Li, and Yidong Li. 2024. Transfr: Transferable federated recommendation with pre-trained language models. arXiv preprint arXiv:2402.01124 (2024)."},{"key":"e_1_3_2_1_55_1","volume-title":"The Thirty-Fourth AAAI Conference on Artificial Intelligence","author":"Zhang Yixiao","unstructured":"Yixiao Zhang, Xiaosong Wang, Ziyue Xu, Qihang Yu, Alan L. Yuille, and Daguang Xu. 2020. When Radiology Report Generation Meets Knowledge Graph. In The Thirty-Fourth AAAI Conference on Artificial Intelligence. AAAI Press, 12910--12917. https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/6989"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3497623.3497658"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681377","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681377","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:44Z","timestamp":1750295864000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681377"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":56,"alternative-id":["10.1145\/3664647.3681377","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681377","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}