{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T16:50:58Z","timestamp":1777567858842,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["QTZX23084"],"award-info":[{"award-number":["QTZX23084"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100017596","name":"Natural Science Basic Research Program of Shaanxi Province","doi-asserted-by":"publisher","award":["2024JC-YBQN-0715,2024GX-YBXM-039,2024GX-ZDCYL-02-15"],"award-info":[{"award-number":["2024JC-YBQN-0715,2024GX-YBXM-039,2024GX-ZDCYL-02-15"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100017596","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2022ZD0117103"],"award-info":[{"award-number":["2022ZD0117103"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072354"],"award-info":[{"award-number":["62072354"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681201","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"4967-4975","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["Divide and Conquer: Isolating Normal-Abnormal Attributes in Knowledge Graph-Enhanced Radiology Report Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0382-2715","authenticated-orcid":false,"given":"Xiao","family":"Liang","sequence":"first","affiliation":[{"name":"Xidian University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3921-2195","authenticated-orcid":false,"given":"Yanlei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8027-4287","authenticated-orcid":false,"given":"Di","family":"Wang","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6033-4959","authenticated-orcid":false,"given":"Haodi","family":"Zhong","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1890-7845","authenticated-orcid":false,"given":"Ronghan","family":"Li","sequence":"additional","affiliation":[{"name":"Xi'an University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6913-8604","authenticated-orcid":false,"given":"Quan","family":"Wang","sequence":"additional","affiliation":[{"name":"Xidian University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00325"},{"key":"e_1_3_2_1_2_1","volume-title":"Generating radiology reports via memory-driven transformer. ArXiv, abs\/2010.16056","author":"Chen Zhihong","year":"2020","unstructured":"Zhihong Chen, Yan Song, Tsung-Hui Chang, and Xiang Wan. Generating radiology reports via memory-driven transformer. ArXiv, abs\/2010.16056, 2020."},{"key":"e_1_3_2_1_3_1","volume-title":"Annual Meeting of the Association for Computational Linguistics","author":"Jing Baoyu","year":"2017","unstructured":"Baoyu Jing, Pengtao Xie, and Eric P. Xing. On the automatic generation of medical imaging reports. In Annual Meeting of the Association for Computational Linguistics, 2017."},{"key":"e_1_3_2_1_4_1","volume-title":"Automatic radiology report generation based on multi-view image fusion and medical concept enrichment. ArXiv, abs\/1907.09085","author":"Yuan Jianbo","year":"2019","unstructured":"Jianbo Yuan, Haofu Liao, Rui Luo, and Jiebo Luo. Automatic radiology report generation based on multi-view image fusion and medical concept enrichment. ArXiv, abs\/1907.09085, 2019."},{"key":"e_1_3_2_1_5_1","volume-title":"Neural Information Processing Systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam M. Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. Attention is all you need. In Neural Information Processing Systems, 2017."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6989"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"e_1_3_2_1_8_1","volume-title":"Yanbo Xu. Biomedclip: a multimodal biomedical foundation model pretrained from fifteen million scientific image-text pairs.","author":"Jaspreet Kaur Bagga Robert Naoto Usuyama","year":"2023","unstructured":"Naoto Usuyama Jaspreet Kaur Bagga Robert Tinn Sam Preston Rajesh N. Rao Mu-HsinWei et al. Sheng Zhang, Yanbo Xu. Biomedclip: a multimodal biomedical foundation model pretrained from fifteen million scientific image-text pairs. 2023."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocv080"},{"key":"e_1_3_2_1_10_1","volume-title":"Yifan Peng, Zhiyong Lu, Roger G. Mark, Seth J. Berkowitz, and Steven Horng. Mimic-cxr-jpg, a large publicly available database of labeled chest radiographs.","author":"Johnson Alistair E. W.","year":"2019","unstructured":"Alistair E. W. Johnson, Tom J. Pollard, Nathaniel R. Greenbaum, Matthew P. Lungren, Chih ying Deng, Yifan Peng, Zhiyong Lu, Roger G. Mark, Seth J. Berkowitz, and Steven Horng. Mimic-cxr-jpg, a large publicly available database of labeled chest radiographs. 2019."},{"key":"e_1_3_2_1_11_1","first-page":"10575","volume-title":"2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Cornia Marcella","year":"2019","unstructured":"Marcella Cornia, Matteo Stefanini, Lorenzo Baraldi, and Rita Cucchiara. Meshedmemory transformer for image captioning. 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pages 10575--10584, 2019."},{"key":"e_1_3_2_1_12_1","first-page":"18145","volume-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Dou Zi-Yi","year":"2021","unstructured":"Zi-Yi Dou, Yichong Xu, Zhe Gan, Jianfeng Wang, Shuohang Wang, Lijuan Wang, Chenguang Zhu, Nanyun Peng, Zicheng Liu, and Michael Zeng. An empirical study of training end-to-end vision-and-language transformers. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pages 18145--18155, 2021."},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Machine Learning","author":"Kim Wonjae","year":"2021","unstructured":"Wonjae Kim, Bokyung Son, and Ildoo Kim. Vilt: Vision-and-language transformer without convolution or region supervision. In International Conference on Machine Learning, 2021."},{"key":"e_1_3_2_1_14_1","volume-title":"International Conference on Machine Learning","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven C. H. Hoi. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International Conference on Machine Learning, 2022."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01062"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"e_1_3_2_1_17_1","first-page":"6077","volume-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Anderson Peter","year":"2017","unstructured":"Peter Anderson, Xiaodong He, Chris Buehler, Damien Teney, Mark Johnson, Stephen Gould, and Lei Zhang. Bottom-up and top-down attention for image captioning and visual question answering. 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 6077--6086, 2017."},{"key":"e_1_3_2_1_18_1","volume-title":"AAAI Conference on Artificial Intelligence","author":"Song Zeliang","year":"2020","unstructured":"Zeliang Song, Xiaofei Zhou, Zhendong Mao, and Jianlong Tan. Image captioning with context-aware auxiliary guidance. In AAAI Conference on Artificial Intelligence, 2020."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.694"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611987"},{"key":"e_1_3_2_1_22_1","volume-title":"Matthew B. A. McDermott, Willie Boag, Wei-Hung Weng, Peter Szolovits, and Marzyeh Ghassemi. Clinically accurate chest x-ray report generation. ArXiv, abs\/1904.02633","author":"Liu Guanxiong","year":"2019","unstructured":"Guanxiong Liu, Tzu-Ming Harry Hsu, Matthew B. A. McDermott, Willie Boag, Wei-Hung Weng, Peter Szolovits, and Marzyeh Ghassemi. Clinically accurate chest x-ray report generation. ArXiv, abs\/1904.02633, 2019."},{"key":"e_1_3_2_1_23_1","volume-title":"Cross-modal memory networks for radiology report generation. ArXiv, abs\/2204.13258","author":"Chen Zhihong","year":"2022","unstructured":"Zhihong Chen, Yaling Shen, Yan Song, and Xiang Wan. Cross-modal memory networks for radiology report generation. ArXiv, abs\/2204.13258, 2022."},{"key":"e_1_3_2_1_24_1","unstructured":"Yuan Li Xiaodan Liang Zhiting Hu and Eric P. Xing. Knowledge-driven encode retrieve paraphrase for medical image report generation. ArXiv abs\/1903.10122."},{"key":"e_1_3_2_1_25_1","volume-title":"Autoencoding knowledge graph for unsupervised medical report generation. ArXiv, abs\/2111.04318","author":"Liu Fenglin","year":"2021","unstructured":"Fenglin Liu, Chenyu You, Xian Wu, Shen Ge, Sheng Wang, and Xu Sun. Autoencoding knowledge graph for unsupervised medical report generation. ArXiv, abs\/2111.04318, 2021."},{"key":"e_1_3_2_1_26_1","volume-title":"Knowledge matters: Chest radiology report generation with general and specific knowledge. Medical image analysis, 80:102510","author":"Yang Shuxin","year":"2021","unstructured":"Shuxin Yang, Xian Wu, Shen Ge, S.kevin Zhou, and Li Xiao. Knowledge matters: Chest radiology report generation with general and specific knowledge. Medical image analysis, 80:102510, 2021."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02000"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01897"},{"key":"e_1_3_2_1_29_1","volume-title":"International Conference on Medical Imaging with Deep Learning","author":"Cohen Joseph Paul","year":"2021","unstructured":"Joseph Paul Cohen, Joseph D. Viviano, Paul Bertin, Paul Morrison, Parsa Torabian, Matteo Guarrera, Matthew P. Lungren, Akshay Chaudhari, Rupert Brooks, Mohammad Hashir, and Hadrien Bertrand. Torchxrayvision: A library of chest x-ray datasets and models. In International Conference on Medical Imaging with Deep Learning, 2021."},{"key":"e_1_3_2_1_30_1","volume-title":"Conference on Empirical Methods in Natural Language Processing","author":"Smit Akshay","year":"2020","unstructured":"Akshay Smit, Saahil Jain, Pranav Rajpurkar, Anuj Pareek, A. Ng, and Matthew P. Lungren. Chexbert: Combining automatic labelers and expert annotations for accurate radiology report labeling using bert. In Conference on Empirical Methods in Natural Language Processing, 2020."},{"key":"e_1_3_2_1_31_1","unstructured":"Victor Sanh Lysandre Debut Julien Chaumond and Thomas Wolf. Distilbert a distilled version of bert: smaller faster cheaper and lighter. ArXiv abs\/1910.01108."},{"key":"e_1_3_2_1_32_1","unstructured":"Thomas Kipf and Max Welling. Semi-supervised classification with graph convolutional networks. ArXiv abs\/1609.02907."},{"key":"e_1_3_2_1_33_1","volume-title":"Segment anything in medical images. ArXiv, abs\/2304.12306","author":"Ma Jun","year":"2023","unstructured":"Jun Ma and BoWang. Segment anything in medical images. ArXiv, abs\/2304.12306, 2023."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01112"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25100"},{"key":"e_1_3_2_1_36_1","unstructured":"Yixin Wang Zihao Lin and Haoyu Dong. Rethinking medical report generation: Disease revealing enhancement with knowledge graph. ArXiv abs\/2307.12526."},{"key":"e_1_3_2_1_37_1","volume-title":"Natural language processing with Python: analyzing text with the natural language toolkit","author":"Bird Steven","year":"2009","unstructured":"Steven Bird, Ewan Klein, and Edward Loper. Natural language processing with Python: analyzing text with the natural language toolkit. 2009."},{"key":"e_1_3_2_1_38_1","first-page":"4566","volume-title":"2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Vedantam Ramakrishna","year":"2014","unstructured":"Ramakrishna Vedantam, C. Lawrence Zitnick, and Devi Parikh. Cider: Consensusbased image description evaluation. 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pages 4566--4575, 2014."},{"key":"e_1_3_2_1_39_1","volume-title":"Annual Meeting of the Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, ToddWard, and Wei-Jing Zhu. Bleu: a method for automatic evaluation of machine translation. In Annual Meeting of the Association for Computational Linguistics, 2002."},{"key":"e_1_3_2_1_40_1","volume-title":"Annual Meeting of the Association for Computational Linguistics","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. Rouge: A package for automatic evaluation of summaries. In Annual Meeting of the Association for Computational Linguistics, 2004."},{"key":"e_1_3_2_1_41_1","volume-title":"IEEvaluation@ACL","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. Meteor: An automatic metric for mt evaluation with improved correlation with human judgments. In IEEvaluation@ACL, 2005."},{"key":"e_1_3_2_1_42_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. ArXiv, abs\/2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. An image is worth 16x16 words: Transformers for image recognition at scale. ArXiv, abs\/2010.11929, 2020."},{"key":"e_1_3_2_1_43_1","volume-title":"Domain-specific language model pretraining for biomedical natural language processing. ACM Transactions on Computing for Healthcare (HEALTH), 3:1--23","author":"Gu Yu","year":"2020","unstructured":"Yu Gu, Robert Tinn, Hao Cheng, Michael R. Lucas, Naoto Usuyama, Xiaodong Liu, Tristan Naumann, Jianfeng Gao, and Hoifung Poon. Domain-specific language model pretraining for biomedical natural language processing. ACM Transactions on Computing for Healthcare (HEALTH), 3:1--23, 2020."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681201","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681201","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:02Z","timestamp":1750295882000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681201"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":43,"alternative-id":["10.1145\/3664647.3681201","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681201","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}