{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:20:45Z","timestamp":1765340445092,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","funder":[{"name":"the National Natural Science Foundation of China","award":["62171323, 62271155"],"award-info":[{"award-number":["62171323, 62271155"]}]},{"name":"the Yeqisun Joint Funds of the National Natural Science Foundation of China","award":["U2441252"],"award-info":[{"award-number":["U2441252"]}]},{"name":"National Key R\\&D Program of China","award":["2020YFA0711400"],"award-info":[{"award-number":["2020YFA0711400"]}]},{"name":"Shanghai Municipal Science and Technology Major Project","award":["2021SHZDZX0100"],"award-info":[{"award-number":["2021SHZDZX0100"]}]},{"name":"the Changjiang Scholars Program of China"},{"name":"the Computational Biology Program of Science and Technology Commission of Shanghai Municipality (STCSM)","award":["25JS2840100"],"award-info":[{"award-number":["25JS2840100"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754913","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:47:18Z","timestamp":1761374838000},"page":"2958-2967","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Self-Supervised Anatomical Consistency Learning for Vision-Grounded Medical Report Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5791-145X","authenticated-orcid":false,"given":"Longzhen","family":"Yang","sequence":"first","affiliation":[{"name":"Tongji University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3682-6288","authenticated-orcid":false,"given":"Zhangkai","family":"Ni","sequence":"additional","affiliation":[{"name":"Tongji University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6974-5110","authenticated-orcid":false,"given":"Ying","family":"Wen","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4257-2528","authenticated-orcid":false,"given":"Yihang","family":"Liu","sequence":"additional","affiliation":[{"name":"Tongji University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5250-170X","authenticated-orcid":false,"given":"Lianghua","family":"He","sequence":"additional","affiliation":[{"name":"Tongji University, Shanghai, China and Shanghai Eye Disease Prevention and Treatment Center, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2999-2088","authenticated-orcid":false,"given":"Heng Tao","family":"Shen","sequence":"additional","affiliation":[{"name":"Tongji University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_1"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01346"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3348"},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Learning Representations. 1-12","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, G Heigold, S Gelly, et al., 2020. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations. 1-12."},{"key":"e_1_3_2_1_9_1","volume-title":"Machine Learning for Health Conference. 209-219","author":"Endo Mark","year":"2021","unstructured":"Mark Endo, Rayan Krishnan, Viswesh Krishna, Andrew Y Ng, and Pranav Rajpurkar. 2021. Retrieval-based chest x-ray report generation using a pre-trained contrastive language-image model. In Machine Learning for Health Conference. 209-219."},{"key":"e_1_3_2_1_10_1","volume-title":"Anatomy-Guided Radiology Report Generation with Pathology-Aware Regional Prompts. arXiv preprint arXiv:2411.10789","author":"Gao Yijian","year":"2024","unstructured":"Yijian Gao, Dominic Marshall, Xiaodan Xing, Junzhi Ning, Giorgos Papanastasiou, Guang Yang, and Matthieu Komorowski. 2024. Anatomy-Guided Radiology Report Generation with Pathology-Aware Regional Prompts. arXiv preprint arXiv:2411.10789 (2024)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00781"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02016"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00391"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-51749-0"},{"key":"e_1_3_2_1_16_1","unstructured":"Stephanie L Hyland Shruthi Bannur Kenza Bouzid Daniel C Castro Mercy Ranjit Anton Schwaighofer Fernando P\u00e9rez-Garc\u00eda Valentina Salvatelli Shaury Srivastav Anja Thieme et al. 2023. Maira-1: A specialised large multimodal model for radiology report generation. arXiv preprint arXiv:2311.13668 (2023)."},{"key":"e_1_3_2_1_17_1","volume-title":"Du Nguyen Duong, Tan Bui, Pierre Chambon, Yuhao Zhang, Matthew P Lungren, Andrew Y Ng, et al.","author":"Jain Saahil","year":"2021","unstructured":"Saahil Jain, Ashwin Agrawal, Adriel Saporta, Steven QH Truong, Du Nguyen Duong, Tan Bui, Pierre Chambon, Yuhao Zhang, Matthew P Lungren, Andrew Y Ng, et al., 2021. Radgraph: Extracting clinical entities and relations from radiology reports. Advances in Neural Information Processing Systems (2021), 1-8."},{"key":"e_1_3_2_1_18_1","first-page":"978","article-title":"Multimodal image-text matching improves retrieval-based chest x-ray report generation","author":"Jeong Jaehwan","year":"2024","unstructured":"Jaehwan Jeong, Katherine Tian, Andrew Li, Sina Hartung, Subathra Adithan, Fardad Behzadi, Juan Calle, David Osayande, Michael Pohlen, and Pranav Rajpurkar. 2024. Multimodal image-text matching improves retrieval-based chest x-ray report generation. In Medical Imaging with Deep Learning. 978-990.","journal-title":"Medical Imaging with Deep Learning."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-019-0322-0"},{"key":"e_1_3_2_1_20_1","first-page":"2","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","volume":"1","author":"Ming-Wei Chang Jacob Devlin","year":"2019","unstructured":"Jacob Devlin Ming-Wei Chang Kenton and Lee Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of NACCL-HLT, Vol. 1. 2.","journal-title":"Proceedings of NACCL-HLT"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00325"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01112"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681201"},{"key":"e_1_3_2_1_24_1","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text Summarization Branches Out. 74-81.","journal-title":"Text Summarization Branches Out."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29826"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680760"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i18.34091"},{"key":"e_1_3_2_1_28_1","volume-title":"Dare: Data augmented relation extraction with gpt-2. arXiv preprint arXiv:2004.13845","author":"Papanikolaou Yannis","year":"2020","unstructured":"Yannis Papanikolaou and Andrea Pierleoni. 2020. Dare: Data augmented relation extraction with gpt-2. arXiv preprint arXiv:2004.13845 (2020)."},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. 311-318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. 311-318."},{"key":"e_1_3_2_1_30_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog Vol. 1 8 (2019) 9."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1148\/ryai.2019180041"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.117"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00718"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16443-9_68"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681476"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1056\/AIoa2300138"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-024-03359-y"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.369"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01112"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.metrad.2023.100033"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16437-8_63"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.256"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01954"},{"key":"e_1_3_2_1_46_1","unstructured":"Joy T Wu Nkechinyere N Agu Ismini Lourentzou Arjun Sharma Joseph A Paguio Jasper S Yao Edward C Dee William Mitchell Satyananda Kashyap Andrea Giovannini et al. 2021. Chest imagenome dataset for clinical reasoning. Advances in Neural Information Processing Systems (2021) 1-8."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12652-022-04398-4"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.336"},{"key":"e_1_3_2_1_49_1","volume-title":"Knowledge matters: Chest radiology report generation with general and specific knowledge. Medical image analysis","author":"Yang Shuxin","year":"2022","unstructured":"Shuxin Yang, Xian Wu, Shen Ge, S Kevin Zhou, and Li Xiao. 2022. Knowledge matters: Chest radiology report generation with general and specific knowledge. Medical image analysis, Vol. 80 (2022), 102510."},{"key":"e_1_3_2_1_50_1","volume-title":"Eduardo Kaiser Ururahy Nunes Fonseca, Henrique Min Ho Lee, Zahra Shakeri Hossein Abad, Andrew Y Ng, et al.","author":"Yu Feiyang","year":"2023","unstructured":"Feiyang Yu, Mark Endo, Rayan Krishnan, Ian Pan, Andy Tsai, Eduardo Pontes Reis, Eduardo Kaiser Ururahy Nunes Fonseca, Henrique Min Ho Lee, Zahra Shakeri Hossein Abad, Andrew Y Ng, et al., 2023. Evaluating progress in automatic chest x-ray radiology report generation. Patterns, Vol. 4, 9 (2023)."},{"key":"e_1_3_2_1_51_1","unstructured":"Anna Zawacki Carol Wu George Shih Julia Elliott Mikhail Fomitchev Mohannad Hussain ParasLakhani Phil Culliton and Shunxing Bao. 2019. SIIM-ACR Pneumothorax Segmentation. https:\/\/kaggle.com\/competitions\/siim-acr-pneumothorax-segmentation. Kaggle."},{"key":"e_1_3_2_1_52_1","volume-title":"Machine Learning for Health Conference. 2-25","author":"Zhang Yuhao","year":"2022","unstructured":"Yuhao Zhang, Hang Jiang, Yasuhide Miura, Christopher D Manning, and Curtis P Langlotz. 2022. Contrastive learning of medical visual representations from paired images and text. In Machine Learning for Health Conference. 2-25."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2024.104718"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-021-00425-9"},{"key":"e_1_3_2_1_55_1","volume-title":"Advancing Radiograph Representation Learning with Masked Record Modeling. In International Conference on Learning Representations. 1-16","author":"Zhou Hong-Yu","year":"2023","unstructured":"Hong-Yu Zhou, Chenyu Lian, Liansheng Wang, and Yizhou Yu. 2023. Advancing Radiograph Representation Learning with Masked Record Modeling. In International Conference on Learning Representations. 1-16."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754913","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:17:48Z","timestamp":1765340268000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754913"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":55,"alternative-id":["10.1145\/3746027.3754913","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754913","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}