{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T17:44:55Z","timestamp":1775324695534,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681476","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"1771-1780","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Diffusion Networks with Task-Specific Noise Control for Radiology Report Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6841-2341","authenticated-orcid":false,"given":"Yuanhe","family":"Tian","sequence":"first","affiliation":[{"name":"University of Washington, Seattle, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6015-6064","authenticated-orcid":false,"given":"Fei","family":"Xia","sequence":"additional","affiliation":[{"name":"University of Washington, Seattle, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2849-2962","authenticated-orcid":false,"given":"Yan","family":"Song","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Jun Chen Han Guo Kai Yi Boyang Li and Mohamed Elhoseiny. 2022. VisualGPT: Data-Efficient Adaptation of Pretrained Language Models for Image Captioning. In CVPR. 18009--18019.","DOI":"10.1109\/CVPR52688.2022.01750"},{"key":"e_1_3_2_1_2_1","volume-title":"Analog Bits: Generating Discrete Data using Diffusion Models with Self-conditioning. In ICLR. 1--23.","author":"Chen Ting","year":"2023","unstructured":"Ting Chen, Ruixiang Zhang, and Geoffrey Hinton. 2023. Analog Bits: Generating Discrete Data using Diffusion Models with Self-conditioning. In ICLR. 1--23."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Zhihong Chen Yaling Shen Yan Song and Xiang Wan. 2021. Cross-modal Memory Networks for Radiology Report Generation. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers). Online 5904--5914.","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"e_1_3_2_1_5_1","volume-title":"2012 proceedings of the 35th international convention MIPRO. IEEE, 1725--1730","author":"Culjak Ivan","year":"2012","unstructured":"Ivan Culjak, David Abram, Tomislav Pribanic, Hrvoje Dzapo, and Mario Cifrek. 2012. A Brief Introduction to OpenCV. In 2012 proceedings of the 35th international convention MIPRO. IEEE, 1725--1730."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocv080"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). Minneapolis, Minnesota, 4171--4186."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.425"},{"key":"e_1_3_2_1_9_1","unstructured":"Shansan Gong Mukai Li Jiangtao Feng Zhiyong Wu and Lingpeng Kong. 2023. DiffuSeq: Sequence to Sequence Text Generation with Diffusion Models. In ICLR. 1--20."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_11_1","first-page":"6840","article-title":"Denoising Diffusion Probabilistic Models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. NeurIPS, Vol. 33 (2020), 6840--6851.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_12_1","volume-title":"Classifier-free Diffusion Guidance. arXiv preprint arXiv:2207.12598","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free Diffusion Guidance. arXiv preprint arXiv:2207.12598 (2022)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.451"},{"key":"e_1_3_2_1_15_1","volume-title":"KiUT: Knowledge-injected U-Transformer for Radiology Report Generation. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023","author":"Huang Zhongzhen","year":"2023","unstructured":"Zhongzhen Huang, Xiaofan Zhang, and Shaoting Zhang. 2023. KiUT: Knowledge-injected U-Transformer for Radiology Report Generation. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, June 17--24, 2023. IEEE, 19809--19818."},{"key":"e_1_3_2_1_16_1","volume-title":"Ng","author":"Irvin Jeremy","year":"2019","unstructured":"Jeremy Irvin, Pranav Rajpurkar, Michael Ko, Yifan Yu, Silviana Ciurea-Ilcus, Christopher Chute, Henrik Marklund, Behzad Haghgoo, Robyn L. Ball, Katie S. Shpanskaya, Jayne Seekins, David A. Mong, Safwan S. Halabi, Jesse K. Sandberg, Ricky Jones, David B. Larson, Curtis P. Langlotz, Bhavik N. Patel, Matthew P. Lungren, and Andrew Y. Ng. 2019. CheXpert: A Large Chest Radiograph Dataset with Uncertainty Labels and Expert Comparison. In The Thirty-Third AAAI Conference on Artificial Intelligence, AAAI 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, IAAI 2019, The Ninth AAAI Symposium on Educational Advances in Artificial Intelligence, EAAI 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019. 590--597."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.128122"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448326"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1240"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-019-0322-0"},{"key":"e_1_3_2_1_21_1","volume-title":"Replace and Report: NLP Assisted Radiology Report Generation. In Findings of the Association for Computational Linguistics: ACL","author":"Kale Kaveri","year":"2023","unstructured":"Kaveri Kale, Pushpak Bhattacharyya, and Kshitij Jadhav. 2023. Replace and Report: NLP Assisted Radiology Report Generation. In Findings of the Association for Computational Linguistics: ACL 2023, Anna Rogers, Jordan Boyd-Graber, and Naoaki Okazaki (Eds.). Toronto, Canada, 10731--10742."},{"key":"e_1_3_2_1_22_1","volume-title":"DiffWave: A Versatile Diffusion Model for Audio Synthesis. In International Conference on Learning Representations. 1--17","author":"Kong Zhifeng","year":"2021","unstructured":"Zhifeng Kong, Wei Ping, Jiaji Huang, Kexin Zhao, and Bryan Catanzaro. 2021. DiffWave: A Versatile Diffusion Model for Audio Synthesis. In International Conference on Learning Representations. 1--17."},{"key":"e_1_3_2_1_23_1","volume-title":"Xing","author":"Li Christy Y.","year":"2019","unstructured":"Christy Y. Li, Xiaodan Liang, Zhiting Hu, and Eric P. Xing. 2019. Knowledge-driven Encode, Retrieve, Paraphrase for Medical Image Report Generation. In The Thirty-Third AAAI Conference on Artificial Intelligence, AAAI 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, IAAI 2019, The Ninth AAAI Symposium on Educational Advances in Artificial Intelligence, EAAI 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019. 6666--6673."},{"key":"e_1_3_2_1_24_1","volume-title":"Dynamic Graph Enhanced Contrastive Learning for Chest X-Ray Report Generation. In 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Li M.","unstructured":"M. Li, B. Lin, Z. Chen, H. Lin, X. Liang, and X. Chang. 2023. Dynamic Graph Enhanced Contrastive Learning for Chest X-Ray Report Generation. In 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). Los Alamitos, CA, USA, 3334--3343."},{"key":"e_1_3_2_1_25_1","volume-title":"Auxiliary Signal-guided Knowledge Encoder-decoder for Medical Report Generation. World Wide Web","author":"Li Mingjie","year":"2022","unstructured":"Mingjie Li, Rui Liu, Fuyu Wang, Xiaojun Chang, and Xiaodan Liang. 2022. Auxiliary Signal-guided Knowledge Encoder-decoder for Medical Report Generation. World Wide Web (2022), 1--18."},{"key":"e_1_3_2_1_26_1","first-page":"4328","article-title":"Diffusion-LM Improves Controllable Text Generation","volume":"35","author":"Li Xiang","year":"2022","unstructured":"Xiang Li, John Thickstun, Ishaan Gulrajani, Percy S Liang, and Tatsunori B Hashimoto. 2022. Diffusion-LM Improves Controllable Text Generation. Advances in Neural Information Processing Systems, Vol. 35 (2022), 4328--4343.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_27_1","unstructured":"Xiang Lisa Li John Thickstun Ishaan Gulrajani Percy Liang and Tatsunori Hashimoto. 2022. Diffusion-LM Improves Controllable Text Generation. In Advances in Neural Information Processing Systems Alice H. Oh Alekh Agarwal Danielle Belgrave and Kyunghyun Cho (Eds.). 1--16."},{"key":"e_1_3_2_1_28_1","volume-title":"Xing","author":"Li Yuan","year":"2018","unstructured":"Yuan Li, Xiaodan Liang, Zhiting Hu, and Eric P. Xing. 2018. Hybrid Retrieval-Generation Reinforced Agent for Medical Image Report Generation. In Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3--8, 2018, Montr\u00e9al, Canada, Samy Bengio, Hanna M. Wallach, Hugo Larochelle, Kristen Grauman, Nicol\u00f2 Cesa-Bianchi, and Roman Garnett (Eds.). 1537--1547."},{"key":"e_1_3_2_1_29_1","volume-title":"ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Barcelona, Spain, 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. ROUGE: A Package for Automatic Evaluation of Summaries. In Text Summarization Branches Out. Barcelona, Spain, 74--81."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29826"},{"key":"e_1_3_2_1_31_1","volume-title":"A systematic review of deep learning-based research on radiology report generation. arXiv preprint arXiv:2311.14199","author":"Liu Chang","year":"2023","unstructured":"Chang Liu, Yuanhe Tian, and Yan Song. 2023. A systematic review of deep learning-based research on radiology report generation. arXiv preprint arXiv:2311.14199 (2023)."},{"key":"e_1_3_2_1_32_1","unstructured":"Fenglin Liu Shen Ge and Xian Wu. 2021. Competence-based Multimodal Curriculum Learning for Medical Report Generation. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) Chengqing Zong Fei Xia Wenjie Li and Roberto Navigli (Eds.). Online 3001--3012."},{"key":"e_1_3_2_1_33_1","volume-title":"Exploring and Distilling Posterior and Prior Knowledge for Radiology Report Generation. In IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021","author":"Liu Fenglin","year":"2021","unstructured":"Fenglin Liu, Xian Wu, Shen Ge, Wei Fan, and Yuexian Zou. 2021. Exploring and Distilling Posterior and Prior Knowledge for Radiology Report Generation. In IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, virtual, June 19--25, 2021. 13753--13762."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.23"},{"key":"e_1_3_2_1_35_1","volume-title":"Visual Instruction Tuning. CoRR","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual Instruction Tuning. CoRR, Vol. abs\/2304.08485 (2023). showeprint[arXiv]2304.08485"},{"key":"e_1_3_2_1_36_1","volume-title":"Semantic-conditional Diffusion Networks for Image Captioning. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023","author":"Luo Jianjie","year":"2023","unstructured":"Jianjie Luo, Yehao Li, Yingwei Pan, Ting Yao, Jianlin Feng, Hongyang Chao, and Tao Mei. 2023. Semantic-conditional Diffusion Networks for Image Captioning. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, June 17--24, 2023. IEEE, 23359--23368."},{"key":"e_1_3_2_1_37_1","volume-title":"3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). 1--17","author":"Mao Junhua","unstructured":"Junhua Mao, Wei Xu, Yi Yang, Jiang Wang, and Alan L. Yuille. 2015. Deep Captioning with Multimodal Recurrent Neural Networks (m-RNN). In 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7--9, 2015, Conference Track Proceedings, Yoshua Bengio and Yann LeCun (Eds.). 1--17."},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the Sixth Workshop on Statistical Machine Translation","author":"Michael Denkowski","year":"2011","unstructured":"Denkowski Michael and Lavie Alon. 2011. Meteor 1.3: Automatic Metric for Reliable Optimization and Evaluation of Machine Translation Systems. In Proceedings of the Sixth Workshop on Statistical Machine Translation. Edinburgh, Scotland, 85--91."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2023.102633"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.480"},{"key":"e_1_3_2_1_41_1","volume-title":"Progressive Transformer-Based Generation of Radiology Reports. In Findings of the Association for Computational Linguistics: EMNLP","author":"Nooralahzadeh Farhad","year":"2021","unstructured":"Farhad Nooralahzadeh, Nicolas Perez Gonzalez, Thomas Frauenfelder, Koji Fujimoto, and Michael Krauthammer. 2021. Progressive Transformer-Based Generation of Radiology Reports. In Findings of the Association for Computational Linguistics: EMNLP 2021, Marie-Francine Moens, Xuanjing Huang, Lucia Specia, and Scott Wen-tau Yih (Eds.). Punta Cana, Dominican Republic, 2824--2832."},{"key":"e_1_3_2_1_42_1","volume-title":"Abdelrahman Shaker and Fahad Shahbaz Khan","author":"Mullappilly Hisham Cholakkal Rao Sahal Shaji","year":"2023","unstructured":"Sahal Shaji Mullappilly Hisham Cholakkal Rao Muhammad Anwer Salman Khan Jorma Laaksonen Omkar Thawkar, Abdelrahman Shaker and Fahad Shahbaz Khan. 2023. XrayGPT: Chest Radiographs Summarization using Large Medical Vision-language Models. arXiv: 2306.07971 (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. BLEU: A Method for Automatic Evaluation of Machine Translation. In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. Philadelphia, Pennsylvania, USA, 311--318."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1"},{"key":"e_1_3_2_1_45_1","volume-title":"Reinforced Cross-modal Alignment for Radiology Report Generation. In Findings of the Association for Computational Linguistics: ACL","author":"Qin Han","year":"2022","unstructured":"Han Qin and Yan Song. 2022. Reinforced Cross-modal Alignment for Radiology Report Generation. In Findings of the Association for Computational Linguistics: ACL 2022, Smaranda Muresan, Preslav Nakov, and Aline Villavicencio (Eds.). Dublin, Ireland, 448--458."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.228"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-resolution Image Synthesis with Latent Diffusion Models. In CVPR. 10684--10695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_49_1","first-page":"36479","article-title":"Photorealistic Text-to-image Diffusion Models with Deep Language Understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic Text-to-image Diffusion Models with Deep Language Understanding. NeurIPS, Vol. 35 (2022), 36479--36494.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_50_1","volume-title":"Denoising Diffusion Implicit Models. In International Conference on Learning Representations. 1--20","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021. Denoising Diffusion Implicit Models. In International Conference on Learning Representations. 1--20."},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the 29th International Conference on Computational Linguistics. Gyeongju, Republic of Korea, 6436--6446","author":"Song Yan","year":"2022","unstructured":"Yan Song. 2022. Chinese Couplet Generation with Syntactic Information. In Proceedings of the 29th International Conference on Computational Linguistics. Gyeongju, Republic of Korea, 6436--6446."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.486"},{"key":"e_1_3_2_1_53_1","volume-title":"Interactive and Explainable Region-guided Radiology Report Generation. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023","author":"Tanida Tim","year":"2023","unstructured":"Tim Tanida, Philip M\u00fcller, Georgios Kaissis, and Daniel Rueckert. 2023. Interactive and Explainable Region-guided Radiology Report Generation. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, June 17--24, 2023. 7433--7442."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.326"},{"key":"e_1_3_2_1_55_1","volume-title":"ChiMed-GPT: A Chinese Medical Large Language Model with Full Training Regime and Better Alignment to Human Preferences. arXiv preprint arXiv:2311.06025","author":"Tian Yuanhe","year":"2023","unstructured":"Yuanhe Tian, Ruyi Gan, Yan Song, Jiaxing Zhang, and Yongdong Zhang. 2023. ChiMed-GPT: A Chinese Medical Large Language Model with Full Training Regime and Better Alignment to Human Preferences. arXiv preprint arXiv:2311.06025 (2023)."},{"key":"e_1_3_2_1_56_1","volume-title":"Aspect-based Sentiment Analysis with Context Denoising. In 2024 Annual Conference of the North American Chapter of the Association for Computational Linguistics","author":"Tian Yuanhe","year":"2024","unstructured":"Yuanhe Tian, Chang Liu, Yan Song, Fei Xia, and Yongdong Zhang. 2024. Aspect-based Sentiment Analysis with Context Denoising. In 2024 Annual Conference of the North American Chapter of the Association for Computational Linguistics. Mexico City, Mexico."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1186\/s12859-020-03834-6"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.487"},{"key":"e_1_3_2_1_59_1","volume-title":"LLaMA: Open and Efficient Foundation Language Models. CoRR","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, Aur\u00e9lien Rodriguez, Armand Joulin, Edouard Grave, and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. CoRR, Vol. abs\/2302.13971 (2023). showeprint[arXiv]2302.13971"},{"key":"e_1_3_2_1_60_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is All You Need. In Advances in neural information processing systems. 5998--6008."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_33"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16452-1_54"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01112"},{"key":"e_1_3_2_1_64_1","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021","author":"Wu Joy T.","year":"2021","unstructured":"Joy T. Wu, Nkechinyere Agu, Ismini Lourentzou, Arjun Sharma, Joseph Alexander Paguio, Jasper Seth Yao, Edward C. Dee, William Mitchell, Satyananda Kashyap, Andrea Giovannini, Leo Anthony Celi, and Mehdi Moradi. 2021. Chest ImaGenome Dataset for Clinical Reasoning. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021, December 2021, virtual, Joaquin Vanschoren and Sai-Kit Yeung (Eds.). 1--14."},{"key":"e_1_3_2_1_65_1","volume-title":"Weakly Supervised Contrastive Learning for Chest X-Ray Report Generation. In Findings of the Association for Computational Linguistics: EMNLP","author":"Yan An","year":"2021","unstructured":"An Yan, Zexue He, Xing Lu, Jiang Du, Eric Chang, Amilcare Gentili, Julian McAuley, and Chun-Nan Hsu. 2021. Weakly Supervised Contrastive Learning for Chest X-Ray Report Generation. In Findings of the Association for Computational Linguistics: EMNLP 2021, Marie-Francine Moens, Xuanjing Huang, Lucia Specia, and Scott Wen-tau Yih (Eds.). Punta Cana, Dominican Republic, 4009--4015."},{"key":"e_1_3_2_1_66_1","unstructured":"Jingyi You Dongyuan Li Manabu Okumura and Kenji Suzuki. 2022. JPG - Jointly Learn to Align: Automated Disease Prediction and Radiology Report Generation. In Proceedings of the 29th International Conference on Computational Linguistics Nicoletta Calzolari Chu-Ren Huang Hansaem Kim James Pustejovsky Leo Wanner Key-Sun Choi Pum-Mo Ryu Hsin-Hsi Chen Lucia Donatelli Heng Ji Sadao Kurohashi Patrizia Paggio Nianwen Xue Seokhwan Kim Younggyun Hahm Zhong He Tony Kyungil Lee Enrico Santus Francis Bond and Seung-Hoon Na (Eds.). 5989--6001."},{"key":"e_1_3_2_1_67_1","volume-title":"Automatic Radiology Report Generation based on Multi-view Image Fusion and Medical Concept Enrichment. ArXiv","author":"Yuan Jianbo","year":"2019","unstructured":"Jianbo Yuan, Haofu Liao, Rui Luo, and Jiebo Luo. 2019. Automatic Radiology Report Generation based on Multi-view Image Fusion and Medical Concept Enrichment. ArXiv, Vol. abs\/1907.09085 (2019)."},{"key":"e_1_3_2_1_68_1","volume-title":"DiffuSum: Generation Enhanced Extractive Summarization with Diffusion. In Findings of the Association for Computational Linguistics: ACL","author":"Zhang Haopeng","year":"2023","unstructured":"Haopeng Zhang, Xiao Liu, and Jiawei Zhang. 2023. DiffuSum: Generation Enhanced Extractive Summarization with Diffusion. In Findings of the Association for Computational Linguistics: ACL 2023. Toronto, Canada, 13089--13100."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6989"},{"key":"e_1_3_2_1_70_1","volume-title":"Visual-textual Attentive Semantic Consistency for Medical Report Generation. In 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). 3965--3974","author":"Zhou Yi","year":"2021","unstructured":"Yi Zhou, Lei Huang, Tao Zhou, Huazhu Fu, and Ling Shao. 2021. Visual-textual Attentive Semantic Consistency for Medical Report Generation. In 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). 3965--3974."},{"key":"e_1_3_2_1_71_1","volume-title":"MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models. CoRR","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models. CoRR, Vol. abs\/2304.10592 (2023). showeprint[arXiv]2304.10592"},{"key":"e_1_3_2_1_72_1","volume-title":"Exploring Discrete Diffusion Models for Image Captioning. arXiv preprint arXiv:2211.11694","author":"Zhu Zixin","year":"2022","unstructured":"Zixin Zhu, Yixuan Wei, Jianfeng Wang, Zhe Gan, Zheng Zhang, Le Wang, Gang Hua, Lijuan Wang, Zicheng Liu, and Han Hu. 2022. Exploring Discrete Diffusion Models for Image Captioning. arXiv preprint arXiv:2211.11694 (2022)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681476","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681476","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:47Z","timestamp":1750294667000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681476"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":72,"alternative-id":["10.1145\/3664647.3681476","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681476","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}