{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T16:48:33Z","timestamp":1779382113446,"version":"3.53.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,4]],"date-time":"2023-08-04T00:00:00Z","timestamp":1691107200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Intramural Research Program of the National Institutes of Health Clinical Center"},{"name":"JST Moonshot R&D","award":["JPMJMS2011"],"award-info":[{"award-number":["JPMJMS2011"]}]},{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science","doi-asserted-by":"publisher","award":["22K07681"],"award-info":[{"award-number":["22K07681"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,6]]},"DOI":"10.1145\/3580305.3599819","type":"proceedings-article","created":{"date-parts":[[2023,8,4]],"date-time":"2023-08-04T18:13:58Z","timestamp":1691172838000},"page":"4156-4165","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":41,"title":["Expert Knowledge-Aware Image Difference Graph Representation Learning for Difference-Aware Medical Visual Question Answering"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0426-9029","authenticated-orcid":false,"given":"Xinyue","family":"Hu","sequence":"first","affiliation":[{"name":"The University of Texas at Arlington, Arlinton, TX, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7419-6240","authenticated-orcid":false,"given":"Lin","family":"Gu","sequence":"additional","affiliation":[{"name":"RIKEN &amp; The University of Tokyo, Tokyo, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4712-891X","authenticated-orcid":false,"given":"Qiyuan","family":"An","sequence":"additional","affiliation":[{"name":"The University of Texas at Arlington, Arlington, TX, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-5681-8230","authenticated-orcid":false,"given":"Mengliang","family":"Zhang","sequence":"additional","affiliation":[{"name":"The University of Texas at Arlington, Arlington, TX, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6539-5408","authenticated-orcid":false,"given":"Liangchen","family":"Liu","sequence":"additional","affiliation":[{"name":"National Institutes of Health Clinical Center, Bethesda, MD, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2565-1374","authenticated-orcid":false,"given":"Kazuma","family":"Kobayashi","sequence":"additional","affiliation":[{"name":"National Cancer Center Research Institute, Tokyo, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3712-3691","authenticated-orcid":false,"given":"Tatsuya","family":"Harada","sequence":"additional","affiliation":[{"name":"The University of Tokyo &amp; RIKEN, Tokyo, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8081-7376","authenticated-orcid":false,"given":"Ronald M.","family":"Summers","sequence":"additional","affiliation":[{"name":"National Institutes of Health Clinical Center, Bethesda, MD, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3920-5890","authenticated-orcid":false,"given":"Yingying","family":"Zhu","sequence":"additional","affiliation":[{"name":"The University of Texas at Arlington, Arlington, TX, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,8,4]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Dina Demner- Fushman, and Henning M\u00fcller","author":"Abacha Asma Ben","year":"2019","unstructured":"Asma Ben Abacha , Sadid A Hasan , Vivek V Datla , Joey Liu , Dina Demner- Fushman, and Henning M\u00fcller . 2019 . VQA-Med: Overview of the Medical Visual Question Answering Task at ImageCLEF 2019. CLEF (Working Notes) 2 (2019). Asma Ben Abacha, Sadid A Hasan, Vivek V Datla, Joey Liu, Dina Demner- Fushman, and Henning M\u00fcller. 2019. VQA-Med: Overview of the Medical Visual Question Answering Task at ImageCLEF 2019. CLEF (Working Notes) 2 (2019)."},{"key":"e_1_3_2_2_2_1","volume-title":"Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325","author":"Chen Xinlei","year":"2015","unstructured":"Xinlei Chen , Hao Fang , Tsung-Yi Lin , Ramakrishna Vedantam , Saurabh Gupta , Piotr Doll\u00e1r , and C Lawrence Zitnick . 2015. Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325 ( 2015 ). Xinlei Chen, Hao Fang, Tsung-Yi Lin, Ramakrishna Vedantam, Saurabh Gupta, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2015. Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325 (2015)."},{"key":"e_1_3_2_2_3_1","volume-title":"Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio.","author":"Cho Kyunghyun","year":"2014","unstructured":"Kyunghyun Cho , Bart Van Merri\u00ebnboer , Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014 . Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014). Kyunghyun Cho, Bart Van Merri\u00ebnboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014. Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)."},{"key":"e_1_3_2_2_4_1","volume-title":"International conference on machine learning. PMLR, 1126--1135","author":"Finn Chelsea","year":"2017","unstructured":"Chelsea Finn , Pieter Abbeel , and Sergey Levine . 2017 . Model-agnostic metalearning for fast adaptation of deep networks . In International conference on machine learning. PMLR, 1126--1135 . Chelsea Finn, Pieter Abbeel, and Sergey Levine. 2017. Model-agnostic metalearning for fast adaptation of deep networks. In International conference on machine learning. PMLR, 1126--1135."},{"key":"e_1_3_2_2_5_1","volume-title":"Neural naturalist: generating fine-grained image comparisons. arXiv preprint arXiv:1909.04101","author":"Forbes Maxwell","year":"2019","unstructured":"Maxwell Forbes , Christine Kaeser-Chen , Piyush Sharma , and Serge Belongie . 2019. Neural naturalist: generating fine-grained image comparisons. arXiv preprint arXiv:1909.04101 ( 2019 ). Maxwell Forbes, Christine Kaeser-Chen, Piyush Sharma, and Serge Belongie. 2019. Neural naturalist: generating fine-grained image comparisons. arXiv preprint arXiv:1909.04101 (2019)."},{"key":"e_1_3_2_2_6_1","volume-title":"Leon Glass, Jeffrey M Hausdorff, Plamen Ch Ivanov, Roger G Mark, Joseph E Mietus, George B Moody, Chung-Kang Peng, and H Eugene Stanley.","author":"Goldberger Ary L","year":"2000","unstructured":"Ary L Goldberger , Luis AN Amaral , Leon Glass, Jeffrey M Hausdorff, Plamen Ch Ivanov, Roger G Mark, Joseph E Mietus, George B Moody, Chung-Kang Peng, and H Eugene Stanley. 2000 . PhysioBank, Physio Toolkit , and PhysioNet : components of a new research resource for complex physiologic signals. circulation 101, 23 (2000), e215--e220. Ary L Goldberger, Luis AN Amaral, Leon Glass, Jeffrey M Hausdorff, Plamen Ch Ivanov, Roger G Mark, Joseph E Mietus, George B Moody, Chung-Kang Peng, and H Eugene Stanley. 2000. PhysioBank, PhysioToolkit, and PhysioNet: components of a new research resource for complex physiologic signals. circulation 101, 23 (2000), e215--e220."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"e_1_3_2_2_8_1","volume-title":"Learning to describe differences between pairs of similar images. arXiv preprint arXiv:1808.10584","author":"Jhamtani Harsh","year":"2018","unstructured":"Harsh Jhamtani and Taylor Berg-Kirkpatrick . 2018. Learning to describe differences between pairs of similar images. arXiv preprint arXiv:1808.10584 ( 2018 ). Harsh Jhamtani and Taylor Berg-Kirkpatrick. 2018. Learning to describe differences between pairs of similar images. arXiv preprint arXiv:1808.10584 (2018)."},{"key":"e_1_3_2_2_9_1","volume-title":"a de-identified publicly available database of chest radiographs with free-text reports. Scientific data 6, 1","author":"Johnson Alistair EW","year":"2019","unstructured":"Alistair EW Johnson , Tom J Pollard , Seth J Berkowitz , Nathaniel R Greenbaum , Matthew P Lungren , Chih-ying Deng, Roger G Mark , and Steven Horng . 2019. MIMIC-CXR , a de-identified publicly available database of chest radiographs with free-text reports. Scientific data 6, 1 ( 2019 ), 1--8. Alistair EW Johnson, Tom J Pollard, Seth J Berkowitz, Nathaniel R Greenbaum, Matthew P Lungren, Chih-ying Deng, Roger G Mark, and Steven Horng. 2019. MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports. Scientific data 6, 1 (2019), 1--8."},{"key":"e_1_3_2_2_10_1","volume-title":"a large publicly available database of labeled chest radiographs. arXiv preprint arXiv:1901.07042","author":"Johnson Alistair EW","year":"2019","unstructured":"Alistair EW Johnson , Tom J Pollard , Nathaniel R Greenbaum , Matthew P Lungren , Chih-ying Deng, Yifan Peng , Zhiyong Lu , Roger G Mark , Seth J Berkowitz , and Steven Horng . 2019. MIMIC-CXR-JPG , a large publicly available database of labeled chest radiographs. arXiv preprint arXiv:1901.07042 ( 2019 ). Alistair EW Johnson, Tom J Pollard, Nathaniel R Greenbaum, Matthew P Lungren, Chih-ying Deng, Yifan Peng, Zhiyong Lu, Roger G Mark, Seth J Berkowitz, and Steven Horng. 2019. MIMIC-CXR-JPG, a large publicly available database of labeled chest radiographs. arXiv preprint arXiv:1901.07042 (2019)."},{"key":"e_1_3_2_2_11_1","volume-title":"Supervised contrastive learning. Advances in neural information processing systems 33","author":"Khosla Prannay","year":"2020","unstructured":"Prannay Khosla , Piotr Teterwak , ChenWang, Aaron Sarna , Yonglong Tian , Phillip Isola , Aaron Maschinot , Ce Liu , and Dilip Krishnan . 2020. Supervised contrastive learning. Advances in neural information processing systems 33 ( 2020 ), 18661--18673. Prannay Khosla, Piotr Teterwak, ChenWang, Aaron Sarna, Yonglong Tian, Phillip Isola, Aaron Maschinot, Ce Liu, and Dilip Krishnan. 2020. Supervised contrastive learning. Advances in neural information processing systems 33 (2020), 18661--18673."},{"key":"e_1_3_2_2_12_1","volume-title":"Asma Ben Abacha, and Dina Demner-Fushman","author":"Lau Jason J","year":"2018","unstructured":"Jason J Lau , Soumya Gayen , Asma Ben Abacha, and Dina Demner-Fushman . 2018 . A dataset of clinically generated visual questions and answers about radiology images. Scientific data 5, 1 (2018), 1--10. Jason J Lau, Soumya Gayen, Asma Ben Abacha, and Dina Demner-Fushman. 2018. A dataset of clinically generated visual questions and answers about radiology images. Scientific data 5, 1 (2018), 1--10."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.3115\/1626355.1626389"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01041"},{"key":"e_1_3_2_2_15_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin . 2004 . Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81. Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"key":"e_1_3_2_2_16_1","volume-title":"ScispaCy: fast and robust models for biomedical natural language processing. arXiv preprint arXiv:1902.07669","author":"Neumann Mark","year":"2019","unstructured":"Mark Neumann , Daniel King , Iz Beltagy , and Waleed Ammar . 2019. ScispaCy: fast and robust models for biomedical natural language processing. arXiv preprint arXiv:1902.07669 ( 2019 ). Mark Neumann, Daniel King, Iz Beltagy, and Waleed Ammar. 2019. ScispaCy: fast and robust models for biomedical natural language processing. arXiv preprint arXiv:1902.07669 (2019)."},{"key":"e_1_3_2_2_17_1","volume-title":"Diep H Dinh, et al.","author":"Nguyen Ha Q","year":"2020","unstructured":"Ha Q Nguyen , Khanh Lam , Linh T Le , Hieu H Pham , Dat Q Tran , Dung B Nguyen , Dung D Le , Chi M Pham , Hang TT Tong , Diep H Dinh, et al. 2020 . VinDr-CXR: An open dataset of chest X-rays with radiologist's annotations. arXiv preprint arXiv:2012.15029 (2020). Ha Q Nguyen, Khanh Lam, Linh T Le, Hieu H Pham, Dat Q Tran, Dung B Nguyen, Dung D Le, Chi M Pham, Hang TT Tong, Diep H Dinh, et al. 2020. VinDr-CXR: An open dataset of chest X-rays with radiologist's annotations. arXiv preprint arXiv:2012.15029 (2020)."},{"key":"e_1_3_2_2_18_1","volume-title":"Automated Generation of Accurate & Fluent Medical X-ray Reports. CoRR abs\/2108.12126","author":"Nguyen Hoang T. N.","year":"2021","unstructured":"Hoang T. N. Nguyen , Dong Nie , Taivanbat Badamdorj , Yujie Liu , Yingying Zhu , Jason Truong , and Li Cheng . 2021. Automated Generation of Accurate & Fluent Medical X-ray Reports. CoRR abs\/2108.12126 ( 2021 ). Hoang T. N. Nguyen, Dong Nie, Taivanbat Badamdorj, Yujie Liu, Yingying Zhu, Jason Truong, and Li Cheng. 2021. Automated Generation of Accurate & Fluent Medical X-ray Reports. CoRR abs\/2108.12126 (2021)."},{"key":"e_1_3_2_2_19_1","volume-title":"Learning conditioned graph structures for interpretable visual question answering. Advances in neural information processing systems 31","author":"Norcliffe-Brown Will","year":"2018","unstructured":"Will Norcliffe-Brown , Stathis Vafeias , and Sarah Parisot . 2018. Learning conditioned graph structures for interpretable visual question answering. Advances in neural information processing systems 31 ( 2018 ). Will Norcliffe-Brown, Stathis Vafeias, and Sarah Parisot. 2018. Learning conditioned graph structures for interpretable visual question answering. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2957513"},{"key":"e_1_3_2_2_21_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311--318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni , Salim Roukos , Todd Ward , and Wei-Jing Zhu . 2002 . Bleu: a method for automatic evaluation of machine translation . In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311--318 . Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311--318."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00472"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ATC52653.2021.9598342"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00198"},{"key":"e_1_3_2_2_26_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren , Kaiming He , Ross Girshick , and Jian Sun . 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28 ( 2015 ). Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073445.1073478"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.369"},{"key":"e_1_3_2_2_30_1","volume-title":"Edward Christopher Dee, William Mitchell, Satyananda Kashyap, Andrea Giovannini, et al.","author":"Wu Joy","year":"2021","unstructured":"Joy Wu , Nkechinyere Agu , Ismini Lourentzou , Arjun Sharma , Joseph Paguio , Jasper Seth Yao , Edward Christopher Dee, William Mitchell, Satyananda Kashyap, Andrea Giovannini, et al. 2021 . Chest ImaGenome Dataset. PhysioNet ( 2021). Joy Wu, Nkechinyere Agu, Ismini Lourentzou, Arjun Sharma, Joseph Paguio, Jasper Seth Yao, Edward Christopher Dee, William Mitchell, Satyananda Kashyap, Andrea Giovannini, et al. 2021. Chest ImaGenome Dataset. PhysioNet (2021)."},{"key":"e_1_3_2_2_31_1","volume-title":"Image Difference Captioning with Pre-training and Contrastive Learning. arXiv preprint arXiv:2202.04298","author":"Yao Linli","year":"2022","unstructured":"Linli Yao , Weiying Wang , and Qin Jin . 2022. Image Difference Captioning with Pre-training and Contrastive Learning. arXiv preprint arXiv:2202.04298 ( 2022 ). Linli Yao, Weiying Wang, and Qin Jin. 2022. Image Difference Captioning with Pre-training and Contrastive Learning. arXiv preprint arXiv:2202.04298 (2022)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_42"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6989"}],"event":{"name":"KDD '23: The 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Long Beach CA USA","acronym":"KDD '23","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3580305.3599819","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3580305.3599819","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:23Z","timestamp":1750182563000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3580305.3599819"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,4]]},"references-count":33,"alternative-id":["10.1145\/3580305.3599819","10.1145\/3580305"],"URL":"https:\/\/doi.org\/10.1145\/3580305.3599819","relation":{},"subject":[],"published":{"date-parts":[[2023,8,4]]},"assertion":[{"value":"2023-08-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}