{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T18:52:09Z","timestamp":1767034329934,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100019491","name":"National Natural Science Foundation of China - State Grid Corporation Joint Fund for Smart Grid","doi-asserted-by":"publisher","award":["1972059, 61773272, 6160233"],"award-info":[{"award-number":["1972059, 61773272, 6160233"]}],"id":[{"id":"10.13039\/501100019491","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592272","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"253-261","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Knowledge-Aware Causal Inference Network for Visual Dialog"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5627-050X","authenticated-orcid":false,"given":"Zefan","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Soochow University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6965-4158","authenticated-orcid":false,"given":"Yi","family":"Ji","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Soochow University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1495-5138","authenticated-orcid":false,"given":"Chunping","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Soochow University, China"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"volume-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics.","author":"Agarwal S.","key":"e_1_3_2_1_1_1","unstructured":"S. Agarwal, T. Bui, J.\u00a0Y. Lee, I. Konstas, and V. Rieser. 2020. History for Visual Dialog: Do we really need it?. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_2_1_3_1","first-page":"4","article-title":"VQA: Visual Question Answering","volume":"123","author":"Antol S.","year":"2015","unstructured":"S. Antol, A. Agrawal, J. Lu, M. Mitchell, and D. Parikh. 2015. VQA: Visual Question Answering. International Journal of Computer Vision 123, 1 (2015), 4\u201331.","journal-title":"International Journal of Computer Vision"},{"key":"e_1_3_2_1_4_1","volume-title":"Benchmarking Knowledge-Enhanced Commonsense Question Answering via Knowledge-to-Text Transformation. CoRR abs\/2101.00760","author":"Bian Ning","year":"2021","unstructured":"Ning Bian, Xianpei Han, Bo Chen, and Le Sun. 2021. Benchmarking Knowledge-Enhanced Commonsense Question Answering via Knowledge-to-Text Transformation. CoRR abs\/2101.00760 (2021). arXiv:2101.00760https:\/\/arxiv.org\/abs\/2101.00760"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01757"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Feilong Chen Xiuyi Chen Fandong Meng Peng Li and Jie Zhou. 2021. GoG: Relation-aware Graph-over-Graph Network for Visual Dialog. arxiv:2109.08475\u00a0[cs.CL]","DOI":"10.18653\/v1\/2021.findings-acl.20"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.93"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747769"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9859849"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-88361-4_9"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.121"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/693"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.170"},{"key":"e_1_3_2_1_14_1","volume-title":"KBGN: Knowledge-Bridge Graph Network for Adaptive Vision-Text Reasoning in Visual Dialogue. arxiv:2008.04858\u00a0[cs.CV]","author":"Jiang Xiaoze","year":"2020","unstructured":"Xiaoze Jiang, Siyi Du, Zengchang Qin, Yajing Sun, and Jing Yu. 2020. KBGN: Knowledge-Bridge Graph Network for Adaptive Vision-Text Reasoning in Visual Dialogue. arxiv:2008.04858\u00a0[cs.CV]"},{"key":"e_1_3_2_1_15_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_16_1","volume-title":"Kipf and Max Welling","author":"N.","year":"2017","unstructured":"Thomas\u00a0N. Kipf and Max Welling. 2017. Semi-Supervised Classification with Graph Convolutional Networks. In 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings. OpenReview.net. https:\/\/openreview.net\/forum?id=SJU4ayYgl"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.3233\/SW-140134"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.03.104"},{"key":"e_1_3_2_1_19_1","volume-title":"Microsoft COCO: Common Objects in Context. European Conference on Computer Vision","author":"Lin Y.","year":"2014","unstructured":"T.\u00a0Y. Lin, M. Maire, S. Belongie, J. Hays, and C.\u00a0L. Zitnick. 2014. Microsoft COCO: Common Objects in Context. European Conference on Computer Vision (2014), 740\u2013755."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Van-Quang Nguyen Masanori Suganuma and Takayuki Okatani. 2020. Efficient Attention Mechanism for Visual Dialog that can Handle All the Interactions between Multiple Inputs. arxiv:1911.11390\u00a0[cs.CV]","DOI":"10.1007\/978-3-030-58586-0_14"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01251"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00684"},{"key":"e_1_3_2_1_23_1","unstructured":"Sungjin Park Taesun Whang Yeochan Yoon and Heuiseok Lim. 2020. Multi-View Attention Network for Visual Dialog. arxiv:2004.14025\u00a0[cs.AI]"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01087"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013027"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.269"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.5555\/3398761.3399067"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00265"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746098"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00506"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME51207.2021.9428279"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.10.121"}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Thessaloniki Greece","acronym":"ICMR '23"},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592272","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592272","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:30Z","timestamp":1750178250000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592272"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":37,"alternative-id":["10.1145\/3591106.3592272","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592272","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}