{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T18:52:26Z","timestamp":1771959146223,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":86,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62172039, 62276110"],"award-info":[{"award-number":["62172039, 62276110"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679594","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:21Z","timestamp":1729452861000},"page":"1867-1877","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards Deconfounded Visual Question Answering via Dual-causal Intervention"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5208-3811","authenticated-orcid":false,"given":"Daowan","family":"Peng","sequence":"first","affiliation":[{"name":"CCIIP Lab, School of Computer Science and Technology, Huazhong University of Science and Technology Joint Laboratory of HUST and Pingan Property &amp; Casualty Research (HPL), Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4488-0102","authenticated-orcid":false,"given":"Wei","family":"Wei","sequence":"additional","affiliation":[{"name":"CCIIP Lab, School of Computer Science and Technology, Huazhong University of Science and Technology Joint Laboratory of HUST and Pingan Property &amp; Casualty Research (HPL), Wuhan, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR. 10044--10054","author":"Abbasnejad Ehsan","unstructured":"Ehsan Abbasnejad, Damien Teney, Amin Parvaneh, Javen Shi, and Anton van den Hengel. 2020. Counterfactual vision and language learning. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR. 10044--10054."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00522"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_1_5_1","volume-title":"RMLVQA: A Margin Loss Approach for Visual Question Answering With Language Biases. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR. 11671--11680","author":"Basu Abhipsa","year":"2023","unstructured":"Abhipsa Basu, Sravanti Addepalli, and R Venkatesh Babu. 2023. RMLVQA: A Margin Loss Approach for Visual Question Answering With Language Biases. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR. 11671--11680."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence (AAAI). 8102--8109","author":"Hedi","year":"2019","unstructured":"Hedi Ben-younes, Remi Cadene, Nicolas Thome, and Matthieu Cord. 2019. BLOCK: Bilinear Superdiagonal Fusion for Visual Question Answering and Visual Relationship Detection. In Proceedings of the AAAI Conference on Artificial Intelligence (AAAI). 8102--8109."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1080\/00405840701764706"},{"key":"e_1_3_2_1_8_1","volume-title":"Rubi: Reducing unimodal biases for visual question answering. In Advances in Neural Information Processing, NeurIPS. 839--850.","author":"Cadene Remi","year":"2019","unstructured":"Remi Cadene, Corentin Dancette, Matthieu Cord, Devi Parikh, et al. 2019. Rubi: Reducing unimodal biases for visual question answering. In Advances in Neural Information Processing, NeurIPS. 839--850."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01081"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3290012"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_6"},{"key":"e_1_3_2_1_12_1","volume-title":"Generative Bias for Robust Visual Question Answering. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR. 11681--11690","author":"Cho Jae Won","year":"2023","unstructured":"Jae Won Cho, Dong-Jin Kim, Hyeonggon Ryu, and In So Kweon. 2023. Generative Bias for Robust Visual Question Answering. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR. 11681--11690."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the Conference on Empirical Methods in Natural Language Processing, EMNLP. 4069--4082","author":"Clark Christopher","year":"2019","unstructured":"Christopher Clark, Mark Yatskar, and Luke Zettlemoyer. 2019. Don?t Take the Easy Way Out: Ensemble Based Methods for Avoiding Known Dataset Biases. In Proceedings of the Conference on Empirical Methods in Natural Language Processing, EMNLP. 4069--4082."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00160"},{"key":"e_1_3_2_1_15_1","unstructured":"Alexander D'Amour. 2019. On multi-cause approaches to causal inference with unobserved counfounding: Two cautionary failure cases and a promising alternative. In AISTATS. 3478--3486."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-020-00257-z"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.63"},{"key":"e_1_3_2_1_18_1","unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarial nets. In Advances in Neural Information Processing NeurIPS. 2672--2680."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3128322"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00502"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00161"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3240337"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1080\/02602938.2021.1888075"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_47"},{"key":"e_1_3_2_1_27_1","volume-title":"Distilling the knowledge in a neural network. CoRR","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. CoRR, Vol. abs\/1503.02531 (2015)."},{"key":"e_1_3_2_1_28_1","unstructured":"Matthew Honnibal and Ines Montani. 2017. spaCy 2: Natural language understanding with Bloom embeddings convolutional neural networks and incremental parsing. https:\/\/github.com\/explosion\/spaCy."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00395"},{"key":"e_1_3_2_1_30_1","unstructured":"Drew Hudson and Christopher D Manning. 2019. Learning by abstraction: The neural state machine. In Advances in Neural Information Processing NeurIPS. 5901--5914."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00686"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6776"},{"key":"e_1_3_2_1_33_1","volume-title":"International Conference on Machine Learning, ICML. 15435--15450","author":"Jung Yeonsung","year":"2023","unstructured":"Yeonsung Jung, Hajin Shim, June Yong Yang, and Eunho Yang. 2023. Fighting Fire with Fire: Contrastive Debiasing without Bias-free Data via Generative Bias-transformation. In International Conference on Machine Learning, ICML. 15435--15450."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00280"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00419"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.512"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Camila Kolling Martin More Nathan Gavenski Eduardo Pooch Ot\u00e1vio Parraga and Rodrigo C Barros. 2022. Efficient counterfactual debiasing for visual question answering. In WACV. 3001--3010.","DOI":"10.1109\/WACV51458.2022.00263"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58601-0_2"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.5"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612472"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Yu Li Bojie Hu Fengshuo Zhang Yahan Yu Jian Liu Yufeng Chen and Jinan Xu. 2023. A Multi-modal Debiasing Model with Dynamical Constraint for Robust Visual Question Answering. In Findings of the Association for Computational Linguistics: ACL. 5032--5045.","DOI":"10.18653\/v1\/2023.findings-acl.311"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462981"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.265"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01751"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3284038"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i12.29315"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3190686"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/453"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1111\/pops.12059"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01251"},{"key":"e_1_3_2_1_51_1","unstructured":"Yulei Niu and Hanwang Zhang. 2021. Introspective distillation for robust question answering. In Advances in Neural Information Processing NeurIPS. 16292--16304."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Timothy Ossowski and Junjie Hu. 2023. Multimodal Prompt Retrieval for Generative Visual Question Answering. In Findings of the Association for Computational Linguistics: ACL. 2518--2535.","DOI":"10.18653\/v1\/2023.findings-acl.158"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1214\/09-ss057"},{"key":"e_1_3_2_1_54_1","unstructured":"Judea Pearl. 2009. Causality. Cambridge university press."},{"key":"e_1_3_2_1_55_1","volume-title":"Jewell","author":"Pearl Judea","year":"2016","unstructured":"Judea Pearl, M Maria Glymour, and Nicholas P. Jewell. 2016. Causal Inference in Statistics: A Primer. John Wiley & Sons. https:\/\/api.semanticscholar.org\/CorpusID:148322624"},{"key":"e_1_3_2_1_56_1","unstructured":"Judea Pearl and Dana Mackenzie. 2018. The book of why: the new science of cause and effect. Basic books."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/457"},{"key":"e_1_3_2_1_58_1","unstructured":"Sainandan Ramakrishnan Aishwarya Agrawal and Stefan Lee. 2018. Overcoming language priors in visual question answering with adversarial regularization. In Advances in Neural Information Processing NeurIPS. 1548--1558."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"crossref","unstructured":"Yuval Reif and Roy Schwartz. 2023. Fighting Bias with Bias: Promoting Model Robustness by Amplifying Dataset Biases. In Findings of the Association for Computational Linguistics: ACL. 13169--13189.","DOI":"10.18653\/v1\/2023.findings-acl.833"},{"key":"e_1_3_2_1_60_1","unstructured":"Shaoqing Ren Kaiming He Ross Girshick and Jian Sun. 2015. Faster R-CNN: Towards real-time object detection with region proposal networks. In Advances in Neural Information Processing NeurIPS. 91--99."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176344064"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1080\/24709360.2019.1670513"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.727"},{"key":"e_1_3_2_1_64_1","unstructured":"Qingyi Si Yuanxin Liu Fandong Meng Zheng Lin Peng Fu Yanan Cao Weiping Wang and Jie Zhou. 2022. Towards Robust Visual Question Answering: Making the Most of Biased Samples via Contrastive Learning. In Findings of the Association for Computational Linguistics: EMNLP. 6650--6662."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"crossref","unstructured":"Qingyi Si Fandong Meng Mingyu Zheng Zheng Lin Yuanxin Liu Peng Fu Yanan Cao Weiping Wang and Jie Zhou. 2022. Language Prior Is Not the Only Shortcut: A Benchmark for Shortcut Learning in VQA. In Findings of the Association for Computational Linguistics: EMNLP. 3698--3712.","DOI":"10.18653\/v1\/2022.findings-emnlp.271"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1514"},{"key":"e_1_3_2_1_67_1","volume-title":"Adversarial Visual Robustness by Causal Intervention. CoRR","author":"Tang Kaihua","year":"2021","unstructured":"Kaihua Tang, Mingyuan Tao, and Hanwang Zhang. 2021. Adversarial Visual Robustness by Causal Intervention. CoRR, Vol. abs\/2106.09534 (2021)."},{"key":"e_1_3_2_1_68_1","volume-title":"European Conference on Computer Vision, ECCV. 580--599","author":"Teney Damien","unstructured":"Damien Teney, Ehsan Abbasnedjad, and Anton van den Hengel. 2020. Learning what makes a difference from counterfactual examples and gradient supervision. In European Conference on Computer Vision, ECCV. 580--599."},{"key":"e_1_3_2_1_69_1","first-page":"407","article-title":"On the value of out-of-distribution testing: An example of goodhart's law. In Advances in Neural Information Processing","volume":"33","author":"Teney Damien","year":"2020","unstructured":"Damien Teney, Ehsan Abbasnejad, Kushal Kafle, Robik Shrestha, Christopher Kanan, and Anton Van Den Hengel. 2020. On the value of out-of-distribution testing: An example of goodhart's law. In Advances in Neural Information Processing, NeurIPS, Vol. 33. 407--417.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21389"},{"key":"e_1_3_2_1_71_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information Processing NeurIPS. 5998--6008."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01077"},{"key":"e_1_3_2_1_73_1","volume-title":"Test-time model adaptation for visual question answering with debiased self-supervisions","author":"Wen Zhiquan","year":"2023","unstructured":"Zhiquan Wen, Shuaicheng Niu, Ge Li, Qingyao Wu, Mingkui Tan, and Qi Wu. 2023. Test-time model adaptation for visual question answering with debiased self-supervisions. IEEE TMM (2023)."},{"key":"e_1_3_2_1_74_1","unstructured":"Zhiquan Wen Yaowei Wang Mingkui Tan Qingyao Wu and Qi Wu. 2023. Digging out Discrimination Information from Generated Samples for Robust Visual Question Answering. In Findings of the Association for Computational Linguistics: ACL. 6910--6928."},{"key":"e_1_3_2_1_75_1","first-page":"3784","article-title":"Debiased visual question answering from feature and sample perspectives. In Advances in Neural Information Processing Advances in Neural Information Processing","volume":"34","author":"Wen Zhiquan","year":"2021","unstructured":"Zhiquan Wen, Guanghui Xu, Mingkui Tan, Qingyao Wu, and Qi Wu. 2021. Debiased visual question answering from feature and sample perspectives. In Advances in Neural Information Processing Advances in Neural Information Processing, NeurIPS, Vol. 34. 3784--3796.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_76_1","volume-title":"On the convergence properties of the EM algorithm. The Annals of statistics","author":"Jeff Wu CF","year":"1983","unstructured":"CF Jeff Wu. 1983. On the convergence properties of the EM algorithm. The Annals of statistics (1983), 95--103."},{"key":"e_1_3_2_1_77_1","volume-title":"Proceedings of the 32nd International Conference on Machine Learning, ICML. 2048--2057","author":"Xu Kelvin","year":"2015","unstructured":"Kelvin Xu, Jimmy Ba, Ryan Kiros, Kyunghyun Cho, Aaron Courville, Ruslan Salakhudinov, Rich Zemel, and Yoshua Bengio. 2015. Show, attend and tell: Neural image caption generation with visual attention. In Proceedings of the 32nd International Conference on Machine Learning, ICML. 2048--2057."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00972"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"key":"e_1_3_2_1_80_1","unstructured":"Charles Yu Sullam Jeoung Anish Kasi Pengfei Yu and Heng Ji. 2023. Unlearning bias in language models by partitioning gradients. In Findings of the Association for Computational Linguistics: ACL. 6032--6048."},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.799"},{"key":"e_1_3_2_1_82_1","volume-title":"Discovering the Real Association: Multimodal Causal Reasoning in Video Question Answering. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR. 19027--19036","author":"Zang Chuanqi","year":"2023","unstructured":"Chuanqi Zang, Hanqing Wang, Mingtao Pei, and Wei Liang. 2023. Discovering the Real Association: Multimodal Causal Reasoning in Video Question Answering. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR. 19027--19036."},{"key":"e_1_3_2_1_83_1","first-page":"655","article-title":"Causal intervention for weakly-supervised semantic segmentation. In Advances in Neural Information Processing","volume":"33","author":"Zhang Dong","year":"2020","unstructured":"Dong Zhang, Hanwang Zhang, Jinhui Tang, Xian-Sheng Hua, and Qianru Sun. 2020. Causal intervention for weakly-supervised semantic segmentation. In Advances in Neural Information Processing, NeurIPS, Vol. 33. 655--666.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_84_1","volume-title":"International Conference on Machine Learning, ICML. 26958--26970","author":"Zhao Haiteng","year":"2022","unstructured":"Haiteng Zhao, Chang Ma, Xinshuai Dong, Anh Tuan Luu, Zhi-Hong Deng, and Hanwang Zhang. 2022. Certified robustness against natural language attacks by causal intervention. In International Conference on Machine Learning, ICML. 26958--26970."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1323"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/151"}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679594","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679594","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:23Z","timestamp":1750294703000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679594"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":86,"alternative-id":["10.1145\/3627673.3679594","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679594","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}