{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T17:36:11Z","timestamp":1758476171186,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681663","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"955-964","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Combating Visual Question Answering Hallucinations via Robust Multi-Space Co-Debias Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9648-0140","authenticated-orcid":false,"given":"Jiawei","family":"Zhu","sequence":"first","affiliation":[{"name":"Beijing Institute of Technology, Zhuhai, Zhuhai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7924-4039","authenticated-orcid":false,"given":"Yishu","family":"Liu","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8329-4398","authenticated-orcid":false,"given":"Huanjia","family":"Zhu","sequence":"additional","affiliation":[{"name":"South China Normal University, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0190-969X","authenticated-orcid":false,"given":"Hui","family":"Lin","sequence":"additional","affiliation":[{"name":"China Academic of Electronics and Information Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0402-5382","authenticated-orcid":false,"given":"Yuncheng","family":"Jiang","sequence":"additional","affiliation":[{"name":"South China Normal University, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1470-6998","authenticated-orcid":false,"given":"Zheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2497-6214","authenticated-orcid":false,"given":"Bingzhi","family":"Chen","sequence":"additional","affiliation":[{"name":"Beijing Institute of Technology, Zhuhai, Zhuhai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00522"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 11671--11680","author":"Basu Abhipsa","key":"e_1_3_2_1_4_1","unstructured":"Abhipsa Basu, Sravanti Addepalli, and R. Venkatesh Babu. 2023. RMLVQA: A Margin Loss Approach for Visual Question Answering With Language Biases. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 11671--11680."},{"key":"e_1_3_2_1_5_1","volume-title":"Rubi: Reducing unimodal biases in visual question answering. In Advances in Neural Information Processing Systems.","author":"Cadene Remi","year":"2019","unstructured":"Remi Cadene, Corentin Dancette, Hedi Ben-younes, Matthieu Cord, and Devi Parikh. 2019. Rubi: Reducing unimodal biases in visual question answering. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i20.30196"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01081"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_6"},{"volume-title":"International conference on machine learning. 1597--1607","author":"Chen Ting","key":"e_1_3_2_1_9_1","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey E. Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. 1597--1607."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01124"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1418"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Krioukov D Papadopoulos F and Kitsak M. 2010. Hyperbolic geometry of complex networks. Physical Review E?Statistical Nonlinear and Soft Matter Physics Vol. 82 3 (2010).","DOI":"10.1103\/PhysRevE.82.036106"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i19.30100"},{"key":"e_1_3_2_1_14_1","volume-title":"Statistical analysis of spherical data","author":"Fisher Nicholas I","year":"1993","unstructured":"Nicholas I Fisher, Toby Lewis, and Brian JJ Embleton. 1993. Statistical analysis of spherical data. Cambridge university press (1993)."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the Advances in Neural Information Processing Systems.","author":"Garg Saurabh","year":"2023","unstructured":"Saurabh Garg, Amrith Setlur, Zachary Lipton, Sivaraman Balakrishnan, Virginia Smith, and Aditi Raghunathan. 2023. Complementary Benefits of Contrastive Learning and Self-Training Under Distribution Shift. In Proceedings of the Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29771"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/98"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00502"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00161"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3240337"},{"key":"e_1_3_2_1_22_1","volume-title":"Visual Perturbation-aware Collaborative Learning for Overcoming the Language Prior Problem. arXiv preprint arXiv:2207.11850","author":"Han Yudong","year":"2022","unstructured":"Yudong Han, Liqiang Nie, Jianhua Yin, Jianlong Wu, and Yan Yan. 2022. Visual Perturbation-aware Collaborative Learning for Overcoming the Language Prior Problem. arXiv preprint arXiv:2207.11850 (2022)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018385"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/3618408.3618926"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00114"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.3008248"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 2572--2581","author":"Kolling Camila","key":"e_1_3_2_1_28_1","unstructured":"Camila Kolling, Martin D. More, Nathan Gavenski, Eduardo H. P. Pooch, Ot\u00e1vio Parraga, and Rodrigo C. 2022. Efficient counterfactual debiasing for visual question answering. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 2572--2581."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00336"},{"key":"e_1_3_2_1_30_1","first-page":"5032","article-title":"A Multi-modal Debiasing Model with Dynamical Constraint for Robust Visual Question Answering. In Findings of the Association for Computational Linguistics","volume":"2023","author":"Li Yu","year":"2023","unstructured":"Yu Li, Bojie Hu, Fengshuo Zhang, Yahan Yu, Jian Liu, Yufeng Chen, and Jinan Xu. 2023. A Multi-modal Debiasing Model with Dynamical Constraint for Robust Visual Question Answering. In Findings of the Association for Computational Linguistics : ACL 2023. 5032--5045.","journal-title":"ACL"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.265"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00373"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning. 3122--3130","author":"Lipton Zachary","year":"2018","unstructured":"Zachary Lipton, Yu-Xiang Wang, and Alexander Smola. 2018. Detecting and Correcting for Label Shift with Black Box Predictors. In Proceedings of the 35th International Conference on Machine Learning. 3122--3130."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3026892"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.713"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of The 33rd International Conference on Machine Learning. 507--516","author":"Liu Weiyang","year":"2016","unstructured":"Weiyang Liu, Yandong Wen, Zhiding Yu, and Meng Yang. 2016. Large-Margin Softmax Loss for Convolutional Neural Networks. In Proceedings of The 33rd International Conference on Machine Learning. 507--516."},{"key":"e_1_3_2_1_37_1","volume-title":"Deep Fuzzy Multi-Teacher Distillation Network for Medical Visual Question Answering","author":"Liu Yishu","year":"2024","unstructured":"Yishu Liu, Bingzhi Chen, Shuihua Wang, Guangming Lu, and Zheng Zhang. 2024. Deep Fuzzy Multi-Teacher Distillation Network for Medical Visual Question Answering. IEEE Transactions on Fuzzy Systems (2024), 1--15."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2024.3355640"},{"key":"e_1_3_2_1_39_1","volume-title":"Nicholas","author":"Pearl Judea","year":"2016","unstructured":"Judea Pearl, Glymour Madelyn, and P. Jewell. Nicholas. 2016. Causal inference in statistics: A primer. John Wiley & Sons (2016)."},{"key":"e_1_3_2_1_40_1","unstructured":"Sainandan Ramakrishnan Aishwarya Agrawal and Stefan Lee. 2018. Overcoming language priors in visual question answering with adversarial regularization. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00727"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3067067"},{"key":"e_1_3_2_1_43_1","volume-title":"Multi-Facet Recommender Networks with Spherical Optimization. In 2021 IEEE 37th International Conference on Data Engineering. 1524--1535","author":"Tan Yanchao","year":"2021","unstructured":"Yanchao Tan, Carl Yang, Xiangyu Wei, Yun Ma, and Xiaolin Zheng. 2021. Multi-Facet Recommender Networks with Spherical Optimization. In 2021 IEEE 37th International Conference on Data Engineering. 1524--1535."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"e_1_3_2_1_45_1","volume-title":"Cross Modality Bias in Visual Question Answering: A Causal View with Possible Worlds VQA","author":"Vosoughi Ali","year":"2024","unstructured":"Ali Vosoughi, Shijian Deng, Songyang Zhang, Yapeng Tian, Chenliang Xu, and Jiebo Luo. 2024. Cross Modality Bias in Visual Question Answering: A Causal View with Possible Worlds VQA. IEEE Transactions on Multimedia (2024)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548155"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i9.28905"},{"key":"e_1_3_2_1_48_1","volume-title":"International conference on machine learning. 9929--9939","author":"Wang Tongzhou","year":"2020","unstructured":"Tongzhou Wang and Phillip Isola. 2020. Understanding contrastive representation learning through alignment and uniformity on the hypersphere. In International conference on machine learning. 9929--9939."},{"key":"e_1_3_2_1_49_1","unstructured":"Zhiquan Wen Yaowei Wang Mingkui Tan Qingyao Wu and Qi Wu. 2023. Digging out Discrimination Information from Generated Samples for Robust Visual Question Answering. In Findings of the Association for Computational Linguistics. 6910--6928."},{"key":"e_1_3_2_1_50_1","volume-title":"Proceedings of the Advances in Neural Information Processing Systems. 3784--3796","author":"Wen Zhiquan","year":"2021","unstructured":"Zhiquan Wen, Guanghui Xu, Mingkui Tan, Qingyao Wu, and Qi Wu. 2021. Debiased Visual Question Answering from Feature and Sample Perspectives. In Proceedings of the Advances in Neural Information Processing Systems. 3784--3796."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00047"},{"key":"e_1_3_2_1_52_1","volume-title":"X2-Softmax: Margin adaptive loss function for face recognition. Expert Systems with Applications","author":"Xu Jiamu","year":"2024","unstructured":"Jiamu Xu, Xiaoxiang Liu, Xinyuan Zhang, Yain-Whar Si, Xiaofan Li, Zheng Shi, Ke Wang, and Xueyuan Gong. 2024. X2-Softmax: Margin adaptive loss function for face recognition. Expert Systems with Applications (2024)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.5555\/3618408.3620042"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2011.6115851"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00515"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2024.3368341"},{"key":"e_1_3_2_1_57_1","volume-title":"Combating Representation Learning Disparity with Geometric Harmonization. In Thirty-seventh Conference on Neural Information Processing Systems.","author":"Zhou Zhihan","year":"2023","unstructured":"Zhihan Zhou, Jiangchao Yao, Feng Hong, Ya Zhang, Bo Han, and Yanfeng Wang. 2023. Combating Representation Learning Disparity with Geometric Harmonization. In Thirty-seventh Conference on Neural Information Processing Systems."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681663","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681663","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:50Z","timestamp":1750295870000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681663"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":57,"alternative-id":["10.1145\/3664647.3681663","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681663","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}