{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T20:07:28Z","timestamp":1780344448716,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,6]],"date-time":"2023-12-06T00:00:00Z","timestamp":1701820800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906044"],"award-info":[{"award-number":["61906044"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key projects of natural science research in Anhui colleges and universities","award":["2023AH050406, 2023AH050418, 2022AH051324, KJ2020ZD48 and gxgwfx2021034"],"award-info":[{"award-number":["2023AH050406, 2023AH050418, 2022AH051324, KJ2020ZD48 and gxgwfx2021034"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2020M681984"],"award-info":[{"award-number":["2020M681984"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,6]]},"DOI":"10.1145\/3595916.3626381","type":"proceedings-article","created":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T16:34:41Z","timestamp":1704126881000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Adaptive Fusion for Visual Question Answering: Integrating Multi-Label Classification and Similarity Matching"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-4753-3529","authenticated-orcid":false,"given":"Zhengtao","family":"Yu","sequence":"first","affiliation":[{"name":"Fuyang Normal University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7440-0109","authenticated-orcid":false,"given":"Jia","family":"Zhao","sequence":"additional","affiliation":[{"name":"Fuyang Normal University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2962-0619","authenticated-orcid":false,"given":"Huiling","family":"Wang","sequence":"additional","affiliation":[{"name":"Fuyang normal university, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7417-2265","authenticated-orcid":false,"given":"Chenliang","family":"Guo","sequence":"additional","affiliation":[{"name":"Fuyang Normal University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9000-8228","authenticated-orcid":false,"given":"Tong","family":"Zhou","sequence":"additional","affiliation":[{"name":"Fuyang Normal University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1651-1229","authenticated-orcid":false,"given":"Chongxiang","family":"Sun","sequence":"additional","affiliation":[{"name":"Fuyang Normal University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,1]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00522"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_1_4_1","volume-title":"RUBi: Reducing Unimodal Biases for Visual Question Answering. (Dec","author":"Cadene Remi","year":"2019","unstructured":"Remi Cadene , Corentin Dancette , Hedi Ben-younes, Matthieu Cord , and Devi Parikh . 2019. RUBi: Reducing Unimodal Biases for Visual Question Answering. (Dec 2019 ). Remi Cadene, Corentin Dancette, Hedi Ben-younes, Matthieu Cord, and Devi Parikh. 2019. RUBi: Reducing Unimodal Biases for Visual Question Answering. (Dec 2019)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3290012"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.9897273"},{"key":"e_1_3_2_1_8_1","volume-title":"MUTANT: A Training Paradigm for Out-of-Distribution Generalization in Visual Question Answering. In Conference on Empirical Methods in Natural Language Processing.","author":"Gokhale Tejas","year":"2020","unstructured":"Tejas Gokhale , Pratyay Banerjee , Chitta Baral , and Yezhou Yang . 2020 . MUTANT: A Training Paradigm for Out-of-Distribution Generalization in Visual Question Answering. In Conference on Empirical Methods in Natural Language Processing. Tejas Gokhale, Pratyay Banerjee, Chitta Baral, and Yezhou Yang. 2020. MUTANT: A Training Paradigm for Out-of-Distribution Generalization in Visual Question Answering. In Conference on Empirical Methods in Natural Language Processing."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2022.3185008"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"e_1_3_2_1_11_1","unstructured":"Yangyang Guo Liqiang Nie Zhiyong Cheng Feng Ji Ji Zhang and A. Bimbo. 2021. AdaVQA: Overcoming Language Priors with Adapted Margin Cosine Loss. ArXiv abs\/2105.01993 (2021).  Yangyang Guo Liqiang Nie Zhiyong Cheng Feng Ji Ji Zhang and A. Bimbo. 2021. AdaVQA: Overcoming Language Priors with Adapted Margin Cosine Loss. ArXiv abs\/2105.01993 (2021)."},{"key":"e_1_3_2_1_12_1","first-page":"227","article-title":"Loss Re-Scaling VQA: Revisiting the Language Prior Problem From a Class-Imbalance View","volume":"31","author":"Guo Yangyang","year":"2020","unstructured":"Yangyang Guo , Liqiang Nie , Zhiyong Cheng , Qi Tian , and Min Zhang . 2020 . Loss Re-Scaling VQA: Revisiting the Language Prior Problem From a Class-Imbalance View . IEEE Transactions on Image Processing 31 (2020), 227 \u2013 238 . Yangyang Guo, Liqiang Nie, Zhiyong Cheng, Qi Tian, and Min Zhang. 2020. Loss Re-Scaling VQA: Revisiting the Language Prior Problem From a Class-Imbalance View. IEEE Transactions on Image Processing 31 (2020), 227\u2013238.","journal-title":"IEEE Transactions on Image Processing"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2207.11850"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3149958"},{"key":"e_1_3_2_1_15_1","volume-title":"Revisiting Visual Question Answering Baselines. CoRR abs\/1606.08390","author":"Jabri Allan","year":"2016","unstructured":"Allan Jabri , Armand Joulin , and Laurens van\u00a0der Maaten . 2016. Revisiting Visual Question Answering Baselines. CoRR abs\/1606.08390 ( 2016 ). arXiv:1606.08390http:\/\/arxiv.org\/abs\/1606.08390 Allan Jabri, Armand Joulin, and Laurens van\u00a0der Maaten. 2016. Revisiting Visual Question Answering Baselines. CoRR abs\/1606.08390 (2016). arXiv:1606.08390http:\/\/arxiv.org\/abs\/1606.08390"},{"key":"e_1_3_2_1_16_1","volume-title":"Self-supervised vision-language pretraining for Medical visual question answering. ArXiv abs\/2211.13594","author":"Li Pengfei","year":"2022","unstructured":"Pengfei Li , Gang Liu , Lin Tan , Jinying Liao , and Shenjun Zhong . 2022. Self-supervised vision-language pretraining for Medical visual question answering. ArXiv abs\/2211.13594 ( 2022 ). Pengfei Li, Gang Liu, Lin Tan, Jinying Liao, and Shenjun Zhong. 2022. Self-supervised vision-language pretraining for Medical visual question answering. ArXiv abs\/2211.13594 (2022)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498340"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01251"},{"key":"e_1_3_2_1_19_1","unstructured":"Yulei Niu and Hanwang Zhang. 2021. Introspective Distillation for Robust Question Answering. In Neural Information Processing Systems.  Yulei Niu and Hanwang Zhang. 2021. Introspective Distillation for Robust Question Answering. In Neural Information Processing Systems."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2302.07920"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10150-3"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00569"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2204.13399"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPSN54338.2022.00029"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_26_1","volume-title":"Zero-Shot Visual Question Answering. CoRR abs\/1611.05546","author":"Teney Damien","year":"2016","unstructured":"Damien Teney and Anton van\u00a0den Hengel . 2016. Zero-Shot Visual Question Answering. CoRR abs\/1611.05546 ( 2016 ). arXiv:1611.05546http:\/\/arxiv.org\/abs\/1611.05546 Damien Teney and Anton van\u00a0den Hengel. 2016. Zero-Shot Visual Question Answering. CoRR abs\/1611.05546 (2016). arXiv:1611.05546http:\/\/arxiv.org\/abs\/1611.05546"},{"key":"e_1_3_2_1_27_1","volume-title":"International Conference on Computational Linguistics.","author":"Wu Yike","year":"2022","unstructured":"Yike Wu , Yu Zhao , Shiwan Zhao , Ying Zhang , Xiaojie Yuan , Guoqing Zhao , and Ning Jiang . 2022 . Overcoming Language Priors in Visual Question Answering via Distinguishing Superficially Similar Instances . In International Conference on Computational Linguistics. Yike Wu, Yu Zhao, Shiwan Zhao, Ying Zhang, Xiaojie Yuan, Guoqing Zhao, and Ning Jiang. 2022. Overcoming Language Priors in Visual Question Answering via Distinguishing Superficially Similar Instances. In International Conference on Computational Linguistics."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3155488"},{"key":"e_1_3_2_1_29_1","volume-title":"Unified Focal loss: Generalising Dice and cross entropy-based losses to handle class imbalanced medical image segmentation. Computerized Medical Imaging and Graphics 95","author":"Yeung Michael","year":"2021","unstructured":"Michael Yeung , Evis Sala , Carola-Bibiane Sch\u00f6nlieb , and Leonardo Rundo . 2021. Unified Focal loss: Generalising Dice and cross entropy-based losses to handle class imbalanced medical image segmentation. Computerized Medical Imaging and Graphics 95 ( 2021 ). Michael Yeung, Evis Sala, Carola-Bibiane Sch\u00f6nlieb, and Leonardo Rundo. 2021. Unified Focal loss: Generalising Dice and cross entropy-based losses to handle class imbalanced medical image segmentation. Computerized Medical Imaging and Graphics 95 (2021)."},{"key":"e_1_3_2_1_30_1","volume-title":"Yin and Yang: Balancing and Answering Binary Visual Questions. CoRR abs\/1511.05099","author":"Zhang Peng","year":"2015","unstructured":"Peng Zhang , Yash Goyal , Douglas Summers-Stay , Dhruv Batra , and Devi Parikh . 2015. Yin and Yang: Balancing and Answering Binary Visual Questions. CoRR abs\/1511.05099 ( 2015 ). arXiv:1511.05099http:\/\/arxiv.org\/abs\/1511.05099 Peng Zhang, Yash Goyal, Douglas Summers-Stay, Dhruv Batra, and Devi Parikh. 2015. Yin and Yang: Balancing and Answering Binary Visual Questions. CoRR abs\/1511.05099 (2015). arXiv:1511.05099http:\/\/arxiv.org\/abs\/1511.05099"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-022-06923-0"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.5555\/3491440.3491591"}],"event":{"name":"MMAsia '23: ACM Multimedia Asia","location":"Tainan Taiwan","acronym":"MMAsia '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["ACM Multimedia Asia 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3595916.3626381","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3595916.3626381","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:48:40Z","timestamp":1750286920000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3595916.3626381"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,6]]},"references-count":32,"alternative-id":["10.1145\/3595916.3626381","10.1145\/3595916"],"URL":"https:\/\/doi.org\/10.1145\/3595916.3626381","relation":{},"subject":[],"published":{"date-parts":[[2023,12,6]]},"assertion":[{"value":"2024-01-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}