{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,23]],"date-time":"2026-05-23T01:04:31Z","timestamp":1779498271046,"version":"3.53.1"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783031039478","type":"print"},{"value":"9783031039485","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-03948-5_23","type":"book-chapter","created":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T10:07:17Z","timestamp":1653300437000},"page":"290-298","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Self-supervised Strategy for the Robustness of VQA Models"],"prefix":"10.1007","author":[{"given":"Jingyu","family":"Su","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chuanhao","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chenchen","family":"Jing","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuwei","family":"Wu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,4,25]]},"reference":[{"key":"23_CR1","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"23_CR2","unstructured":"Ramakrishnan, S., Agrawal, A., Lee, S.: Overcoming language priors in visual question answering with adversarial regularization. In: Advances in Neural Information Processing Systems, pp. 1541\u20131551 (2018)"},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Cadene, R., Ben-Younes, H., Cord, M., Thome, N.: Murel: multimodal relational reasoning for visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1989\u20131998 (2019)","DOI":"10.1109\/CVPR.2019.00209"},{"key":"23_CR4","doi-asserted-by":"crossref","unstructured":"Zhu, X., Mao, Z., Liu, C., Zhang, P., Wang, B., Zhang, Y.: Overcoming language priors with self-supervised learning for visual question answering. In: International Joint Conference on Artificial Intelligence, pp. 1083\u20131089 (2020)","DOI":"10.24963\/ijcai.2020\/151"},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Chen, L., Yan, X., Xiao, J., Zhang, H., Pu, S., Zhuang, Y.: Counterfactual samples synthesizing for robust visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10800\u201310809 (2020)","DOI":"10.1109\/CVPR42600.2020.01081"},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"Clark, C., Yatskar, M., Zettlemoyer, L.: Don\u2019t take the easy way out: ensemble based methods for avoiding known dataset biases. In: Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, pp. 4060\u20134073 (2019)","DOI":"10.18653\/v1\/D19-1418"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Abbasnejad, E., Teney, D., Parvaneh, A., Shi, J., Hengel, A.V.D.: Counterfactual vision and language learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10044\u201310054 (2020)","DOI":"10.1109\/CVPR42600.2020.01006"},{"key":"23_CR8","doi-asserted-by":"crossref","unstructured":"Jing, C., Wu, Y., Zhang, X., Jia, Y., Wu, Q.: Overcoming language priors in VQA via decomposed linguistic representations. In: AAAI Conference on Artificial Intelligence, vol. 34, no. 07, pp. 11181\u201311188 (2020)","DOI":"10.1609\/aaai.v34i07.6776"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6077\u20136086 (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"23_CR10","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., Smola, A.: Stacked attention networks for image question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 21\u201329 (2016)","DOI":"10.1109\/CVPR.2016.10"},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Fukui, A., Park, D.H., Yang, D., Rohrbach, A., Darrell, T., Rohrbach, M.: Multimodal compact bilinear pooling for visual question answering and visual grounding. In: Empirical Methods in Natural Language Processing, pp. 457\u2013468 (2016)","DOI":"10.18653\/v1\/D16-1044"},{"key":"23_CR12","doi-asserted-by":"crossref","unstructured":"Agrawal, A., Batra, D., Parikh, D., Kembhavi, A.: Don\u2019t just assume; look and answer: overcoming priors for visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4971\u20134980 (2018)","DOI":"10.1109\/CVPR.2018.00522"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., et al.: Taking a hint: leveraging explanations to make vision and language models more grounded. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2591\u20132600 (2019)","DOI":"10.1109\/ICCV.2019.00268"},{"key":"23_CR14","unstructured":"Wu, J., Mooney, R.J.: Self-critical reasoning for robust visual question answering. In: Advances in Neural Information Processing Systems, pp. 8601\u20138611 (2019)"},{"key":"23_CR15","unstructured":"Cadene, R., Dancette C., Cord M., Parikh D.: Rubi: reducing unimodal biases for visual question answering. In: Advances in Neural Information Processing Systems, pp. 841\u2013852 (2019)"},{"key":"23_CR16","doi-asserted-by":"crossref","unstructured":"Goyal, Y., Khot, T., Summers-Stay, D., Batra, D., Parikh, D.: Making the V in VQA matter: elevating the role of image understanding in visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6904\u20136913 (2017)","DOI":"10.1109\/CVPR.2017.670"},{"key":"23_CR17","volume-title":"Natural Language Processing with Python: Analyzing Text with the Natural Language Toolkit","author":"S Bird","year":"2009","unstructured":"Bird, S., Klein, E., Loper, E.: Natural Language Processing with Python: Analyzing Text with the Natural Language Toolkit. O\u2019Reilly Media Inc., Newton (2009)"}],"container-title":["IFIP Advances in Information and Communication Technology","Intelligent Information Processing XI"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-03948-5_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,23]],"date-time":"2026-05-23T00:04:59Z","timestamp":1779494699000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-03948-5_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031039478","9783031039485"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-03948-5_23","relation":{},"ISSN":["1868-4238","1868-422X"],"issn-type":[{"value":"1868-4238","type":"print"},{"value":"1868-422X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"25 April 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Qingdao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 May 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 May 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iip2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.intsci.ac.cn\/iip2022","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 reviews per paper","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"56","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"66% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}