{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T05:39:59Z","timestamp":1751607599232,"version":"3.30.2"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,8,16]],"date-time":"2024-08-16T00:00:00Z","timestamp":1723766400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,8,16]],"date-time":"2024-08-16T00:00:00Z","timestamp":1723766400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,8,16]]},"DOI":"10.1109\/besc64747.2024.10780559","type":"proceedings-article","created":{"date-parts":[[2024,12,12]],"date-time":"2024-12-12T19:07:29Z","timestamp":1734030449000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Effective Retrieval Augmentation for Knowledge-Based Vision Question Answering"],"prefix":"10.1109","author":[{"given":"Jiaqi","family":"Deng","sequence":"first","affiliation":[{"name":"School of Computer Science, University of Technology Sydney,Sydney,Australia"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00501"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.375"},{"key":"ref3","volume":"11","author":"Yasunaga","year":"2022","journal-title":"Retrieval-Augmented Multi-modal Language Modeling"},{"key":"ref4","volume":"10","author":"Devlin","year":"2018","journal-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.716"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613848"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.70"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_2"},{"key":"ref9","volume":"11","author":"Vinyals","year":"2014","journal-title":"Show and Tell: A Neural Image Caption Generator"},{"key":"ref10","volume":"1","author":"Yu","year":"2018","journal-title":"Interactive Grounded Language Acquisition and Generalization in a 2D World"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2016.10"},{"key":"ref12","volume":"5","author":"Lu","year":"2016","journal-title":"Hierarchical Question-Image Co-Attention for Visual Question Answering"},{"key":"ref13","volume":"5","author":"Kim","year":"2018","journal-title":"Bilinear Attention Networks"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/springerreference_9081"},{"key":"ref15","volume":"4","author":"Alayrac","year":"2022","journal-title":"Flamingo: a Visual Language Model for Few-Shot Learning"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/springerreference_9081"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00503"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20174"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547870"},{"key":"ref20","volume":"12","author":"Yang","year":"2022","journal-title":"Enhancing Multi-modal and Multi-hop Question Answering via Structured Knowledge and Unified Retrieval-Generation"},{"key":"ref21","volume":"12","author":"Speer","year":"2016","journal-title":"ConceptNet 5.5: An Open Multilingual Graph of General Knowledge"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/2629489"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462987"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.772"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.517"},{"key":"ref26","volume":"9","author":"Lin","year":"2023","journal-title":"Fine-grained Late-interaction Multi-modal Retrieval for Retrieval Augmented Visual Question Answering"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591629"},{"key":"ref28","volume":"6","author":"Lin","year":"2022","journal-title":"REVIVE: Regional Visual Representation Matters in Knowledge-Based Visual Question Answering"},{"key":"ref29","volume":"9","author":"Yang","year":"2021","journal-title":"An Empirical Study of GPT-3 for Few-Shot Knowledge-Based VQA"},{"key":"ref30","volume":"3","author":"Yu","year":"2023","journal-title":"Prophet: Prompting Large Language Models with Complementary Answer Heuristics for Knowledge-based Visual Question Answering"},{"key":"ref31","volume":"2","author":"Ma","year":"2024","journal-title":"GeReA: Question-Aware Prompt Captions for Knowledge-based Visual Question Answering"},{"key":"ref32","volume":"5","author":"Dai","year":"2023","journal-title":"InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning"},{"key":"ref33","volume":"12","author":"Hu","year":"2022","journal-title":"REVEAL: Retrieval-Augmented Visual-Language Pre-Training with Multi-Source Multimodal Knowledge Memory"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01454"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00331"},{"key":"ref36","volume":"6","author":"Wang","year":"2016","journal-title":"FVQA: Fact-based Visual Question Answering"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.925"}],"event":{"name":"2024 11th International Conference on Behavioural and Social Computing (BESC)","start":{"date-parts":[[2024,8,16]]},"location":"Harbin, China","end":{"date-parts":[[2024,8,18]]}},"container-title":["2024 11th International Conference on Behavioural and Social Computing (BESC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10779601\/10780446\/10780559.pdf?arnumber=10780559","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,13]],"date-time":"2024-12-13T06:29:27Z","timestamp":1734071367000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10780559\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,16]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/besc64747.2024.10780559","relation":{},"subject":[],"published":{"date-parts":[[2024,8,16]]}}}