{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T19:45:50Z","timestamp":1768074350064,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":24,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819500130","type":"print"},{"value":"9789819500147","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-95-0014-7_27","type":"book-chapter","created":{"date-parts":[[2025,7,24]],"date-time":"2025-07-24T10:05:19Z","timestamp":1753351519000},"page":"316-327","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Prompting Large Models for Knowledge and Reasoning Augmentation in KB-VQA"],"prefix":"10.1007","author":[{"given":"Qiang","family":"Liu","sequence":"first","affiliation":[]},{"given":"Mengxi","family":"Ying","sequence":"additional","affiliation":[]},{"given":"Peng","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Gan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xinpan","family":"Yuan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,25]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: Vqa: Visual question answering. 
In: Proceedings of the IEEE international conference on computer vision, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"27_CR2","unstructured":"Alayrac, J.-B., et al.: Flamingo: a visual language model for few-shot learning. Advances in Neural Information Processing Systems 35, 23 716\u201323 736 (2022)"},{"issue":"4","key":"27_CR3","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1023\/B:BTTJ.0000047600.45421.6d","volume":"22","author":"H Liu","year":"2004","unstructured":"Liu, H., Singh, P.: Conceptnet\u2014a practical commonsense reasoning tool-kit. BT Technol. J. 22(4), 211\u2013226 (2004)","journal-title":"BT Technol. J."},{"key":"27_CR4","doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: An empirical study of gpt-3 for few-shot knowledge-based vqa. In: Proceedings of the AAAI conference on artificial intelligence, vol. 36, no. 3, pp. 3081\u20133089 (2022)","DOI":"10.1609\/aaai.v36i3.20215"},{"issue":"10","key":"27_CR5","doi-asserted-by":"publisher","first-page":"2413","DOI":"10.1109\/TPAMI.2017.2754246","volume":"40","author":"P Wang","year":"2017","unstructured":"Wang, P., Wu, Q., Shen, C., Dick, A., Van Den Hengel, A.: Fvqa: Fact-based visual question answering. IEEE Trans. Pattern Anal. Mach. Intell. 40(10), 2413\u20132427 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"Marino, K., Rastegari, M., Farhadi, A., Mottaghi, R.: Ok-vqa: A visual question answering benchmark requiring external knowledge. In Proceedings of the IEEE\/cvf conference on computer vision and pattern recognition, pp. 3195\u20133204 (2019)","DOI":"10.1109\/CVPR.2019.00331"},{"key":"27_CR7","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. 
Syst."},{"key":"27_CR8","doi-asserted-by":"crossref","unstructured":"Shao, Z., Yu, Z., Wang, M., Yu, J.: Prompting large language models with answer heuristics for knowledge-based visual question answering. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, p. 14 974\u201314 983 (2023)","DOI":"10.1109\/CVPR52729.2023.01438"},{"key":"27_CR9","unstructured":"Hu, Y., et al.: Promptcap: Prompt-guided task-aware image captioning. arXiv preprint arXiv:2211.09699 (2022)"},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Chen, A., Stanovsky, G., Singh, S., Gardner, M.: Evaluating question answering evaluation. In: Proceedings of the 2nd workshop on machine reading for question answering, pp. 119\u2013124 (2019)","DOI":"10.18653\/v1\/D19-5817"},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Manas, O., Krojer, B., Agrawal, A.: Improving automatic vqa evaluation using large language models. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, no. 5, pp. 4171\u20134179 (2024)","DOI":"10.1609\/aaai.v38i5.28212"},{"key":"27_CR12","doi-asserted-by":"crossref","unstructured":"Gui, L., et al.: Kat: A knowledge augmented transformer for vision-and-language. arXiv preprint arXiv:2112.08614 (2021)","DOI":"10.18653\/v1\/2022.naacl-main.70"},{"key":"27_CR13","unstructured":"Lin, Y., et al.: Revive: Regional visual representation matters in knowledge-based visual question answering. Advances in Neural Information Processing Systems 35, 10 560\u201310 571 (2022)"},{"key":"27_CR14","unstructured":"Li, L.H., et al.: Grounded language-image pretraining. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10 965\u201310 975 (2022)"},{"key":"27_CR15","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the 38th International Conference on Machine Learning, ser. 
Proceedings of Machine Learning Research, vol. 139. PMLR, 18\u201324, pp. 8748\u20138763 (2021)"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Yu, Z., Yu, J., Cui, Y., Tao, D., Tian, Q.: Deep modular coattention networks for visual question answering. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 6281\u20136290 (2019)","DOI":"10.1109\/CVPR.2019.00644"},{"key":"27_CR17","doi-asserted-by":"crossref","unstructured":"Wu, J., Lu, J., Sabharwal, A., Mottaghi, R.: Multi-modal answer validation for knowledge-based vqa. In: Proceedings of the AAAI conference on artificial intelligence, vol. 36, no. 3, pp. 2712\u20132721 (2022)","DOI":"10.1609\/aaai.v36i3.20174"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Garderes, F., Ziaeefard, M., Abeloos, B., Lecue, F.: Conceptbert: Concept-aware representation for visual question answering. In: Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 489\u2013498 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.44"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Feng, A., et al.: Caption matters: a new perspective for knowledge based visual question answering. Knowledge and Information Systems, vol. 66, no. 11, pp. 6975\u20137003 (2024)","DOI":"10.1007\/s10115-024-02166-8"},{"key":"27_CR20","doi-asserted-by":"crossref","unstructured":"Gao, F., et al.: Transform-retrieve-generate: Natural language-centric outside knowledge visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5067\u20135077 (2022)","DOI":"10.1109\/CVPR52688.2022.00501"},{"key":"27_CR21","unstructured":"Lu, J., Clark, C., Zellers, R., Mottaghi, R., Kembhavi, A.: Unified-io: A unified model for vision, language, and multi-modal tasks. 
In: The Eleventh International Conference on Learning Representations (2022)"},{"key":"27_CR22","unstructured":"Bai, J., et al.: Qwen-vl: A frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966 (2023)"},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"Chen, Z., et al.: Visual chain-of-thought prompting for knowledge-based visual reasoning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, pp. 1254\u20131262 (2024)","DOI":"10.1609\/aaai.v38i2.27888"},{"key":"27_CR24","unstructured":"Guo, J., et al.: From images to textual prompts: Zero-shot visual question answering with frozen large language models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp.10 867\u201310 877 (2023)"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-0014-7_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T21:37:50Z","timestamp":1757281070000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-0014-7_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819500130","9789819500147"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-0014-7_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"25 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference 
Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}