{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T00:06:32Z","timestamp":1769558792157,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":22,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819755004","type":"print"},{"value":"9789819755011","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-5501-1_9","type":"book-chapter","created":{"date-parts":[[2024,7,26]],"date-time":"2024-07-26T03:48:02Z","timestamp":1721965682000},"page":"109-120","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["IIU: Independent Inference Units for\u00a0Knowledge-Based Visual Question Answering"],"prefix":"10.1007","author":[{"given":"Yili","family":"Li","sequence":"first","affiliation":[]},{"given":"Jing","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Keke","family":"Gai","sequence":"additional","affiliation":[]},{"given":"Gang","family":"Xiong","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,27]]},"reference":[{"key":"9_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1007\/978-3-319-46454-1_24","volume-title":"Computer Vision \u2013 ECCV 2016","author":"P Anderson","year":"2016","unstructured":"Anderson, P., Fernando, B., Johnson, M., Gould, S.: SPICE: Semantic Propositional Image Caption Evaluation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 382\u2013398. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_24"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6077\u20136086 (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"9_CR3","doi-asserted-by":"crossref","unstructured":"Ben-Younes, H., Cadene, R., Cord, M., Thome, N.: Mutan: Multimodal tucker fusion for visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision (2017)","DOI":"10.1109\/ICCV.2017.285"},{"key":"9_CR4","first-page":"2980","volume":"28","author":"J Chung","year":"2015","unstructured":"Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A.C., Bengio, Y.: A recurrent latent variable model for sequential data. Adv. Neural. Inf. Process. Syst. 28, 2980\u20132988 (2015)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Garderes, F., Ziaeefard, M., Abeloos, B., Lecue, F.: Conceptbert: Concept-aware representation for visual question answering. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: Findings, pp. 489\u2013498 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.44"},{"key":"9_CR6","unstructured":"Goyal, A., Lamb, A., Hoffmann, J., Sodhani, S., Levine, S., Bengio, Y., Sch\u00f6lkopf, B.: Recurrent independent mechanisms. In: 9th International Conference on Learning Representations (2021)"},{"key":"9_CR7","unstructured":"Kim, J.H., Jun, J., Zhang, B.T.: Bilinear attention networks. In: Advances in Neural Information Processing Systems (2018)"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Li, H., Wang, P., Shen, C., Hengel, A.v.d.: Visual question answering as reading comprehension. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6319\u20136328 (2019)","DOI":"10.1109\/CVPR.2019.00648"},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Li, L., Gan, Z., Cheng, Y., Liu, J.: Relation-aware graph attention network for visual question answering. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10313\u201310322 (2019)","DOI":"10.1109\/ICCV.2019.01041"},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Li, S., Li, W., Cook, C., Zhu, C., Gao, Y.: Independently recurrent neural network (indrnn): Building a longer and deeper RNN. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5457\u20135466 (2018)","DOI":"10.1109\/CVPR.2018.00572"},{"key":"9_CR11","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: Advances in Neural Information Processing Systems (2019)"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Marino, K., Rastegari, M., Farhadi, A., Mottaghi, R.: Ok-vqa: A visual question answering benchmark requiring external knowledge. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3195\u20133204 (2019)","DOI":"10.1109\/CVPR.2019.00331"},{"key":"9_CR13","unstructured":"Narasimhan, M., Lazebnik, S., Schwing, A.G.: Out of the box: Reasoning with graph convolution nets for factual visual question answering. In: Advances in Neural Information Processing Systems (2018)"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Narasimhan, M., Schwing, A.G.: Straight to the facts: Learning knowledge base retrieval for factual visual question answering. In: Proceedings of the European conference on computer vision (ECCV), pp. 451\u2013468 (2018)","DOI":"10.1007\/978-3-030-01237-3_28"},{"key":"9_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.118669","volume":"212","author":"A Salaberria","year":"2023","unstructured":"Salaberria, A., Azkune, G., de Lacalle, O.L., Soroa, A., Agirre, E.: Image captioning for effective use of language models in knowledge-based visual question answering. Expert Syst. Appl. 212, 118669 (2023)","journal-title":"Expert Syst. Appl."},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Tan, H., Bansal, M.: LXMERT: learning cross-modality encoder representations from transformers. In: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP) (2019)","DOI":"10.18653\/v1\/D19-1514"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"Wang, P., Wu, Q., Cao, J., Shen, C., Gao, L., Hengel, A.v.d.: Neighbourhood watch: Referring expression comprehension via language-guided graph attention networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1960\u20131968 (2019)","DOI":"10.1109\/CVPR.2019.00206"},{"key":"9_CR18","doi-asserted-by":"publisher","first-page":"2413","DOI":"10.1109\/TPAMI.2017.2754246","volume":"40","author":"P Wang","year":"2017","unstructured":"Wang, P., Wu, Q., Shen, C., Dick, A., Van Den Hengel, A.: FVQA: fact-based visual question answering. IEEE Trans. Pattern Anal. Mach. Intell. 40, 2413\u20132427 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"9_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1007\/978-3-319-46478-7_28","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Huijuan Xu","year":"2016","unstructured":"Xu, Huijuan, Saenko, Kate: Ask, attend and answer: exploring question-guided spatial attention for visual question answering. In: Leibe, Bastian, Matas, Jiri, Sebe, Nicu, Welling, Max (eds.) ECCV 2016. LNCS, vol. 9911, pp. 451\u2013466. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_28"},{"key":"9_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107563","volume":"108","author":"J Yu","year":"2020","unstructured":"Yu, J., Zhu, Z., Wang, Y., Zhang, W., Hu, Y., Tan, J.: Cross-modal knowledge reasoning for knowledge-based visual question answering. Pattern Recogn. 108, 107563 (2020)","journal-title":"Pattern Recogn."},{"key":"9_CR21","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.inffus.2020.10.007","volume":"67","author":"W Zheng","year":"2021","unstructured":"Zheng, W., Yan, L., Gou, C., Wang, F.Y.: KM4: visual reasoning via knowledge embedding memory model with mutual modulation. Inform. Fusion 67, 14\u201328 (2021)","journal-title":"Inform. Fusion"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Yu, J., Wang, Y., Sun, Y., Hu, Y., Wu, Q.: Mucko: multi-layer cross-modal knowledge reasoning for fact-based visual question answering. In: Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence (2020)","DOI":"10.24963\/ijcai.2020\/153"}],"container-title":["Lecture Notes in Computer Science","Knowledge Science, Engineering and Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5501-1_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,26]],"date-time":"2024-07-26T04:06:13Z","timestamp":1721966773000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5501-1_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819755004","9789819755011"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5501-1_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"27 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KSEM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Knowledge Science, Engineering and Management","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Birmingham","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ksem2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ai-edge.net\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}