{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T05:57:53Z","timestamp":1742968673525,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031466632"},{"type":"electronic","value":"9783031466649"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-46664-9_41","type":"book-chapter","created":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T13:02:29Z","timestamp":1699102949000},"page":"614-628","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Knowledge-Enhanced Inferential Network for\u00a0Cross-Modality Multi-hop VQA"],"prefix":"10.1007","author":[{"given":"Shiqi","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianxing","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Miaopei","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuang","family":"Qiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaofeng","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jian","family":"Yin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,5]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6077\u20136086 (2018)","key":"41_CR1","DOI":"10.1109\/CVPR.2018.00636"},{"doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","key":"41_CR2","DOI":"10.1109\/ICCV.2015.279"},{"doi-asserted-by":"crossref","unstructured":"Ben-Younes, H., Cadene, R., Cord, M., Thome, N.: Mutan: multimodal tucker fusion for visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2612\u20132620 (2017)","key":"41_CR3","DOI":"10.1109\/ICCV.2017.285"},{"key":"41_CR4","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"doi-asserted-by":"crossref","unstructured":"Cadene, R., Ben-Younes, H., Cord, M., Thome, N.: Murel: multimodal relational reasoning for visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1989\u20131998 (2019)","key":"41_CR5","DOI":"10.1109\/CVPR.2019.00209"},{"doi-asserted-by":"crossref","unstructured":"Chen, S., Zhao, Q.: Divide and conquer: answering questions with object factorization and compositional reasoning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6736\u20136745 (2023)","key":"41_CR6","DOI":"10.1109\/CVPR52729.2023.00651"},{"doi-asserted-by":"crossref","unstructured":"Cho, K., et al.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)","key":"41_CR7","DOI":"10.3115\/v1\/D14-1179"},{"doi-asserted-by":"crossref","unstructured":"Dettmers, T., Minervini, P., Stenetorp, P., Riedel, S.: Convolutional 2D knowledge graph embeddings. In: Thirty-second AAAI Conference on Artificial Intelligence (2018)","key":"41_CR8","DOI":"10.1609\/aaai.v32i1.11573"},{"doi-asserted-by":"crossref","unstructured":"Gao, Y., Beijbom, O., Zhang, N., Darrell, T.: Compact bilinear pooling. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 317\u2013326 (2016)","key":"41_CR9","DOI":"10.1109\/CVPR.2016.41"},{"unstructured":"Hudson, D.A., Manning, C.D.: Compositional attention networks for machine reasoning. arXiv preprint arXiv:1803.03067 (2018)","key":"41_CR10"},{"doi-asserted-by":"crossref","unstructured":"Hudson, D.A., Manning, C.D.: Gqa: A new dataset for real-world visual reasoning and compositional question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6700\u20136709 (2019)","key":"41_CR11","DOI":"10.1109\/CVPR.2019.00686"},{"doi-asserted-by":"crossref","unstructured":"Johnson, J., Hariharan, B., Van Der Maaten, L., Fei-Fei, L., Lawrence Zitnick, C., Girshick, R.: Clevr: a diagnostic dataset for compositional language and elementary visual reasoning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2901\u20132910 (2017)","key":"41_CR12","DOI":"10.1109\/CVPR.2017.215"},{"unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)","key":"41_CR13"},{"doi-asserted-by":"crossref","unstructured":"Le, T.M., Le, V., Gupta, S., Venkatesh, S., Tran, T.: Guiding visual question answering with attention priors. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 4381\u20134390 (2023)","key":"41_CR14","DOI":"10.1109\/WACV56688.2023.00436"},{"unstructured":"Le, T.M., Le, V., Venkatesh, S., Tran, T.: Dynamic language binding in relational visual reasoning. arXiv preprint arXiv:2004.14603 (2020)","key":"41_CR15"},{"key":"41_CR16","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1007\/978-3-030-21348-0_30","volume-title":"The Semantic Web: 16th International Conference, ESWC 2019, Portoro\u017e, Slovenia, June 2\u20136, 2019, Proceedings","author":"Y Liu","year":"2019","unstructured":"Liu, Y., Li, H., Garcia-Duran, A., Niepert, M., Onoro-Rubio, D., Rosenblum, D.S.: MMKG: multi-modal knowledge graphs. In: Hitzler, P., et al. (eds.) The Semantic Web: 16th International Conference, ESWC 2019, Portoro\u017e, Slovenia, June 2\u20136, 2019, Proceedings, pp. 459\u2013474. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-21348-0_30"},{"key":"41_CR17","doi-asserted-by":"publisher","first-page":"460","DOI":"10.1007\/978-3-030-01237-3_28","volume-title":"Computer Vision \u2013 ECCV 2018: 15th European Conference, Munich, Germany, September 8-14, 2018, Proceedings, Part VIII","author":"M Narasimhan","year":"2018","unstructured":"Narasimhan, M., Schwing, A.G.: Straight to the facts: learning knowledge base retrieval for factual visual question answering. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Computer Vision \u2013 ECCV 2018: 15th European Conference, Munich, Germany, September 8-14, 2018, Proceedings, Part VIII, pp. 460\u2013477. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01237-3_28"},{"unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: theory and application to reward shaping. In: ICML, vol. 99, pp. 278\u2013287 (1999)","key":"41_CR18"},{"doi-asserted-by":"crossref","unstructured":"Nguyen, B.X., Do, T., Tran, H., Tjiputra, E., Tran, Q.D., Nguyen, A.: Coarse-to-fine reasoning for visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4558\u20134566 (2022)","key":"41_CR19","DOI":"10.1109\/CVPRW56347.2022.00502"},{"doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.D.: Glove: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","key":"41_CR20","DOI":"10.3115\/v1\/D14-1162"},{"key":"41_CR21","first-page":"91","volume":"28","author":"S Ren","year":"2015","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. Adv. Neural. Inf. Process. Syst. 28, 91\u201399 (2015)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"doi-asserted-by":"crossref","unstructured":"Shah, S., Mishra, A., Yadati, N., Talukdar, P.P.: KVQA: knowledge-aware visual question answering. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 8876\u20138884 (2019)","key":"41_CR22","DOI":"10.1609\/aaai.v33i01.33018876"},{"doi-asserted-by":"crossref","unstructured":"Shao, Z., Yu, Z., Wang, M., Yu, J.: Prompting large language models with answer heuristics for knowledge-based visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14974\u201314983 (2023)","key":"41_CR23","DOI":"10.1109\/CVPR52729.2023.01438"},{"doi-asserted-by":"crossref","unstructured":"Shi, J., Zhang, H., Li, J.: Explainable and explicit visual reasoning over scene graphs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8376\u20138384 (2019)","key":"41_CR24","DOI":"10.1109\/CVPR.2019.00857"},{"unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)","key":"41_CR25"},{"issue":"3","key":"41_CR26","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF02289464","volume":"31","author":"LR Tucker","year":"1966","unstructured":"Tucker, L.R.: Some mathematical notes on three-mode factor analysis. Psychometrika 31(3), 279\u2013311 (1966)","journal-title":"Psychometrika"},{"issue":"10","key":"41_CR27","doi-asserted-by":"publisher","first-page":"2413","DOI":"10.1109\/TPAMI.2017.2754246","volume":"40","author":"P Wang","year":"2017","unstructured":"Wang, P., Wu, Q., Shen, C., Dick, A., Van Den Hengel, A.: FVQA: fact-based visual question answering. IEEE Trans. Pattern Anal. Mach. Intell. 40(10), 2413\u20132427 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"unstructured":"Weston, J., Chopra, S., Bordes, A.: Memory networks. arXiv preprint arXiv:1410.3916 (2014)","key":"41_CR28"},{"issue":"3\u20134","key":"41_CR29","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3\u20134), 229\u2013256 (1992)","journal-title":"Mach. Learn."},{"doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: An empirical study of gpt-3 for few-shot knowledge-based VQA. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol. 36, pp. 3081\u20133089 (2022)","key":"41_CR30","DOI":"10.1609\/aaai.v36i3.20215"},{"doi-asserted-by":"crossref","unstructured":"Zhang, L., et al.: Rich visual knowledge-based augmentation network for visual question answering. IEEE Trans. Neural Netw. Learn. Syst. 32(10), 4362\u20134373 (2020)","key":"41_CR31","DOI":"10.1109\/TNNLS.2020.3017530"},{"doi-asserted-by":"crossref","unstructured":"Zhang, Y., Jiang, M., Zhao, Q.: Query and attention augmentation for knowledge-based explainable reasoning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15576\u201315585 (2022)","key":"41_CR32","DOI":"10.1109\/CVPR52688.2022.01513"},{"doi-asserted-by":"crossref","unstructured":"Zhu, Z., Yu, J., Wang, Y., Sun, Y., Hu, Y., Wu, Q.: Mucko: multi-layer cross-modal knowledge reasoning for fact-based visual question answering. arXiv preprint arXiv:2006.09073 (2020)","key":"41_CR33","DOI":"10.24963\/ijcai.2020\/153"}],"container-title":["Lecture Notes in Computer Science","Advanced Data Mining and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-46664-9_41","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T13:14:44Z","timestamp":1699103684000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-46664-9_41"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031466632","9783031466649"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-46664-9_41","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"5 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ADMA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Advanced Data Mining and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shenyang","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 August 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"adma2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/adma2023.uqcloud.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes. Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"503","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"216","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"43% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.97","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.77","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}