{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:16:32Z","timestamp":1750220192160,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T00:00:00Z","timestamp":1663891200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,9,23]]},"DOI":"10.1145\/3573942.3574090","type":"proceedings-article","created":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T23:45:42Z","timestamp":1684280742000},"page":"746-752","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Visual Question Answering Model Based on CAM and GCN"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2808-9038","authenticated-orcid":false,"given":"Ping","family":"Wen","sequence":"first","affiliation":[{"name":"Xi'an University of Posts &amp; Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4359-0050","authenticated-orcid":false,"given":"Ma","family":"Li","sequence":"additional","affiliation":[{"name":"Xi'an University of Posts &amp; Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3697-2605","authenticated-orcid":false,"given":"Zhang","family":"Zhen","sequence":"additional","affiliation":[{"name":"Xi'an University of Posts &amp; Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8943-1955","authenticated-orcid":false,"given":"Wang","family":"Ze","sequence":"additional","affiliation":[{"name":"Xi'an University of Posts &amp; Telecommunications, China"}]}],"member":"320","published-online":{"date-parts":[[2023,5,16]]},"reference":[{"issue":"8","key":"e_1_3_2_1_1_1","first-page":"23","volume":"32","author":"Bao X G","year":"2021","unstructured":"Bao X G, Zhou C L, Xiao K J, Review of Visual Question Answering Research [J]. Journal of Software, 2021, 32(8):23.","journal-title":"Journal of Software"},{"key":"e_1_3_2_1_2_1","volume-title":"Are You Talking to a Machine? Dataset and Methods for Multilingual Image Question Answering[J]. Computer ence","author":"Gao H.","year":"2015","unstructured":"Gao H. Are You Talking to a Machine? Dataset and Methods for Multilingual Image Question Answering[J]. Computer ence, 2015:2296-2304."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.202]"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00644"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00209]"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01041"},{"key":"e_1_3_2_1_8_1","volume-title":"VisualBERT: A Simple and Performant Baseline for Vision and Language[J]","author":"Li L H","year":"2019","unstructured":"Li L H, Yatskar M, D Yin, VisualBERT: A Simple and Performant Baseline for Vision and Language[J]. 2019. arXiv:1908.03557,2019."},{"key":"e_1_3_2_1_9_1","volume-title":"Object-Semantics Aligned Pre-training for Vision-Language Tasks[C]\/\/European Conference on Computer Vision","author":"Oscar","year":"2020","unstructured":"LI X,YIN X,LI C, Oscar: Object-Semantics Aligned Pre-training for Vision-Language Tasks[C]\/\/European Conference on Computer Vision.Springer,Cham,2020:121-137."},{"key":"e_1_3_2_1_10_1","volume-title":"Learning to Count Objects in Natural Images for Visual Question Answering[J]","author":"Yan Z","year":"2018","unstructured":"Yan Z, Hare J, Gel-Bennett A P. Learning to Count Objects in Natural Images for Visual Question Answering[J]. 2018."},{"key":"e_1_3_2_1_11_1","volume-title":"Multi-task Learning of Hierarchical Vision-Language Representation[J]","author":"Nguyen D K","year":"2020","unstructured":"Nguyen D K, Okatani T. Multi-task Learning of Hierarchical Vision-Language Representation[J]. IEEE, 2020."},{"key":"e_1_3_2_1_12_1","volume-title":"Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation[J]. Computer Science","author":"Cho K","year":"2014","unstructured":"Cho K, Merrienboer B V, Gulcehre C, Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation[J]. Computer Science, 2014."},{"key":"e_1_3_2_1_13_1","volume-title":"Two can play this Game: Visual Dialog with Discriminative Question Generation and Answering[J]","author":"Jain U","year":"2018","unstructured":"Jain U, Lazebnik S, Schwing A. Two can play this Game: Visual Dialog with Discriminative Question Generation and Answering[J]. IEEE, 2018."},{"key":"e_1_3_2_1_14_1","volume-title":"Learning Conditioned Graph Structures for Interpretable Visual Question Answering[J]","author":"Norcliffe-Brown W","year":"2018","unstructured":"Norcliffe-Brown W, Vafeais E, Parisot S. Learning Conditioned Graph Structures for Interpretable Visual Question Answering[J]. 2018."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3054830"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.19678\/j.issn.1000-3428.0061159"},{"key":"e_1_3_2_1_17_1","volume-title":"2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE","author":"Noh H","year":"2019","unstructured":"Noh H, Kim T, Mun J, Transfer Learning via Unsupervised Task Discovery for Visual Question Answering[C]\/\/ 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, 2019."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"e_1_3_2_1_19_1","volume-title":"Lu J","author":"Antol S","year":"2015","unstructured":"Antol S, Agrawal A, Lu J, VQA: Visual question answering[C]\/\/Proceedings of the IEEE International Conference on Computer Vision. Santiago, Chile: IEEE, 2015: 2425-2433."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Yang Zichao He X Gao J Stacked attention networks for image question answering[C]\/\/Proceedings of 2016 IEEE Conference on Computer Vision and Pattern Recognition. 2016: 21-29.","DOI":"10.1109\/CVPR.2016.10"},{"key":"e_1_3_2_1_21_1","unstructured":"Yang Zhuoqian Qin Z Yu J and Hu Y. 2018. Multi-modal learning with prior visual relation reasoning. [EB\/OL]. [2018-12-23]."},{"volume-title":"IEEE International Conference on Image Processing (ICIP)","author":"Yang","key":"e_1_3_2_1_22_1","unstructured":"Yang, Zhuoqian, Zengchang Qin, Prior Visual Relationship Reasoning For Visual Question Answering[C]\/\/2020 IEEE International Conference on Image Processing (ICIP), 2020: 1411-1415."}],"event":{"name":"AIPR 2022: 2022 5th International Conference on Artificial Intelligence and Pattern Recognition","acronym":"AIPR 2022","location":"Xiamen China"},"container-title":["Proceedings of the 2022 5th International Conference on Artificial Intelligence and Pattern Recognition"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3574090","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3573942.3574090","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:32Z","timestamp":1750186952000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3574090"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,23]]},"references-count":22,"alternative-id":["10.1145\/3573942.3574090","10.1145\/3573942"],"URL":"https:\/\/doi.org\/10.1145\/3573942.3574090","relation":{},"subject":[],"published":{"date-parts":[[2022,9,23]]},"assertion":[{"value":"2023-05-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}