{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:53:45Z","timestamp":1777492425038,"version":"3.51.4"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,10]],"date-time":"2021-01-10T00:00:00Z","timestamp":1610236800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020AAA0107800,2018AAA0102000"],"award-info":[{"award-number":["2020AAA0107800,2018AAA0102000"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China Major Project","doi-asserted-by":"publisher","award":["U1611461"],"award-info":[{"award-number":["U1611461"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,10]]},"DOI":"10.1109\/icpr48806.2021.9412891","type":"proceedings-article","created":{"date-parts":[[2021,5,6]],"date-time":"2021-05-06T02:15:54Z","timestamp":1620267354000},"page":"3491-3498","source":"Crossref","is-referenced-by-count":10,"title":["Multi-modal Contextual Graph Neural Network for Text Visual Question Answering"],"prefix":"10.1109","author":[{"given":"Yaoyuan","family":"Liang","sequence":"first","affiliation":[]},{"given":"Xin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xuguang","family":"Duan","sequence":"additional","affiliation":[]},{"given":"Wenwu","family":"Zhu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref38","article-title":"Variational reasoning for question answering with knowledge graph","author":"yuyu","year":"0","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"key":"ref33","author":"weston","year":"2014","journal-title":"Memory networks"},{"key":"ref32","author":"wang","year":"2015","journal-title":"Explicit knowledge-based reasoning for visual question answering"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2754246"},{"key":"ref30","first-page":"5998","article-title":"Attention is all you need","author":"ashish","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2817340"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.496"},{"key":"ref35","first-page":"684","article-title":"Exploring visual relationship for image captioning","author":"yao","year":"0","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00855"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref12","author":"jiang","year":"2018","journal-title":"Pythia v0 1 the winning entry to the vqa challenge 2018"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.215"},{"key":"ref14","author":"kim","year":"2016","journal-title":"Hadamard product for low-rank bilinear pooling"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref16","author":"kuznetsova","year":"2018","journal-title":"The open images dataset v4 Unified image classification object detection and visual relationship detection at scale"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01041"},{"key":"ref18","first-page":"289","article-title":"Hierarchical question-image co-attention for visual question answering","author":"lu","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00156"},{"key":"ref28","article-title":"Pythia-a platform for vision & language research","author":"singh","year":"0","journal-title":"SysML Workshop NeurIPS 2019"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00470"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00439"},{"key":"ref6","first-page":"3059","article-title":"Weakly supervised dense event captioning in videos","author":"duan","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00851"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219861"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2729019"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1044"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref9","author":"goyal","year":"2017","journal-title":"Accurate large minibatch sgd Training imagenet in 1 hour"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref20","first-page":"2654","article-title":"Out of the box: Reasoning with graph convolution nets for factual visual question answering","author":"narasimhan","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref22","first-page":"8334","article-title":"Learning conditioned graph structures for interpretable visual question answering","author":"norcliffe-brown","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00637"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref23","author":"paszke","year":"2017","journal-title":"Automatic differentiation in pytorch"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018876"},{"key":"ref25","first-page":"91","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2020 25th International Conference on Pattern Recognition (ICPR)","location":"Milan, Italy","start":{"date-parts":[[2021,1,10]]},"end":{"date-parts":[[2021,1,15]]}},"container-title":["2020 25th International Conference on Pattern Recognition (ICPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9411940\/9411911\/09412891.pdf?arnumber=9412891","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:40:40Z","timestamp":1652197240000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9412891\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,10]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icpr48806.2021.9412891","relation":{},"subject":[],"published":{"date-parts":[[2021,1,10]]}}}