{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:34:13Z","timestamp":1730266453312,"version":"3.28.0"},"reference-count":54,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9534326","type":"proceedings-article","created":{"date-parts":[[2021,9,21]],"date-time":"2021-09-21T20:40:52Z","timestamp":1632256852000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["VisDG: Towards Spontaneous Visual Dialogue Generation"],"prefix":"10.1109","author":[{"given":"Qichuan","family":"Yang","sequence":"first","affiliation":[]},{"given":"Liuxin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jinghua","family":"Gao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"DeepStory Video Story QA by Deep Embedded Memory Networks","year":"2017","author":"kim","key":"ref39"},{"key":"ref38","article-title":"Unsupervised learning of video representations using lstms","author":"srivastava","year":"2015","journal-title":"ICML"},{"key":"ref33","article-title":"Affective neural response generation","author":"asghar","year":"2017","journal-title":"European Conference on Information Retrieval"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2017.7965952"},{"key":"ref31","article-title":"Improving frame semantic parsing with hierarchical dialogue encoders","author":"bapna","year":"2017","journal-title":"Computing Research Repository"},{"key":"ref30","article-title":"Learning to update auto-associative memory in recurrent neural networks for improving sequence memorization","author":"zhang","year":"2017","journal-title":"Computing Research Repository"},{"key":"ref37","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"International Conference on Machine Learning"},{"journal-title":"Attention is all you need","year":"2017","author":"vaswani","key":"ref36"},{"journal-title":"Convolutional sequence to sequence learning","year":"2017","author":"gehring","key":"ref35"},{"key":"ref34","article-title":"Cold fusion: training seq2seq models together with language models","author":"sriram","year":"2017","journal-title":"Computing Research Repository"},{"key":"ref28","article-title":"Sequence to sequence learning with neural networks","author":"hya","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref27","article-title":"Towards ai-complete question answering: A set of prerequisite toy tasks","author":"weston","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref29","first-page":"3776","article-title":"Building end-to-end dialogue systems using generative hierarchical neural network models","author":"serban","year":"2016","journal-title":"Association for the Advancement of Artificial Intelligence"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref1","first-page":"595","article-title":"Multimodal neural language models","author":"kiros","year":"2014","journal-title":"International Conference on Machine Learning"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3295748"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1012"},{"key":"ref21","article-title":"Visual question answering using deep learning: A survey and performance analysis","author":"srivastava","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1094"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1104"},{"key":"ref26","first-page":"1156","article-title":"Open question answering over curated and extracted knowledge bases","author":"anthony","year":"2014","journal-title":"ACM SIGKDD International Conference on Knowledge Discovery and Data Mining"},{"key":"ref25","first-page":"1608","article-title":"Paraphrase-driven learning for open question answering","volume":"1","author":"fader","year":"2013","journal-title":"Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6320"},{"key":"ref51","doi-asserted-by":"crossref","first-page":"11125","DOI":"10.1609\/aaai.v34i07.6769","article-title":"Du-alvd: An adaptive dual encoding model for deep visual understanding in visual dialogue","volume":"34","author":"jiang","year":"2020","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"ref54","first-page":"818","article-title":"Visualizing and understanding convolutional networks","author":"zeiler","year":"2014","journal-title":"European Conference on Computer Vision"},{"key":"ref53","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","author":"glorot","year":"2010","journal-title":"Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref52","first-page":"7132","article-title":"Squeeze-and-excitation networks","author":"hu","year":"2018","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.215"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.149"},{"journal-title":"How Images Inspire Poems Generating Classical Chinese Poetry from Images with Memory Networks","year":"2018","author":"xu","key":"ref40"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.121"},{"key":"ref13","article-title":"C-vqa: A compositional split of the visual question answering (vqa) vl, 0 dataset","author":"agrawal","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0987-1"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00430"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2708709"},{"key":"ref17","first-page":"552","article-title":"Vqa-e: Explaining, elaborating, and enhancing your answers for visual questions","author":"li","year":"2018","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00380"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00468"},{"key":"ref4","article-title":"Microsoft coco captions: Data collection and evaluation server","author":"chen","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref3","article-title":"Deep captioning with multimodal recurrent neural networks (m-rnn)","volume":"absi1412 6632","author":"mao","year":"2015","journal-title":"CoRR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.501"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00444"},{"key":"ref9","first-page":"4995","article-title":"Visua17w: Grounded question answering in images","author":"zhu","year":"2016","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref46","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018997"},{"journal-title":"Are you talking to me? reasoned visual dialog generation through adversarial learning","year":"2017","author":"wu","key":"ref48"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"key":"ref42","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"sepp","year":"1997","journal-title":"Neural Computation"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01277"},{"key":"ref44","article-title":"Collecting highly parallel data for paraphrase evaluation","author":"chen","year":"2011","journal-title":"ACL 2011"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00911"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09534326.pdf?arnumber=9534326","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:45:50Z","timestamp":1652197550000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9534326\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9534326","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}