{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T06:58:31Z","timestamp":1770274711299,"version":"3.49.0"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T00:00:00Z","timestamp":1561939200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T00:00:00Z","timestamp":1561939200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T00:00:00Z","timestamp":1561939200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,7]]},"DOI":"10.1109\/ijcnn.2019.8852087","type":"proceedings-article","created":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T03:44:32Z","timestamp":1569901472000},"page":"1-8","source":"Crossref","is-referenced-by-count":10,"title":["Gaining Extra Supervision via Multi-task learning for Multi-Modal Video Question Answering"],"prefix":"10.1109","author":[{"given":"Junyeong","family":"Kim","sequence":"first","affiliation":[]},{"given":"Minuk","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Kyungsu","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Sungjin","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Chang D.","family":"Yoo","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1017"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_29"},{"key":"ref32","article-title":"Learning aligned cross-modal representations from weakly aligned data","author":"castrej\u00f3n","year":"2016","journal-title":"IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2598339"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00640"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref36","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.392"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.618"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.149"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123427"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00688"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_29"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.501"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1167"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.80"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00642"},{"key":"ref18","article-title":"Movie question answering: Remembering the textual cues for layered visual contents","author":"wang","year":"2018","journal-title":"AAAI Conference on Artificial Intelligence"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_41"},{"key":"ref28","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref4","article-title":"DCN+: Mixed objective and deep residual coattention for question answering","author":"xiong","year":"2018","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8296918"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1160"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.360"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.9"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.285"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.232"},{"key":"ref2","article-title":"End-to-end memory networks","author":"sukhbaatar","year":"2015","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref1","article-title":"Memory networks","author":"weston","year":"2015","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/280"},{"key":"ref45","article-title":"Adam: A method for stochastic optimization","volume":"abs 1412 6980","author":"kingma","year":"2014","journal-title":"CoRR"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.563"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0966-6"},{"key":"ref42","article-title":"Bidirectional attention flow for machine comprehension","author":"seo","year":"2017","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50012-5"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref44","article-title":"Deep metric learning using triplet network","author":"hoffer","year":"2015","journal-title":"International Conference on Learning Representations (ICLR) Workshop Track"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390177"},{"key":"ref43","article-title":"Fast and accurate reading comprehension by combining self-attention and convolution","author":"yu","year":"2018","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"}],"event":{"name":"2019 International Joint Conference on Neural Networks (IJCNN)","location":"Budapest, Hungary","start":{"date-parts":[[2019,7,14]]},"end":{"date-parts":[[2019,7,19]]}},"container-title":["2019 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8840768\/8851681\/08852087.pdf?arnumber=8852087","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T21:51:27Z","timestamp":1658094687000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8852087\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,7]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/ijcnn.2019.8852087","relation":{},"subject":[],"published":{"date-parts":[[2019,7]]}}}