{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:34:06Z","timestamp":1763105646451,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":13,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,2,22]],"date-time":"2019-02-22T00:00:00Z","timestamp":1550793600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,2,22]]},"DOI":"10.1145\/3318299.3318305","type":"proceedings-article","created":{"date-parts":[[2019,5,16]],"date-time":"2019-05-16T12:10:25Z","timestamp":1558008625000},"page":"412-416","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Feature Fusion Attention Visual Question Answering"],"prefix":"10.1145","author":[{"given":"Chunlin","family":"Wang","sequence":"first","affiliation":[{"name":"School of Information Science &amp; Technology Chuxiong Normal University, P.R. China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianyong","family":"Sun","sequence":"additional","affiliation":[{"name":"School of Information Science &amp; Technology Chuxiong Normal University, P.R. China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaolin","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Information Science &amp; Technology Chuxiong Normal University, P.R. China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2019,2,22]]},"reference":[{"doi-asserted-by":"publisher","key":"e_1_3_2_1_1_1","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_1_2_1","volume-title":"NIPS","author":"Krizhevsky A","year":"2012","unstructured":"Krizhevsky , A , Sutskever , I. , Hinton , G. E. 2012 . Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems.(Harrahs and Harveys, Lake Tahoe. Dec. 3-8, 2012) . NIPS 2012, 1097--1105. Krizhevsky, A, Sutskever, I., Hinton, G. E. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems.(Harrahs and Harveys, Lake Tahoe. Dec. 3-8, 2012). NIPS 2012, 1097--1105."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_4_1","volume-title":"Eleventh Annual Conference of the International Speech Communication Association.(Makuhari","author":"Mikolov T","year":"2010","unstructured":"Mikolov T , Karafi\u00e1t M , Burget L , \u010cernock\u00fd J , Khudanpur S. 2010 . Recurrent neural network based language model . In Eleventh Annual Conference of the International Speech Communication Association.(Makuhari , Chiba, Japan , Sep. 26-30, 2010). INTERSPEECH-2010, 1045--1048.DOI=https:\/\/www.isca-speech.org\/archive\/interspeech_2010\/i10_1045.html Mikolov T, Karafi\u00e1t M, Burget L, \u010cernock\u00fd J, Khudanpur S. 2010. Recurrent neural network based language model. In Eleventh Annual Conference of the International Speech Communication Association.(Makuhari, Chiba, Japan, Sep. 26-30, 2010). INTERSPEECH-2010, 1045--1048.DOI=https:\/\/www.isca-speech.org\/archive\/interspeech_2010\/i10_1045.html"},{"key":"e_1_3_2_1_5_1","volume-title":"Very deep convolutional networks for large-scale image recognition{J}. arXiv preprint arXiv:1409.1556","author":"Simonyan K","year":"2014","unstructured":"Simonyan K , Zisserman A. Very deep convolutional networks for large-scale image recognition{J}. arXiv preprint arXiv:1409.1556 , 2014 . Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition{J}. arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_6_1","volume-title":"Neural machine translation by jointly learning to align and translate{J}. arXiv preprint arXiv:1409.0473","author":"Bahdanau D","year":"2014","unstructured":"Bahdanau D , Cho K , Bengio Y. Neural machine translation by jointly learning to align and translate{J}. arXiv preprint arXiv:1409.0473 , 2014 . Bahdanau D, Cho K, Bengio Y. Neural machine translation by jointly learning to align and translate{J}. arXiv preprint arXiv:1409.0473, 2014."},{"key":"e_1_3_2_1_7_1","volume-title":"Neural image caption generation with visual atention{J}. arXiv Pre-Print","author":"Kelvin Xu","year":"2015","unstructured":"Kelvin Xu . Neural image caption generation with visual atention{J}. arXiv Pre-Print , 2015 , 83: 89 Kelvin Xu. Neural image caption generation with visual atention{J}. arXiv Pre-Print, 2015, 83: 89"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.1109\/CVPR.2016.10"},{"key":"e_1_3_2_1_9_1","volume-title":"Multimodal Residual Learning for VisualQA. In International Conference on Neural Information Processing Systems (Centre Mon","author":"Kim S.-W.","year":"2016","unstructured":"J.-H. Kim , S.-W. Lee , D. Kwak , M.-O. Heo , J. Kim , J.-W. Ha , and B.-T. Zhang . 2016 . Multimodal Residual Learning for VisualQA. In International Conference on Neural Information Processing Systems (Centre Mon Dec 5-10, 2016 Barcelona SPAIN )NIPS 2016.361--369.eprint arXiv:1606.01455.DOI=https: \/\/arxiv.org\/abs\/1606.01455 J.-H. Kim, S.-W. Lee, D. Kwak, M.-O. Heo, J. Kim, J.-W. Ha, and B.-T. Zhang. 2016. Multimodal Residual Learning for VisualQA. In International Conference on Neural Information Processing Systems (Centre Mon Dec 5-10, 2016 Barcelona SPAIN )NIPS2016.361--369.eprint arXiv:1606.01455.DOI=https: \/\/arxiv.org\/abs\/1606.01455"},{"key":"e_1_3_2_1_10_1","volume-title":"Hawaii","author":"Yu D","year":"2017","unstructured":"Yu D , Fu J , Mei T , Rui Y. 2017 . Multi-level attention networks for visual question answering.Computer Vision and Pattern Recognition (Honolulu , Hawaii , July 21-26, 2017), CVPR 2017. IEEE, New York, NY, 4709--4717. Yu D, Fu J, Mei T, Rui Y. 2017.Multi-level attention networks for visual question answering.Computer Vision and Pattern Recognition (Honolulu, Hawaii, July 21-26, 2017), CVPR 2017. IEEE, New York, NY, 4709--4717."},{"key":"e_1_3_2_1_11_1","volume-title":"Improved Fusion of Visual and Language Representations by Dense Symmetric Co-Attention for Visual Question Answering{J}. arXiv preprint arXiv:1804.00775","author":"Nguyen D K","year":"2018","unstructured":"Nguyen D K , Okatani T. Improved Fusion of Visual and Language Representations by Dense Symmetric Co-Attention for Visual Question Answering{J}. arXiv preprint arXiv:1804.00775 , 2018 . Nguyen D K, Okatani T. Improved Fusion of Visual and Language Representations by Dense Symmetric Co-Attention for Visual Question Answering{J}. arXiv preprint arXiv:1804.00775, 2018."},{"key":"e_1_3_2_1_12_1","volume-title":"Conference on Empirical Methods in Natural Language Processing","author":"Fukui D. H.","year":"2016","unstructured":"A. Fukui , D. H. Park , D. Yang , A. Rohrbach , T. Darrell , and M. Rohrbach . 2016.Multimodal compact bilinear pooling for visual question answering and visual grounding . Conference on Empirical Methods in Natural Language Processing ( Austin, TX, USA. November 1-5, 2016 ).EMNLP 2016, 457--468, Stroudsburg, PA: ACL.457--468. A. Fukui, D. H. Park, D. Yang, A. Rohrbach, T. Darrell, and M. Rohrbach.2016.Multimodal compact bilinear pooling for visual question answering and visual grounding. Conference on Empirical Methods in Natural Language Processing (Austin, TX, USA. November 1-5, 2016).EMNLP 2016, 457--468, Stroudsburg, PA: ACL.457--468."},{"key":"e_1_3_2_1_13_1","volume-title":"Hadamard product for low-rank bilinear pooling.In International Conference on Learning Representations (Caribe Hilton","author":"Kim K.-W.","year":"2016","unstructured":"J.-H. Kim , K.-W. On , W. Lim , J. Kim , J.-W. Ha , and B.-T. Zhang . Hadamard product for low-rank bilinear pooling.In International Conference on Learning Representations (Caribe Hilton , San Juan , Puerto Rico, May 2-4, 2016 ) ICLR 2016, 1--14.arXiv preprint arXiv:1610.04325. J.-H. Kim, K.-W. On, W. Lim, J. Kim, J.-W. Ha, and B.-T. Zhang. Hadamard product for low-rank bilinear pooling.In International Conference on Learning Representations (Caribe Hilton, San Juan, Puerto Rico, May 2-4, 2016) ICLR2016, 1--14.arXiv preprint arXiv:1610.04325."}],"event":{"sponsor":["Southwest Jiaotong University"],"acronym":"ICMLC '19","name":"ICMLC '19: 2019 11th International Conference on Machine Learning and Computing","location":"Zhuhai China"},"container-title":["Proceedings of the 2019 11th International Conference on Machine Learning and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3318299.3318305","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3318299.3318305","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T01:02:24Z","timestamp":1750208544000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3318299.3318305"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2,22]]},"references-count":13,"alternative-id":["10.1145\/3318299.3318305","10.1145\/3318299"],"URL":"https:\/\/doi.org\/10.1145\/3318299.3318305","relation":{},"subject":[],"published":{"date-parts":[[2019,2,22]]},"assertion":[{"value":"2019-02-22","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}