{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T11:16:01Z","timestamp":1781867761124,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2016,10,31]],"date-time":"2016-10-31T00:00:00Z","timestamp":1477872000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,10,31]]},"DOI":"10.1145\/2993148.2997632","type":"proceedings-article","created":{"date-parts":[[2016,11,1]],"date-time":"2016-11-01T13:46:03Z","timestamp":1478007963000},"page":"445-450","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":434,"title":["Video-based emotion recognition using CNN-RNN and C3D hybrid networks"],"prefix":"10.1145","author":[{"given":"Yin","family":"Fan","sequence":"first","affiliation":[{"name":"iQiyi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiangju","family":"Lu","sequence":"additional","affiliation":[{"name":"iQiyi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dian","family":"Li","sequence":"additional","affiliation":[{"name":"iQiyi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuanliu","family":"Liu","sequence":"additional","affiliation":[{"name":"iQiyi, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2016,10,31]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2012.26"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2397456"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830585"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2663204.2666274"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830596"},{"key":"e_1_3_2_1_6_1","unstructured":"Tran D. Bourdev L. Fergus R. Torresani L. &amp; Paluri M. 2015. Tran D. Bourdev L. Fergus R. Torresani L. &amp; Paluri M. 2015."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition.2625-2634","author":"Donahue J.","year":"2015"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2522848.2531745"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition.1-9.","author":"Szegedy C."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"He K. Zhang X. Ren S. and Sun J. 2015. Deep residual learning for image recognition. arXiv preprint arXiv:1512.03385. He K. Zhang X. Ren S. and Sun J. 2015. Deep residual learning for image recognition. arXiv preprint arXiv:1512.03385.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_14_1","unstructured":"Simonyan K. &amp; Zisserman A. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556. Simonyan K. &amp; Zisserman A. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.29.41"},{"key":"e_1_3_2_1_16_1","volume-title":"L","author":"Deng J.","year":"2009"},{"key":"e_1_3_2_1_17_1","unstructured":"Carrier P. L. Courville A. Goodfellow I. J. Mirza M. and Bengio Y. 2013 .FER-2013 face database. Technical report 1365 Universit\u00e9 de Montr\u00e9al. Carrier P. L. Courville A. Goodfellow I. J. Mirza M. and Bengio Y. 2013 .FER-2013 face database. Technical report 1365 Universit\u00e9 de Montr\u00e9al."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.137"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","first-page":"338","DOI":"10.21437\/Interspeech.2014-80","article-title":"Long shortterm memory recurrent neural network architectures for large scale acoustic modeling","author":"Sak H.","year":"2014","journal-title":"INTERSPEECH."},{"key":"e_1_3_2_1_20_1","volume-title":"Wild: A Deep Learning Approach. In Computer Vision and Pattern Recognition. CVPR.","author":"Kim B. K.","year":"2016"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Karpathy A. Toderici G. Shetty S. Leung T. Sukthankar R. and Li F.F. 2014. Large-scale Video Classification with Convolutional Neural Networks. Karpathy A. Toderici G. Shetty S. Leung T. Sukthankar R. and Li F.F. 2014. Large-scale Video Classification with Convolutional Neural Networks.","DOI":"10.1109\/CVPR.2014.223"},{"key":"e_1_3_2_1_22_1","first-page":"4694","article-title":"Beyond Short Snippets","author":"Ng J.","year":"2015","journal-title":"Deep Networks for Video Classification. In Computer Vision and Pattern Recognition. CVPR."},{"key":"e_1_3_2_1_23_1","volume-title":"Visual Attention. Workshop track - ICLR.","author":"Sharma S."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830588"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"e_1_3_2_1_26_1","unstructured":"Pan P. Xu Z. Yang Y. Wu F. and Zhuang Y. 2015. Pan P. Xu Z. Yang Y. Wu F. and Zhuang Y. 2015."},{"key":"e_1_3_2_1_27_1","unstructured":"Hierarchical Recurrent Neural Encoder for Video Representation with Application to Captioning. arXiv preprint arXiv:1511.03476. Hierarchical Recurrent Neural Encoder for Video Representation with Application to Captioning. arXiv preprint arXiv:1511.03476."},{"key":"e_1_3_2_1_28_1","volume-title":"2013 IEEE international conference on acoustics, speech and signal processing. 6645-6649","author":"Graves A."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2993148.2997638"},{"key":"e_1_3_2_1_30_1","volume-title":"Computer Vision Workshops.","author":"Jianguo L.","year":"2011"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/1778066.1778092"}],"event":{"name":"ICMI '16: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","location":"Tokyo Japan","acronym":"ICMI '16","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 18th ACM International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2993148.2997632","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2993148.2997632","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:39:45Z","timestamp":1750217985000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2993148.2997632"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10,31]]},"references-count":31,"alternative-id":["10.1145\/2993148.2997632","10.1145\/2993148"],"URL":"https:\/\/doi.org\/10.1145\/2993148.2997632","relation":{},"subject":[],"published":{"date-parts":[[2016,10,31]]},"assertion":[{"value":"2016-10-31","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}