{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T16:25:21Z","timestamp":1764174321300,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2016,10,31]],"date-time":"2016-10-31T00:00:00Z","timestamp":1477872000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61272206"],"award-info":[{"award-number":["61272206"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Open Projects Program of National Laboratory of Pattern Recognition","award":["201600008"],"award-info":[{"award-number":["201600008"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,10,31]]},"DOI":"10.1145\/2993148.2997637","type":"proceedings-article","created":{"date-parts":[[2016,11,1]],"date-time":"2016-11-01T13:46:03Z","timestamp":1478007963000},"page":"506-513","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":39,"title":["Audio and face video emotion recognition in the wild using deep neural networks and small datasets"],"prefix":"10.1145","author":[{"given":"Wan","family":"Ding","sequence":"first","affiliation":[{"name":"Central China Normal University, China"}]},{"given":"Mingyu","family":"Xu","sequence":"additional","affiliation":[{"name":"University of British Columbia, Canada"}]},{"given":"Dongyan","family":"Huang","sequence":"additional","affiliation":[{"name":"A*STAR, Singapore"}]},{"given":"Weisi","family":"Lin","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore"}]},{"given":"Minghui","family":"Dong","sequence":"additional","affiliation":[{"name":"A*STAR, Singapore"}]},{"given":"Xinguo","family":"Yu","sequence":"additional","affiliation":[{"name":"Central China Normal University, China"}]},{"given":"Haizhou","family":"Li","sequence":"additional","affiliation":[{"name":"A*STAR, Singapore \/ National University of Singapore, Singapore"}]}],"member":"320","published-online":{"date-parts":[[2016,10,31]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"2015 11th IEEE International Conference and Workshops on.","volume":"6","author":"Michel","year":"2015"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988257.2988258"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2829994"},{"volume-title":"2011 IEEE International Conference on. IEEE","year":"2011","author":"Michel","key":"e_1_3_2_1_4_1"},{"volume-title":"Computer vision using local binary patterns","year":"2011","key":"e_1_3_2_1_5_1"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2011.5771366"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2663204.2666274"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.177"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390204"},{"volume-title":"Covariance discriminative learning: A natural and efficient approach to image set classfication","year":"2012","author":"Wang R.","key":"e_1_3_2_1_11_1"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.233"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.212"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830585"},{"volume-title":"&#34;Effective face frontalization in unconstrained images.&#34","year":"2015","key":"e_1_3_2_1_15_1"},{"volume-title":"2010 IEEE Conference on. IEEE","year":"2010","author":"Matthew","key":"e_1_3_2_1_16_1"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2012.2186121"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830593"},{"volume-title":"A report on three machine learning contests.&#34","year":"2013","author":"Ian","key":"e_1_3_2_1_19_1"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830596"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2808196.2811641"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-012-9368-5"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/72.279188"},{"issue":"3","key":"e_1_3_2_1_24_1","first-page":"1310","article-title":"the difficulty of training recurrent neural networks.&#34;","volume":"28","author":"Mikolov Tomas","year":"2013","journal-title":"ICML"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"volume-title":"Speech and Signal Processing (ICASSP). IEEE","year":"2015","author":"Sak Hasim","key":"e_1_3_2_1_26_1"},{"volume-title":"&#34;Language Identification in Short Utterances Using Long Short-Term Memory (LSTM) Recurrent Neural Networks.&#34","year":"2016","key":"e_1_3_2_1_27_1"},{"volume-title":"&#34;How Deep Neural Networks Can Improve Emotion Recognition on Video Data.&#34","year":"2016","key":"e_1_3_2_1_28_1"},{"volume-title":"Partial least squares. Encyclopedia of statistical sciences","year":"1985","author":"Wold H.","key":"e_1_3_2_1_29_1"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2663204.2666275"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2663204.2666271"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2326393"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502224"},{"issue":"3","key":"e_1_3_2_1_35_1","first-page":"547","article-title":"RecurREnt neural network toolkit.&#34;","volume":"16","author":"Bergmann Johannes","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2830590"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"volume-title":"&#34;Imagenet classification with deep convolutional neural networks.&#34","year":"2012","author":"Sutskever Ilya","key":"e_1_3_2_1_38_1"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2012.26"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2993148.2997638"}],"event":{"name":"ICMI '16: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Tokyo Japan","acronym":"ICMI '16"},"container-title":["Proceedings of the 18th ACM International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2993148.2997637","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2993148.2997637","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:39:45Z","timestamp":1750217985000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2993148.2997637"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10,31]]},"references-count":40,"alternative-id":["10.1145\/2993148.2997637","10.1145\/2993148"],"URL":"https:\/\/doi.org\/10.1145\/2993148.2997637","relation":{},"subject":[],"published":{"date-parts":[[2016,10,31]]},"assertion":[{"value":"2016-10-31","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}