{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T12:39:20Z","timestamp":1775047160064,"version":"3.50.1"},"reference-count":37,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,5]]},"DOI":"10.1109\/icassp.2013.6638346","type":"proceedings-article","created":{"date-parts":[[2013,10,29]],"date-time":"2013-10-29T19:19:46Z","timestamp":1383074386000},"page":"3687-3691","source":"Crossref","is-referenced-by-count":258,"title":["Deep learning for robust feature generation in audiovisual emotion recognition"],"prefix":"10.1109","author":[{"given":"Yelin","family":"Kim","sequence":"first","affiliation":[]},{"given":"Honglak","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Emily Mower","family":"Provost","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"crossref","first-page":"340","DOI":"10.21437\/Interspeech.2009-110","article-title":"Emotion classification in childrens speech using fusion of acoustic and linguistic features","author":"polzehl","year":"2009","journal-title":"Proc Interspeech 2009"},{"key":"35","doi-asserted-by":"crossref","first-page":"2225","DOI":"10.21437\/Interspeech.2007-605","article-title":"Using neutral speech models for emotional speech analysis","author":"busso","year":"2007","journal-title":"Proceedings of Interspeech 2007"},{"key":"17","doi-asserted-by":"crossref","first-page":"312","DOI":"10.21437\/Interspeech.2009-103","article-title":"The interspeech 2009 emotion challenge","author":"schuller","year":"2009","journal-title":"Proc INTERSPEECH"},{"key":"36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7908-1902-1_23"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2004.838534"},{"key":"33","doi-asserted-by":"publisher","DOI":"10.1162\/089976602760128018"},{"key":"15","author":"schuller","year":"2012","journal-title":"The Interspeech 2012 Speaker Trait Challenge"},{"key":"34","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947651"},{"key":"13","first-page":"689","article-title":"Multimodal deep learning","author":"ngiam","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML)"},{"key":"14","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-95","article-title":"Likability classification-A not so deep neural network approach","author":"brueckner","year":"2012","journal-title":"Proceedings of Interspeech"},{"key":"37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5494890"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2109382"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2129510"},{"key":"21","doi-asserted-by":"publisher","DOI":"10.1145\/1291233.1291297"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2076804"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2005.1415114"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5494893"},{"key":"24","doi-asserted-by":"crossref","first-page":"2362","DOI":"10.21437\/Interspeech.2010-646","article-title":"Context-sensitive multimodal emotion recognition from speech and facial expression using bidirectional lstm modeling","author":"wollmer","year":"2010","journal-title":"Proc Interspeech Conf"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.905145"},{"key":"26","first-page":"873","article-title":"Sparse deep belief net model for visual area v2","volume":"20","author":"lee","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"27","article-title":"Deep networks for robust visual recognition","volume":"28","author":"tang","year":"2010","journal-title":"International Conference on Machine Learning Citeseer"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1145\/2001269.2001295"},{"key":"29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126554"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1016\/j.sigpro.2008.07.001"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1145\/1027933.1027968"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2116010"},{"key":"1","first-page":"1345","article-title":"Modeling human motion using binary latent variables","volume":"19","author":"taylor","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"30","first-page":"1106","article-title":"Imagenet classification with deep convolutional neural networks","volume":"25","author":"krizhevsky","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-012-9368-5"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15184-2_8"},{"key":"32","doi-asserted-by":"publisher","DOI":"10.1162\/neco.2006.18.7.1527"},{"key":"5","first-page":"145","article-title":"Lowlevel fusion of audio and video feature for multi-modal emotion recognition","volume":"2","author":"wimmer","year":"2008","journal-title":"International Conference on Computer Vision Theory and Applications VISAPP"},{"key":"31","first-page":"194","article-title":"Information processing in dynamical systems: Foundations of harmony theory","volume":"1","author":"smolensky","year":"1986","journal-title":"Parallel Distributed Processing Explorations in the Microstructure of Cognition"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2005.1521463"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1561\/2200000006"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2010.09.020"}],"event":{"name":"ICASSP 2013 - 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Vancouver, BC, Canada","start":{"date-parts":[[2013,5,26]]},"end":{"date-parts":[[2013,5,31]]}},"container-title":["2013 IEEE International Conference on Acoustics, Speech and Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6619549\/6637585\/06638346.pdf?arnumber=6638346","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,3,10]],"date-time":"2022-03-10T17:33:41Z","timestamp":1646933621000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6638346\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,5]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/icassp.2013.6638346","relation":{},"subject":[],"published":{"date-parts":[[2013,5]]}}}