{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T06:03:19Z","timestamp":1781589799969,"version":"3.54.5"},"reference-count":29,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,10]]},"DOI":"10.1109\/acii.2017.8273628","type":"proceedings-article","created":{"date-parts":[[2018,2,1]],"date-time":"2018-02-01T21:46:54Z","timestamp":1517521614000},"page":"383-388","source":"Crossref","is-referenced-by-count":26,"title":["Learning spectro-temporal features with 3D CNNs for speech emotion recognition"],"prefix":"10.1109","author":[{"given":"Jaebok","family":"Kim","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Khiet P.","family":"Truong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gwenn","family":"Englebienne","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vanessa","family":"Evers","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref11","first-page":"1995","article-title":"Convolutional networks for images, speech, and time series","volume":"3361","author":"lecun","year":"1995","journal-title":"The Handbook of Brain Theory and Neural Networks"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-736"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123353"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"ref15","article-title":"Emotional prosody speech and transcripts","author":"liberman","year":"2002","journal-title":"Linguistic Data Consortium Philadelphia"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICDEW.2006.145"},{"key":"ref17","first-page":"1517","article-title":"A database of german emotional speech","volume":"5","author":"burkhardt","year":"2005","journal-title":"Proceedings of INTER-SPEECH"},{"key":"ref18","article-title":"You stupid tin box-children interacting with the aibo robot: A cross-linguistic emotional speech corpus","author":"batliner","year":"2004","journal-title":"Proceedings of LREC"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref28","first-page":"2579","article-title":"Visualizing data using t-sne","volume":"9","author":"maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638344"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2014.2360798"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"ref29","first-page":"550","article-title":"Residual networks behave like ensembles of relatively shallow networks","author":"veit","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref5","article-title":"High-level feature representation using recurrent neural network for speech emotion recognition","author":"lee","year":"2015","journal-title":"Proceedings of Interspeech"},{"key":"ref8","author":"anand","year":"0","journal-title":"Convoluted feelings convolutional and recurrent nets for detecting emotion from audio data"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-692"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2015.7344669"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.59"},{"key":"ref1","article-title":"Speech emotion recognition using deep neural network and extreme learning machine","author":"kun han","year":"2011","journal-title":"Proceedings of Interspeech"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2010.5583006"},{"key":"ref22","first-page":"19","article-title":"&#x2018;feeltrace&#x2019;: An instrument for recording perceived emotion in real time","author":"cowie","year":"2000","journal-title":"ISCA Tutorial and Research Workshop (ITRW) on Speech and Emotion"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2013.6553805"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(98)00010-0"},{"key":"ref23","author":"kingma","year":"2014","journal-title":"Adam A method for stochastic optimization"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-04898-2_420"},{"key":"ref25","first-page":"1929","article-title":"Dropout: a simple way to prevent neural networks from overfitting","volume":"15","author":"srivastava","year":"2014","journal-title":"Journal of Machine Learning Research"}],"event":{"name":"2017 Seventh International Conference on Affective Computing and Intelligent Interaction (ACII)","location":"San Antonio, TX","start":{"date-parts":[[2017,10,23]]},"end":{"date-parts":[[2017,10,26]]}},"container-title":["2017 Seventh International Conference on Affective Computing and Intelligent Interaction (ACII)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8263545\/8273570\/08273628.pdf?arnumber=8273628","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2018,3,12]],"date-time":"2018-03-12T21:54:52Z","timestamp":1520891692000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8273628\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,10]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/acii.2017.8273628","relation":{},"subject":[],"published":{"date-parts":[[2017,10]]}}}