{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T13:23:06Z","timestamp":1775913786473,"version":"3.50.1"},"reference-count":47,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["51379044"],"award-info":[{"award-number":["51379044"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2019]]},"DOI":"10.1109\/access.2019.2938007","type":"journal-article","created":{"date-parts":[[2019,8,28]],"date-time":"2019-08-28T19:56:47Z","timestamp":1567022207000},"page":"125868-125881","source":"Crossref","is-referenced-by-count":309,"title":["Speech Emotion Recognition From 3D Log-Mel Spectrograms With Deep Learning Network"],"prefix":"10.1109","volume":"7","author":[{"given":"Hao","family":"Meng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1851-6075","authenticated-orcid":false,"given":"Tianhao","family":"Yan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3985-0726","authenticated-orcid":false,"given":"Fei","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Hongwei","family":"Wei","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc 31st Conf Neural Inf Process Syst (NIPS)"},{"key":"ref38","article-title":"Recurrent highway networks","author":"zilly","year":"2016","journal-title":"arXiv 1607 03474"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"ref32","first-page":"1766","article-title":"Estimating phoneme class conditional probabilities from raw speech signal using convolutional neural networks","author":"palaz","year":"2013","journal-title":"Proc Conf Int Speech Commun Assoc"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2003.1224004"},{"key":"ref30","article-title":"Multi-scale context aggregation by dilated convolutions","author":"yu","year":"2015","journal-title":"arXiv 1511 07122"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472780"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref35","article-title":"Deep residual learning for image recognition","author":"he","year":"2015","journal-title":"arXiv 1512 03385"},{"key":"ref34","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"arXiv 1502 03167"},{"key":"ref10","first-page":"223","article-title":"Speech emotion recognition using deep neural network and extreme learning machine","author":"han","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46478-7_31"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2018.2860246"},{"key":"ref13","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"Proc Int Conf Learn Represent (ICLR)"},{"key":"ref14","article-title":"An empirical evaluation of generic convolutional and recurrent networks for sequence modeling","author":"bai","year":"2018","journal-title":"arXiv 1803 01271"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2015.7344669"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1132"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1432"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1477"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2014.2360798"},{"key":"ref4","first-page":"3201","article-title":"The INTERSPEECH 2011 speaker state challenge","author":"schuller","year":"2011","journal-title":"Proc ISCA Interspeech"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654984"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2010.09.020"},{"key":"ref6","first-page":"148","article-title":"The INTERSPEECH 2013 computational paralinguistics challenge: Social signals, conflict, emotion, autism","author":"schuller","year":"2013","journal-title":"Proc ISCA Interspeech"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2009.5349350"},{"key":"ref5","first-page":"1","article-title":"The INTERSPEECH 2012 speaker trait challenge","author":"schuller","year":"2012","journal-title":"Proc ISCA Interspeech"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2017.10.016"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2018.11.028"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-018-9491-z"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2017.06.006"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2432810"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1049\/iet-spr.2013.0446"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2581"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/FIT.2018.00023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1353"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1811"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1858"},{"key":"ref42","article-title":"TensorFlow: Large-scale machine learning on heterogeneous distributed systems","author":"abadi","year":"2016","journal-title":"arXiv 1603 04467"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2018.08.035"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123353"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638346"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2015.7280605"},{"key":"ref43","doi-asserted-by":"crossref","first-page":"1517","DOI":"10.21437\/Interspeech.2005-446","article-title":"A database of German emotional speech","volume":"5","author":"burkhardt","year":"2005","journal-title":"Proc INTERSPEECH"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2017.8273628"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8600701\/08817913.pdf?arnumber=8817913","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,26]],"date-time":"2022-09-26T20:50:39Z","timestamp":1664225439000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8817913\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/access.2019.2938007","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]}}}