{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T03:20:39Z","timestamp":1740108039344,"version":"3.37.3"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2018,12,11]],"date-time":"2018-12-11T00:00:00Z","timestamp":1544486400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["TRR-169"],"award-info":[{"award-number":["TRR-169"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2020,2]]},"DOI":"10.1007\/s00521-018-3869-3","type":"journal-article","created":{"date-parts":[[2018,12,11]],"date-time":"2018-12-11T08:08:55Z","timestamp":1544515735000},"page":"1007-1022","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Understanding auditory representations of emotional expressions with neural networks"],"prefix":"10.1007","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5155-5714","authenticated-orcid":false,"given":"Iris","family":"Wieser","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pablo","family":"Barros","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stefan","family":"Heinrich","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stefan","family":"Wermter","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,12,11]]},"reference":[{"issue":"10","key":"3869_CR1","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid O, Mohamed A, Jiang H, Deng L, Penn G, Yu D (2014) Convolutional neural networks for speech recognition. Audio Speech Lang Process 22(10):1533\u20131545","journal-title":"Audio Speech Lang Process"},{"key":"3869_CR2","doi-asserted-by":"crossref","unstructured":"Barros P, Weber C, Wermter S (2016) Learning auditory neural representations for emotion recognition. In: Proceedings of the 2016 international joint conference on neural networks. Vancouver, pp 921\u2013928","DOI":"10.1109\/IJCNN.2016.7727297"},{"key":"3869_CR3","doi-asserted-by":"crossref","unstructured":"Bengio Y, Boulanger-Lewandowski N, Pascanu R (2013) Advances in optimizing recurrent networks. In: Proceedings of the 2013 IEEE international conference on acoustics, speech and signal processing, pp 8624\u20138628","DOI":"10.1109\/ICASSP.2013.6639349"},{"key":"3869_CR4","unstructured":"Bergstra J S, Bardenet R, Bengio Y, K\u00e9gl B (2011) Algorithms for hyper-parameter optimization. In: Advances in neural information processing systems, pp 2546\u20132554"},{"key":"3869_CR5","doi-asserted-by":"crossref","unstructured":"Bergstra J, Yamins D, Cox DD (2013) Hyperopt: a python library for optimizing the hyperparameters of machine learning algorithms. In: Proceedings of the 12th python in science conference, pp 13\u201320","DOI":"10.25080\/Majora-8b375195-003"},{"issue":"4","key":"3869_CR6","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/s10579-008-9076-6","volume":"42","author":"C Busso","year":"2008","unstructured":"Busso C, Bulut M, Lee CC, Kazemzadeh A, Mower E, Kim S, Chang JN, Lee S, Narayanan S (2008) IEMOCAP: interactive emotional dyadic motion capture database. Lang Resour Eval 42(4):335\u2013359","journal-title":"Lang Resour Eval"},{"key":"3869_CR7","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1093\/acprof:oso\/9780195387643.003.0008","volume-title":"Social emotions in nature and artifact: emotions in human and human-computer interaction","author":"C Busso","year":"2013","unstructured":"Busso C, Bulut M, Narayanan S (2013) Toward effective automatic recognition systems of emotion in speech. In: Gratch J, Marsella S (eds) Social emotions in nature and artifact: emotions in human and human-computer interaction. Oxford University Press, New York, pp 110\u2013127"},{"key":"3869_CR8","doi-asserted-by":"crossref","unstructured":"Chang J, Scherer S (2017) Learning representations of emotional speech with deep convolutional generative adversarial networks. In: Proceedings of the IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 2746\u20132750","DOI":"10.1109\/ICASSP.2017.7952656"},{"key":"3869_CR9","doi-asserted-by":"crossref","unstructured":"Ciregan D, Meier U, Schmidhuber J (2012) Multi-column deep neural networks for image classification. In: Proceedings of the 2012 IEEE conference on computer vision and pattern recognition, pp 3642\u20133649","DOI":"10.1109\/CVPR.2012.6248110"},{"issue":"1","key":"3869_CR10","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1109\/79.911197","volume":"18","author":"R Cowie","year":"2001","unstructured":"Cowie R, Douglas-Cowie E, Tsapatsoulis N, Votsis G, Kollias S, Fellenz W, Taylor JG (2001) Emotion recognition in human\u2013computer interaction. IEEE Signal Process Mag 18(1):32\u201380","journal-title":"IEEE Signal Process Mag"},{"issue":"4","key":"3869_CR11","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis S, Mermelstein P (1980) Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans Acoust Speech Signal Process 28(4):357\u2013366","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"key":"3869_CR12","doi-asserted-by":"crossref","unstructured":"Dhall A, Goecke R, Joshi J, Sikka K, Gedeon T (2014) Emotion recognition in the wild challenge 2014: baseline, data and protocol. In: Proceedings of the 16th international conference on multimodal interaction, pp 461\u2013466","DOI":"10.1145\/2663204.2666275"},{"key":"3869_CR13","doi-asserted-by":"crossref","unstructured":"Dhall A, Ramana\u00a0Murthy OV, Goecke R, Joshi J, Gedeon T (2015) Video and image based emotion recognition challenges in the wild: Emotiw 2015. In: Proceedings of the 17th international conference on multimodal interaction, pp 423\u2013426","DOI":"10.1145\/2818346.2829994"},{"key":"3869_CR14","first-page":"19","volume":"3","author":"P Ekman","year":"1984","unstructured":"Ekman P (1984) Expression and the nature of emotion. Approaches Emot 3:19\u2013344","journal-title":"Approaches Emot"},{"issue":"3\u20134","key":"3869_CR15","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1080\/02699939208411068","volume":"6","author":"P Ekman","year":"1992","unstructured":"Ekman P (1992) An argument for basic emotions. Cognit Emot 6(3\u20134):169\u2013200","journal-title":"Cognit Emot"},{"issue":"3","key":"3869_CR16","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1016\/j.patcog.2010.09.020","volume":"44","author":"M Ayadi El","year":"2011","unstructured":"El Ayadi M, Kamel MS, Karray F (2011) Survey on speech emotion recognition: features, classification schemes, and databases. Pattern Recognit 44(3):572\u2013587","journal-title":"Pattern Recognit"},{"key":"3869_CR17","first-page":"1341","volume-title":"Visualizing higher-layer features of a deep network","author":"D Erhan","year":"2009","unstructured":"Erhan D, Bengio Y, Courville A, Vincent P (2009) Visualizing higher-layer features of a deep network. University of Montreal, Montreal, p 1341"},{"key":"3869_CR18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-27299-3","volume-title":"Real-time speech and music classification by large audio feature space extraction","author":"F Eyben","year":"2016","unstructured":"Eyben F (2016) Real-time speech and music classification by large audio feature space extraction. Springer, Berlin"},{"key":"3869_CR19","doi-asserted-by":"crossref","unstructured":"Eyben F, W\u00f6llmer M, Schuller B (2010) Opensmile: the Munich versatile and fast open-source audio feature extractor. In: Proceedings of the 18th ACM international conference on multimedia, pp 1459\u20131462","DOI":"10.1145\/1873951.1874246"},{"key":"3869_CR20","unstructured":"Fernandez R (2003) A computational model for the automatic recognition of affect in speech. Ph.D. thesis, Massachusetts Institute of Technology"},{"issue":"1","key":"3869_CR21","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1145\/1882471.1882479","volume":"12","author":"G Forman","year":"2010","unstructured":"Forman G, Scholz M (2010) Apples-to-apples in cross-validation studies: pitfalls in classifier performance measurement. ACM SIGKDD Explor. Newsl. 12(1):49\u201357","journal-title":"ACM SIGKDD Explor. Newsl."},{"key":"3869_CR22","doi-asserted-by":"crossref","unstructured":"Gao Y, Li B, Wang N, Zhu T (2017) Speech emotion recognition using local and global features. In: International conference on brain informatics. Springer, pp 3\u201313","DOI":"10.1007\/978-3-319-70772-3_1"},{"key":"3869_CR23","doi-asserted-by":"crossref","unstructured":"Ghosh S, Laksana E, Morency LP (2016) Representation learning for speech emotion recognition. In: Proceedings interspeech, pp 3603\u20133607","DOI":"10.21437\/Interspeech.2016-692"},{"key":"3869_CR24","first-page":"249","volume":"9","author":"X Glorot","year":"2010","unstructured":"Glorot X, Bengio Y (2010) Understanding the difficulty of training deep feedforward neural networks. Aistats 9:249\u2013256","journal-title":"Aistats"},{"key":"3869_CR25","first-page":"315","volume":"15","author":"X Glorot","year":"2011","unstructured":"Glorot X, Bordes A, Bengio Y (2011) Deep sparse rectifier neural networks. Aistats 15:315\u2013323","journal-title":"Aistats"},{"key":"3869_CR26","unstructured":"Golik P, Doetsch P, Ney H (2013) Cross-entropy vs. squared error training: a theoretical and experimental comparison. In: Proceedings of the 2013 Interspeech, pp 1756\u20131760"},{"issue":"1","key":"3869_CR27","doi-asserted-by":"publisher","first-page":"68","DOI":"10.4018\/jse.2010101605","volume":"1","author":"H Gunes","year":"2010","unstructured":"Gunes H, Pantic M (2010) Automatic, dimensional and continuous emotion recognition. Int J Synth Emot 1(1):68\u201399. https:\/\/doi.org\/10.4018\/jse.2010101605","journal-title":"Int J Synth Emot"},{"key":"3869_CR28","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Delving deep into rectifiers: surpassing human-level performance on imagenet classification. In: Proceedings of the 2015 IEEE international conference on computer vision, pp 1026\u20131034","DOI":"10.1109\/ICCV.2015.123"},{"key":"3869_CR29","unstructured":"Hinton GE, Srivastava N, Krizhevsky A, Sutskever I, Salakhutdinov RR (2012) Improving neural networks by preventing co-adaptation of feature detectors. arXiv:1207.0580"},{"key":"3869_CR30","doi-asserted-by":"crossref","unstructured":"Huang Z, Dong M, Mao Q, Zhan Y (2014) Speech emotion recognition using CNN. In: Proceedings of the ACM international conference on multimedia, pp 801\u2013804","DOI":"10.1145\/2647868.2654984"},{"key":"3869_CR31","doi-asserted-by":"crossref","unstructured":"Huang J, Li Y, Tao J, Lian Z, Niu M, Yi J (2018) Speech emotion recognition using semi-supervised learning with ladder networks. In: Proceedings Asian conference on affective computing and intelligent interaction (ACII Asia), pp 1\u20135","DOI":"10.1109\/ACIIAsia.2018.8470363"},{"issue":"3","key":"3869_CR32","doi-asserted-by":"publisher","first-page":"574","DOI":"10.1113\/jphysiol.1959.sp006308","volume":"148","author":"DH Hubel","year":"1959","unstructured":"Hubel DH, Wiesel TN (1959) Receptive fields of single neurones in the cat\u2019s striate cortex. J Physiol 148(3):574\u2013591","journal-title":"J Physiol"},{"key":"3869_CR33","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. arXiv:1502.03167"},{"key":"3869_CR34","doi-asserted-by":"crossref","unstructured":"Jin Q, Li C, Chen S, Wu H (2015) Speech emotion recognition with acoustic and lexical features. In: Proceedings of the 2015 IEEE international conference on acoustics, speech and signal processing, pp 4749\u20134753","DOI":"10.1109\/ICASSP.2015.7178872"},{"key":"3869_CR35","doi-asserted-by":"crossref","unstructured":"Keren G, Schuller BW (2016) Convolutional RNN: an enhanced model for extracting features from sequential data. arXiv:1602.05875","DOI":"10.1109\/IJCNN.2016.7727636"},{"key":"3869_CR36","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s10772-011-9125-1","volume":"15","author":"SG Koolagudi","year":"2012","unstructured":"Koolagudi SG, Rao KS (2012) Emotion recognition from speech: a review. Int J Speech Technol (Springer) 15:99\u2013117","journal-title":"Int J Speech Technol (Springer)"},{"key":"3869_CR37","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems, pp 1097\u20131105"},{"key":"3869_CR38","unstructured":"Lakomkin E, Weber C, Magg S, Wermter S (2018) Reusing neural speech representations for auditory emotion recognition. arXiv:1803.11508"},{"issue":"7553","key":"3869_CR39","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Deep learning. Nature 521(7553):436\u2013444","journal-title":"Nature"},{"issue":"4","key":"3869_CR40","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun Y, Boser B, Denker JS, Henderson D, Howard RE, Hubbard W, Jackel LD (1989) Backpropagation applied to handwritten zip code recognition. Neural Comput 1(4):541\u2013551","journal-title":"Neural Comput"},{"issue":"11","key":"3869_CR41","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. Proc IEEE 86(11):2278\u20132324","journal-title":"Proc IEEE"},{"key":"3869_CR42","doi-asserted-by":"publisher","first-page":"470","DOI":"10.1007\/978-3-319-14442-9_52","volume-title":"MultiMedia Modeling","author":"Mengmeng Liu","year":"2015","unstructured":"Liu M, Chen H, Li Y, Zhang F (2015) Emotional tone-based audio continuous emotion recognition. In: International conference on multimedia modeling, pp 470\u2013480"},{"key":"3869_CR43","unstructured":"Metallinou A, Narayanan S (2013) Annotation and processing of continuous emotional attributes: challenges and opportunities. In: Proceedings of the 2013 IEEE international conference and workshops on automatic face and gesture recognition, pp 1\u20138"},{"key":"3869_CR44","unstructured":"Muller U, Ben J, Cosatto E, Flepp B, Cun YL (2005) Off-road obstacle avoidance through end-to-end learning. In: Advances in neural information processing systems, pp 739\u2013746"},{"key":"3869_CR45","first-page":"372","volume":"27","author":"Y Nesterov","year":"1983","unstructured":"Nesterov Y (1983) A method of solving a convex programming problem with convergence rate O (1\/k2). Soviet Math Doklady 27:372\u2013376","journal-title":"Soviet Math Doklady"},{"key":"3869_CR46","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1016\/j.neuroimage.2014.07.063","volume":"102","author":"L Nummenmaa","year":"2014","unstructured":"Nummenmaa L, Saarim\u00e4ki H, Glerean E, Gotsopoulos A, J\u00e4\u00e4skel\u00e4inen IP, Hari R, Sams M (2014) Emotional speech synchronizes brains across listeners and engages large-scale dynamic brain networks. NeuroImage 102:498\u2013509","journal-title":"NeuroImage"},{"key":"3869_CR47","doi-asserted-by":"publisher","first-page":"56","DOI":"10.25125\/engineering-journal-IJOER-JUN-2017-9","volume":"3","author":"P Patel","year":"2017","unstructured":"Patel P, Chaudhari A, Kale R, Pund M (2017) Emotion recognition from speech with gaussian mixture models and via boosted GMM. Int J Res Sci Eng 3:56\u201364","journal-title":"Int J Res Sci Eng"},{"issue":"3","key":"3869_CR48","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1177\/002383096000300301","volume":"3","author":"I Pollack","year":"1960","unstructured":"Pollack I, Rubenstein H, Horowitz A (1960) Communication of verbal modes of expression. Lang Speech 3(3):121\u2013130","journal-title":"Lang Speech"},{"key":"3869_CR49","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1016\/j.inffus.2017.02.003","volume":"37","author":"S Poria","year":"2017","unstructured":"Poria S, Cambria E, Bajpai R, Hussain A (2017) A review of affective computing: from unimodal analysis to multimodal fusion. Inf Fusion 37:98\u2013125","journal-title":"Inf Fusion"},{"key":"3869_CR50","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1016\/j.patrec.2014.11.007","volume":"66","author":"F Ringeval","year":"2015","unstructured":"Ringeval F, Eyben F, Kroupi E, Yuce A, Thiran JP, Ebrahimi T, Lalanne D, Schuller BW (2015) Prediction of asynchronous dimensional emotion ratings from audiovisual and physiological data. Pattern Recognit Lett 66:22\u201330","journal-title":"Pattern Recognit Lett"},{"key":"3869_CR51","doi-asserted-by":"crossref","unstructured":"Sainath TN, Weiss RJ, Senior A, Wilson KW, Vinyals O (2015) Learning the speech front-end with raw waveform CLDNNs. In: Proceedings of the 2015 Interspeech, pp 1\u20135","DOI":"10.21437\/Interspeech.2015-1"},{"key":"3869_CR52","series-title":"Series in affective science, handbook of affective sciences","first-page":"433","volume-title":"Vocal expression of emotion","author":"KR Scherer","year":"2003","unstructured":"Scherer KR, Johnstone T, Klasmeyer G (2003) Vocal expression of emotion series in affective Science, handbook of affective sciences. Oxford University Press, New York, pp 433\u2013456"},{"key":"3869_CR53","doi-asserted-by":"crossref","unstructured":"Schr\u00f6der M, Cowie R, Douglas-Cowie E, Westerdijk M, Gielen S (2001) Acoustic correlates of emotion dimensions in view of speech synthesis. In: Proceedings of the 2011 Interspeech, pp 87\u201390","DOI":"10.21437\/Eurospeech.2001-34"},{"key":"3869_CR54","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-36806-6","volume-title":"Intelligent audio analysis","author":"BW Schuller","year":"2013","unstructured":"Schuller BW (2013) Intelligent audio analysis. Springer, Berlin"},{"issue":"9","key":"3869_CR55","doi-asserted-by":"publisher","first-page":"1062","DOI":"10.1016\/j.specom.2011.01.011","volume":"53","author":"BW Schuller","year":"2011","unstructured":"Schuller BW, Batliner A, Steidl S, Seppi D (2011) Recognising realistic emotions and affect in speech: state of the art and lessons learnt from the first challenge. Speech Commun 53(9):1062\u20131087","journal-title":"Speech Commun"},{"key":"3869_CR56","doi-asserted-by":"crossref","unstructured":"Schuller BW, Steidl S, Batliner A, Burkhardt F, Devillers L, M\u00fcller CA, Narayanan S et al (2010) The INTERSPEECH 2010 paralinguistic challenge. In: Proceedings of the 2010 Interspeech, pp 2795\u20132798","DOI":"10.21437\/Interspeech.2010-739"},{"key":"3869_CR57","unstructured":"Simonyan K, Vedaldi A, Zisserman A (2013) Deep inside convolutional networks: visualising image classification models and saliency maps. ArXiv:1312.6034"},{"key":"3869_CR58","first-page":"32","volume":"29","author":"P Song","year":"2018","unstructured":"Song P, Zheng W (2018) Feature selection based transfer subspace learning for speech emotion recognition. IEEE Trans Affect Comput 29:32\u201357","journal-title":"IEEE Trans Affect Comput"},{"key":"3869_CR59","unstructured":"Springenberg JT, Dosovitskiy A, Brox T, Riedmiller M (2014) Striving for simplicity: the all convolutional net. ArXiv:1412.6806"},{"key":"3869_CR60","doi-asserted-by":"crossref","unstructured":"Sun B, Li L, Zuo T, Chen Y, Zhou G, Wu X (2014) Combining multimodal features with hierarchical classifier fusion for emotion recognition in the wild. In: Proceedings of the 16th international conference on multimodal interaction, pp 481\u2013486","DOI":"10.1145\/2663204.2666272"},{"key":"3869_CR61","unstructured":"Sutskever I (2013) Training recurrent neural networks. Ph.D. thesis, University of Toronto"},{"key":"3869_CR62","doi-asserted-by":"crossref","unstructured":"Trigeorgis G, Ringeval F, Brueckner R, Marchi E, Nicolaou MA, Schuller BW, Zafeiriou S (2016) Adieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network. In: Proceedings 41st IEEE international conference on acoustics, speech, and signal processing, ICASSP, pp 5200\u20135204","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"3869_CR63","doi-asserted-by":"crossref","unstructured":"Truong KP, Leeuwen DA, Neerincx MA, Jong FM (2009) Arousal and valence prediction in spontaneous emotional speech: felt versus perceived emotion. In: Proceedings of the 2009 Interspeech, pp 2027\u20132030","DOI":"10.21437\/Interspeech.2009-583"},{"key":"3869_CR64","first-page":"4","volume":"vol","author":"F Weninger","year":"2013","unstructured":"Weninger F, Eyben F, Schuller B, Mortillaro M, Scherer K (2013) On the acoustics of emotion in audio: what speech, music, and sound have in common. Front Emot Sci vol:4","journal-title":"Front Emot Sci"},{"key":"3869_CR65","doi-asserted-by":"crossref","unstructured":"W\u00f6llmer M, Eyben F, Reiter S, Schuller BW, Cox C, Douglas-Cowie E, Cowie R et al (2008) Abandoning emotion classes-towards continuous emotion recognition with modelling of long-range dependencies. In: Proceedings of the 2008 Interspeech, pp 597\u2013600","DOI":"10.21437\/Interspeech.2008-192"},{"key":"3869_CR66","unstructured":"Wu YT, Chen HY, Liao YH, Kuo LW, Lee CC (2017) Modeling perceivers neural-responses using lobe-dependent convolutional neural network to improve speech emotion recognition. In: Proceedings of the Interspeech, pp 3261\u20133265"},{"key":"3869_CR67","doi-asserted-by":"publisher","unstructured":"Zheng WQ, Yu J, Zou Y (2015) An experimental study of speech emotion recognition based on deep convolutional neural networks. In: Proceedings of the 2015 international conference on affective computing and intelligent interaction, pp 827\u2013831 https:\/\/doi.org\/10.1109\/ACII.2015.7344669","DOI":"10.1109\/ACII.2015.7344669"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-018-3869-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00521-018-3869-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-018-3869-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,8]],"date-time":"2022-09-08T00:04:52Z","timestamp":1662595492000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00521-018-3869-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12,11]]},"references-count":67,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2020,2]]}},"alternative-id":["3869"],"URL":"https:\/\/doi.org\/10.1007\/s00521-018-3869-3","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2018,12,11]]},"assertion":[{"value":"2 December 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 November 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 December 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}