{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T00:18:19Z","timestamp":1775607499556,"version":"3.50.1"},"reference-count":34,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/slt.2018.8639583","type":"proceedings-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T18:36:34Z","timestamp":1550169394000},"page":"112-118","source":"Crossref","is-referenced-by-count":251,"title":["Multimodal Speech Emotion Recognition Using Audio and Text"],"prefix":"10.1109","author":[{"given":"Seunghyun","family":"Yoon","sequence":"first","affiliation":[]},{"given":"Seokhyun","family":"Byun","sequence":"additional","affiliation":[]},{"given":"Kyomin","family":"Jung","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref32","article-title":"Exact solutions to the nonlinear dynamics of learning in deep linear neural networks","author":"saxe","year":"2013","journal-title":"arXiv preprint arXiv 1312 6120"},{"key":"ref31","doi-asserted-by":"crossref","first-page":"1263","DOI":"10.21437\/Interspeech.2017-917","article-title":"Attentive convolutional neural network based speech emotion recognition: A study on the impact of input features, signal length, and acted speech","author":"neumann","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref30","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"chung","year":"2014","journal-title":"arXiv preprint arXiv 1412 3555"},{"key":"ref34","article-title":"High-level feature representation using recurrent neural network for speech emotion recognition","author":"lee","year":"2015","journal-title":"Sixteenth Annual Conference of the International Speech Communication Association"},{"key":"ref10","year":"2018","journal-title":"API Microsoft Speech"},{"key":"ref11","first-page":"180","article-title":"Constructing the affective lexicon ontology","volume":"27","author":"xu","year":"2008","journal-title":"Journal of the China Society for Scientific and Technical Information"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/KST.2013.6512793"},{"key":"ref13","first-page":"i","article-title":"Hidden markov model-based speech emotion recognition","volume":"1","author":"schuller","year":"2003","journal-title":"Proceedings of the 2003 International Conference on Multimedia and Expo 2003"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2011.06.004"},{"key":"ref15","article-title":"Speech emotion recognition using deep neural network and extreme learning machine","author":"han","year":"2014","journal-title":"Fifteenth Annual Conference of the International Speech Communication Association"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953131"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/PlatCon.2017.7883728"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952655"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.3115\/1219044.1219075"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502224"},{"key":"ref3","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in english and mandarin","author":"amodei","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-2008"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref5","article-title":"Emotional chatting machine: Emotional conversation generation with internal and external memory","author":"zhou","year":"2018"},{"key":"ref8","author":"yu","year":"2016","journal-title":"Automatic Speech Recognition"},{"key":"ref7","first-page":"110","article-title":"Toward effective automatic recognition systems of emotion in speech","author":"busso","year":"2014","journal-title":"Social Emotions in Nature and Artifact"},{"key":"ref2","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2014","journal-title":"arXiv preprint arXiv 1409 0473"},{"key":"ref9","year":"2018","journal-title":"Cloud speech-to-text"},{"key":"ref1","first-page":"1097","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"1089","DOI":"10.21437\/Interspeech.2017-200","article-title":"Efficient emotion recognition from speech using deep learning on spectrograms","author":"satt","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"1098","DOI":"10.21437\/Interspeech.2017-1637","article-title":"Progressive neural networks for transfer learning in emotion recognition","author":"gideon","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref21","article-title":"Towards speech emotion recognition&#x201D; in the wild&#x201D; using aggregated corpora and deep multi-task learning","author":"kim","year":"2017","journal-title":"18th Annual Conference of the International Speech Communication Association INTERSPEECH 2017 Situated interaction"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953274"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2004.1326051"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472176"}],"event":{"name":"2018 IEEE Spoken Language Technology Workshop (SLT)","location":"Athens, Greece","start":{"date-parts":[[2018,12,18]]},"end":{"date-parts":[[2018,12,21]]}},"container-title":["2018 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8632666\/8639030\/08639583.pdf?arnumber=8639583","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T00:57:50Z","timestamp":1643245070000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8639583\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/slt.2018.8639583","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}