{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:43:11Z","timestamp":1775068991906,"version":"3.50.1"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea (NRF) grant funded by the Korea government","doi-asserted-by":"publisher","award":["NRF-2020R1A2B5B01002085"],"award-info":[{"award-number":["NRF-2020R1A2B5B01002085"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003725","name":"Basic Science Research Program through the National Research Foundation of Korea (NRF) funded by the Ministry of Education","doi-asserted-by":"publisher","award":["NRF-2017R1A4A1015559"],"award-info":[{"award-number":["NRF-2017R1A4A1015559"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/access.2020.2984368","type":"journal-article","created":{"date-parts":[[2020,3,30]],"date-time":"2020-03-30T22:19:07Z","timestamp":1585606747000},"page":"61672-61686","source":"Crossref","is-referenced-by-count":138,"title":["Multimodal Approach of Speech Emotion Recognition Using Multi-Level Multi-Head Fusion Attention-Based Recurrent Neural Network"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7539-2016","authenticated-orcid":false,"given":"Ngoc-Huynh","family":"Ho","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3024-5060","authenticated-orcid":false,"given":"Hyung-Jeong","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3575-5035","authenticated-orcid":false,"given":"Soo-Hyung","family":"Kim","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8756-1382","authenticated-orcid":false,"given":"Gueesang","family":"Lee","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2017.8273599"},{"key":"ref38","article-title":"CNNLSTM architecture for speech emotion recognition with data augmentation","author":"etienne","year":"2018","journal-title":"arXiv 1802 05630"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref31","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2014","journal-title":"arXiv 1409 0473"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502224"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"ref36","first-page":"5642","article-title":"Multi-attention recurrent network for human communication comprehension","author":"zadeh","year":"2018","journal-title":"Proc 32nd AAAI Conf Artif Intell"},{"key":"ref35","article-title":"EmotionLines: An emotion corpus of multi-party conversations","author":"chen","year":"2018","journal-title":"arXiv 1802 08379"},{"key":"ref34","article-title":"MELD: A multimodal multi-party dataset for emotion recognition in conversations","author":"poria","year":"2018","journal-title":"arXiv 1810 02508"},{"key":"ref10","first-page":"746","article-title":"Speech emotion recognition using hidden Markov models","author":"nogueiras","year":"2001","journal-title":"Proc 7th Eur Conf Speech Commun Technol"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-448"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.5120\/11872-7667"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-013-1377-z"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-014-1755-1"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2006.11.004"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2010.10.001"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2011.06.004"},{"key":"ref17","first-page":"223","article-title":"Speech emotion recognition using deep neural network and extreme learning machine","author":"han","year":"2014","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref18","first-page":"1","article-title":"High-level feature representation using recurrent neural network for speech emotion recognition","author":"lee","year":"2015","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-917"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1193"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992553"},{"key":"ref27","article-title":"Multi-task learning for multi-modal emotion recognition and sentiment analysis","author":"shad akhtar","year":"2019","journal-title":"arXiv 1905 05812"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2012.06.016"},{"key":"ref6","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016818"},{"key":"ref5","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2018","journal-title":"arXiv 1810 04805"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s005210070006"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.608022"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2010.09.020"},{"key":"ref9","first-page":"809","article-title":"Emotion recognition in spontaneous speech using GMMs","author":"neiberg","year":"2006","journal-title":"Proc Int Conf Spoken Lang Process"},{"key":"ref1","first-page":"60","article-title":"Classification of human emotion from Deap EEG signal using hybrid improved neural networks with cuckoo search","volume":"6","author":"sreeshakthy","year":"2016","journal-title":"Broad Research in Artificial Intelligence and Neuroscience"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-36808-1_72"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178872"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-3302"},{"key":"ref48","article-title":"Gaussian error linear units (GELUs)","author":"hendrycks","year":"2016","journal-title":"arXiv 1606 08415"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-3304"},{"key":"ref21","article-title":"Emotion recognition from speech with recurrent neural networks","author":"chernykh","year":"2017","journal-title":"arXiv 1701 08071"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639633"},{"key":"ref24","article-title":"Real-time emotion recognition via attention gated hierarchical memory network","author":"jiao","year":"2019","journal-title":"arXiv 1911 09075"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.23919\/APSIPA.2018.8659587"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/752"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1016"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-3303"},{"key":"ref43","article-title":"Multi-modal emotion recognition on IEMOCAP dataset using deep learning","author":"tripathi","year":"2018","journal-title":"arXiv 1804 05788"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-36711-4_26"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8948470\/09050806.pdf?arnumber=9050806","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T01:08:19Z","timestamp":1641949699000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9050806\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/access.2020.2984368","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}