{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T20:21:37Z","timestamp":1771705297074,"version":"3.50.1"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,1]]},"DOI":"10.1109\/smc53992.2023.10394418","type":"proceedings-article","created":{"date-parts":[[2024,1,29]],"date-time":"2024-01-29T18:32:04Z","timestamp":1706553124000},"page":"4134-4141","source":"Crossref","is-referenced-by-count":10,"title":["Multimodal Speech Emotion Recognition Using Modality-Specific Self-Supervised Frameworks"],"prefix":"10.1109","author":[{"given":"Rutherford Agbeshi","family":"Patamia","sequence":"first","affiliation":[{"name":"School of Information and Software Engineering, University of Electronic Science and Technology of China,Chengdu,SC,China,610054"}]},{"given":"Paulo E.","family":"Santos","sequence":"additional","affiliation":[{"name":"Flinders University,Centre for Defence Engineering, Research and Training College of Science Engineering,Tonsley,SA,Australia,5042"}]},{"given":"Kingsley Nketia","family":"Acheampong","sequence":"additional","affiliation":[{"name":"School of Information and Software Engineering, University of Electronic Science and Technology of China,Chengdu,SC,China,610054"}]},{"given":"Favour","family":"Ekong","sequence":"additional","affiliation":[{"name":"School of Information and Software Engineering, University of Electronic Science and Technology of China,Chengdu,SC,China,610054"}]},{"given":"Kwabena","family":"Sarpong","sequence":"additional","affiliation":[{"name":"School of Information and Software Engineering, University of Electronic Science and Technology of China,Chengdu,SC,China,610054"}]},{"given":"She","family":"Kun","sequence":"additional","affiliation":[{"name":"School of Information and Software Engineering, University of Electronic Science and Technology of China,Chengdu,SC,China,610054"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IWCMC51323.2021.9498858"},{"key":"ref2","volume-title":"Mutual impact of acoustic and linguistic representations for continuous emotion recognition in call-center conversations","author":"Tahon","year":"2021"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICSIGSYS.2019.8811080"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-022-10007-5"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"Goodfellow","year":"2015","journal-title":"Nature"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3390\/electronics10101163"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0196391"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1050"},{"key":"ref11","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"abs\/2006.11477","author":"Baevski","year":"2020","journal-title":"ArXiv"},{"key":"ref12","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","volume":"abs\/1810.04805","author":"Devlin","year":"2019","journal-title":"ArXiv"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/PRML52754.2021.9520692"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-6329-4_10"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054621"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2990405"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-3007"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-703"},{"key":"ref19","article-title":"Exploring wav2vec 2.0 fine- tuning for improved speech emotion recognition","volume":"abs\/2110.06309","author":"Chen","year":"2021","journal-title":"ArXiv"},{"key":"ref20","article-title":"Layer normalization","volume":"abs\/1607.06450","author":"Ba","year":"2016","journal-title":"ArXiv"},{"key":"ref21","article-title":"Gaussian error linear units (gelus)","author":"Hendrycks","year":"2016","journal-title":"arXiv: Learning"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1212"},{"key":"ref24","volume-title":"Multi-modal emotion recognition on iemocap with neural networks","author":"Tripathi","year":"2018"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2984368"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-1190"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICUS52573.2021.9641435"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2067"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3156"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU51503.2021.9688036"}],"event":{"name":"2023 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","location":"Honolulu, Oahu, HI, USA","start":{"date-parts":[[2023,10,1]]},"end":{"date-parts":[[2023,10,4]]}},"container-title":["2023 IEEE International Conference on Systems, Man, and Cybernetics (SMC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10391856\/10393862\/10394418.pdf?arnumber=10394418","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T13:13:38Z","timestamp":1706793218000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10394418\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,1]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/smc53992.2023.10394418","relation":{},"subject":[],"published":{"date-parts":[[2023,10,1]]}}}