{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:28:57Z","timestamp":1775230137301,"version":"3.50.1"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1109\/apsipaasc63619.2025.10848773","type":"proceedings-article","created":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T18:37:05Z","timestamp":1738003025000},"page":"1-6","source":"Crossref","is-referenced-by-count":2,"title":["A Study on Multimodal Fusion and Layer Adapter in Emotion Recognition"],"prefix":"10.1109","author":[{"given":"Xiaohan","family":"Shi","sequence":"first","affiliation":[{"name":"Nagoya University,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuan","family":"Gao","sequence":"additional","affiliation":[{"name":"Kyoto University,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiajun","family":"He","sequence":"additional","affiliation":[{"name":"Nagoya University,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinyi","family":"Mi","sequence":"additional","affiliation":[{"name":"Nagoya University,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xingfeng","family":"Li","sequence":"additional","affiliation":[{"name":"City University of Macau, Macau"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tomoki","family":"Toda","sequence":"additional","affiliation":[{"name":"Nagoya University,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/79.911197"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2764438"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376680"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3607865.3613182"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MIPRO.2016.7522336"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053648"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2018.07.041"},{"key":"ref8","first-page":"45","article-title":"Multi-modal emotion recognition from speech and text","author":"Chuang","year":"2004","journal-title":"International Journal of Computational Linguistics & Chinese Language Processing, Volume 9, Number 2, August 2004: Special Issue on New Trends of Speech and Language Processing"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747095"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2022-57"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s00530-010-0182-0"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054709"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2193"},{"key":"ref15","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"International conference on machine learning","author":"Radford"},{"key":"ref16","article-title":"Roberta: A robustly optimized bert pretraining approach","author":"Liu","year":"2019"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446548"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747723"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094839"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3247822"},{"issue":"11","key":"ref22","article-title":"Visualizing data using t-sne","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"Journal of machine learning research"},{"key":"ref23","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014"},{"issue":"1","key":"ref24","first-page":"1929","article-title":"Dropout: A simple way to prevent neural networks from overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"The journal of machine learning research"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1236"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1820"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/taffc.2023.3290795"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3289312"}],"event":{"name":"2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","location":"Macau, Macao","start":{"date-parts":[[2024,12,3]]},"end":{"date-parts":[[2024,12,6]]}},"container-title":["2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10848542\/10848533\/10848773.pdf?arnumber=10848773","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,28]],"date-time":"2025-01-28T06:15:51Z","timestamp":1738044951000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10848773\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc63619.2025.10848773","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]}}}