{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T15:00:12Z","timestamp":1780498812625,"version":"3.54.1"},"reference-count":16,"publisher":"Springer Science and Business Media LLC","issue":"26","license":[{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Telecom Technology Center","award":["NTUTTTC-212P49"],"award-info":[{"award-number":["NTUTTTC-212P49"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-20409-2","type":"journal-article","created":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T04:25:34Z","timestamp":1730694334000},"page":"31093-31118","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Constructing multi-modal emotion recognition model based on convolutional neural network"],"prefix":"10.1007","volume":"84","author":[{"given":"Jong-Yih","family":"Kuo","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8488-261X","authenticated-orcid":false,"given":"Ti-Feng","family":"Hsieh","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ta-Yu","family":"Lin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,11,4]]},"reference":[{"issue":"10","key":"20409_CR1","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1109\/MC.2021.3092610","volume":"54","author":"A Holzinger","year":"2021","unstructured":"Holzinger A, M\u00fcller H (2021) Toward Human\u2013AI interfaces to support explainability and causability in medical AI. Computer 54(10):78\u201386. https:\/\/doi.org\/10.1109\/MC.2021.3092610","journal-title":"Computer"},{"issue":"11","key":"20409_CR2","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. Proceedings of the IEEE 86(11):2278\u20132324. https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proceedings of the IEEE"},{"issue":"1","key":"20409_CR3","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji S, Xu W, Yang M, Yu K (2013) 3D convolutional neural networks for human action recognition. IEEE Trans Pattern Anal Mach Intell 35(1):221\u2013231. https:\/\/doi.org\/10.1109\/TPAMI.2012.59","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"20409_CR4","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1016\/j.neucom.2019.12.076","volume":"386","author":"N Cudlenco","year":"2020","unstructured":"Cudlenco N, Popescu N, Leordeanu M (2020) Reading into the mind\u2019s eye: boosting automatic visual recognition with EEG signals. Neurocomputing 386:281\u2013292. https:\/\/doi.org\/10.1016\/j.neucom.2019.12.076","journal-title":"Neurocomputing"},{"key":"20409_CR5","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1016\/j.inffus.2018.06.003","volume":"46","author":"Y Ma","year":"2019","unstructured":"Ma Y, Hao Y, Chen M, Chen J, Lu P, Ko\u0161ir A (2019) Audiovisual emotion fusion (AVEF): a deep, efficient weighted approach. Inform Fusion 46:184\u2013192. https:\/\/doi.org\/10.1016\/j.inffus.2018.06.003","journal-title":"Inform Fusion"},{"key":"20409_CR6","doi-asserted-by":"publisher","unstructured":"Van Segbroeck M, Tsiartas A, Narayanan S (2013) A robust frontend for VAD: exploiting contextual, discriminative and spectral cues of human voice. In Proc Interspeech 704\u2013708. https:\/\/doi.org\/10.21437\/Interspeech.2013-198","DOI":"10.21437\/Interspeech.2013-198"},{"issue":"2","key":"20409_CR7","doi-asserted-by":"publisher","first-page":"756","DOI":"10.1109\/TAFFC.2019.2961089","volume":"13","author":"I Kansizoglou","year":"2019","unstructured":"Kansizoglou I, Bampis L, Gasteratos A (2019) An active learning paradigm for online audio-visual emotion recognition. IEEE Trans Affect Comput 13(2):756\u2013768. https:\/\/doi.org\/10.1109\/TAFFC.2019.2961089","journal-title":"IEEE Trans Affect Comput"},{"issue":"1","key":"20409_CR8","doi-asserted-by":"publisher","first-page":"1016","DOI":"10.1109\/TIE.2022.3150097","volume":"70","author":"L Chen","year":"2023","unstructured":"Chen L, Wang K, Li M, Wu M, Pedrycz W, Hirota K (2023) K-means clustering-based kernel canonical correlation analysis for multimodal emotion recognition in human\u2013robot interaction. IEEE Trans Industr Electron 70(1):1016\u20131024. https:\/\/doi.org\/10.1109\/TIE.2022.3150097","journal-title":"IEEE Trans Industr Electron"},{"issue":"11","key":"20409_CR9","doi-asserted-by":"publisher","first-page":"16359","DOI":"10.1007\/s11042-022-14185-0","volume":"82","author":"G Tang","year":"2023","unstructured":"Tang G, Xie Y, Li K, Liang R, Zhao L (2023) Multimodal emotion recognition from facial expression and speech based on feature fusion. Multimedia Tools Appl 82(11):16359\u201316373. https:\/\/doi.org\/10.1007\/s11042-022-14185-0","journal-title":"Multimedia Tools Appl"},{"key":"20409_CR10","doi-asserted-by":"publisher","unstructured":"Deng J, Guo J, Ververas E, Kotsia I, Zafeiriou S (2020) RetinaFace: Single-shot multi-level face localisation in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 5202\u20135211. https:\/\/doi.org\/10.1109\/CVPR42600.2020.00525","DOI":"10.1109\/CVPR42600.2020.00525"},{"key":"20409_CR11","unstructured":"Andrew G, Arora R, Bilmes J, Livescu K (2013) Deep canonical correlation analysis. In: Proceedings of the 29th International Conference on Machine Learning 28(3):1247\u20131255"},{"key":"20409_CR12","doi-asserted-by":"publisher","unstructured":"Martin O, Kotsia I, Macq B, Pitas I (2006) The eNTERFACE\u201905 audio-visual emotion database. In Proceedings of the 22nd International conference on data engineering workshops, Atlanta, GA, USA. https:\/\/doi.org\/10.1109\/ICDEW.2006.145","DOI":"10.1109\/ICDEW.2006.145"},{"key":"20409_CR13","unstructured":"Krizhevsky A, Sutskever I, Hinton GE\u00a0(2012) ImageNet classification with deep convolutional neural networks. In: Advances in Neural Information Processing Systems (NIPS),\u00a025:1097\u20131105"},{"key":"20409_CR14","doi-asserted-by":"publisher","unstructured":"Tran D, Bourdev L, Fergus R, Torresani L, Paluri M (2015) Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV): 4489\u20134497. Santiago, Chile. https:\/\/doi.org\/10.1109\/ICCV.2015.510","DOI":"10.1109\/ICCV.2015.510"},{"key":"20409_CR15","unstructured":"Kingma DP, Ba J (2015) Adam: a method for stochastic optimization. In: Proceedings of the International Conference on Learning Representations (ICLR) abs\/1412.6980"},{"key":"20409_CR16","doi-asserted-by":"publisher","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L-C (2018) Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp 4510\u20134520. https:\/\/doi.org\/10.1109\/CVPR.2018.00474","DOI":"10.1109\/CVPR.2018.00474"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20409-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-20409-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20409-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T03:25:17Z","timestamp":1754018717000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-20409-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,4]]},"references-count":16,"journal-issue":{"issue":"26","published-online":{"date-parts":[[2025,8]]}},"alternative-id":["20409"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-20409-2","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,4]]},"assertion":[{"value":"29 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 October 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 November 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}