{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T20:05:54Z","timestamp":1776456354273,"version":"3.51.2"},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2020,3,4]],"date-time":"2020-03-04T00:00:00Z","timestamp":1583280000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,3,4]],"date-time":"2020-03-04T00:00:00Z","timestamp":1583280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61872267"],"award-info":[{"award-number":["61872267"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2021,5]]},"DOI":"10.1007\/s11042-020-08796-8","type":"journal-article","created":{"date-parts":[[2020,3,4]],"date-time":"2020-03-04T17:02:53Z","timestamp":1583341373000},"page":"16205-16214","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":28,"title":["Multi-modal feature fusion based on multi-layers LSTM for video emotion recognition"],"prefix":"10.1007","volume":"80","author":[{"given":"Weizhi","family":"Nie","sequence":"first","affiliation":[]},{"given":"Yan","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Dan","family":"Song","sequence":"additional","affiliation":[]},{"given":"Kun","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,3,4]]},"reference":[{"key":"8796_CR1","doi-asserted-by":"crossref","unstructured":"Chen C, Wu Z, Jiang YG (2016) Emotion in context: Deep semantic feature fusion for video emotion recognition. In: Proceedings of the 24th ACM International Conference on Multimedia, ACM, pp 127\u2013131","DOI":"10.1145\/2964284.2967196"},{"key":"8796_CR2","doi-asserted-by":"crossref","unstructured":"Dhall A, Goecke R, Joshi J, Wagner M, Gedeon T (2013) Emotion recognition in the wild challenge 2013. In: Proceedings of the 15th ACM on International Conference on Multimodal Interaction, ACM, pp 509\u2013516","DOI":"10.1145\/2522848.2531739"},{"issue":"4","key":"8796_CR3","first-page":"151","volume":"8","author":"P Ekman","year":"1970","unstructured":"Ekman P, Keltner D (1970) Universal facial expressions of emotion. California Mental Health Research Digest 8(4):151\u2013158","journal-title":"California Mental Health Research Digest"},{"issue":"1-2","key":"8796_CR4","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1007\/s12193-009-0032-6","volume":"3","author":"F Eyben","year":"2010","unstructured":"Eyben F, W\u00f6llmer M, Graves A, Schuller B, Douglas-Cowie E, Cowie R (2010) On-line emotion recognition in a 3-d activation-valence-time continuum using acoustic and linguistic cues. J Multimodal User Interfaces 3(1-2):7\u201319","journal-title":"J Multimodal User Interfaces"},{"key":"8796_CR5","doi-asserted-by":"crossref","unstructured":"Gao Z, Xuan HZ, Zhang H, Wan S, Choo KKR (2019) Adaptive fusion and category-level dictionary learning model for multi-view human action recognition. IEEE Internet of Things Journal","DOI":"10.1109\/JIOT.2019.2911669"},{"issue":"1","key":"8796_CR6","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji S, Xu W, Yang M, Yu K (2013) 3d convolutional neural networks for human action recognition. IEEE Trans Pattern Anal Mach Intell 35(1):221\u2013231","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"8796_CR7","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s12193-015-0195-2","volume":"10","author":"SE Kahou","year":"2016","unstructured":"Kahou SE, Bouthillier X, Lamblin P, Gulcehre C, Michalski V, Konda K, Jean S, Froumenty P, Dauphin Y, Boulanger-Lewandowski N et al (2016) Emonets: Multimodal deep learning approaches for emotion recognition in video. J Multimodal User Interfaces 10(2):99\u2013111","journal-title":"J Multimodal User Interfaces"},{"key":"8796_CR8","doi-asserted-by":"crossref","unstructured":"Metallinou A, Lee S, Narayanan S (2008) Audio-visual emotion recognition using gaussian mixture models for face and voice. In: Tenth IEEE International Symposium on Multimedia, 2008. ISM 2008, IEEE, pp 250\u2013257","DOI":"10.1109\/ISM.2008.40"},{"key":"8796_CR9","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781"},{"key":"8796_CR10","unstructured":"Ngiam J, Khosla A, Kim M, Nam J, Lee H, Ng AY (2011) Multimodal deep learning. In: Proceedings of the 28th International Conference on Machine Learning (ICML-11), pp 689\u2013696"},{"key":"8796_CR11","unstructured":"P\u00e9rez-Rosas V, Mihalcea R, Morency LP (2013) Utterance-level multimodal sentiment analysis. In: Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), vol 1, pp 973\u2013982"},{"key":"8796_CR12","doi-asserted-by":"crossref","unstructured":"Poria S, Cambria E, Gelbukh A (2015) Deep convolutional neural network textual features and multiple kernel learning for utterance-level multimodal sentiment analysis. In: Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing, pp 2539\u20132544","DOI":"10.18653\/v1\/D15-1303"},{"key":"8796_CR13","doi-asserted-by":"crossref","unstructured":"Poria S, Chaturvedi I, Cambria E, Bisio F (2016) Sentic lda: Improving on lda with semantic similarity for aspect-based sentiment analysis. In: 2016 International Joint Conference on Neural Networks (IJCNN), IEEE, pp 4465\u20134473","DOI":"10.1109\/IJCNN.2016.7727784"},{"issue":"2","key":"8796_CR14","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1109\/T-AFFC.2011.37","volume":"3","author":"M Soleymani","year":"2011","unstructured":"Soleymani M, Pantic M, Pun T (2011) Multimodal emotion recognition in response to videos. IEEE Trans Affect Comput 3(2):211\u2013223","journal-title":"IEEE Trans Affect Comput"},{"issue":"2","key":"8796_CR15","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1109\/34.908962","volume":"23","author":"YI Tian","year":"2001","unstructured":"Tian YI, Kanade T, Cohn JF (2001) Recognizing action units for facial expression analysis. IEEE Trans Pattern Anal Mach Intell 23(2):97\u2013115","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"8796_CR16","doi-asserted-by":"crossref","unstructured":"Wang H, Meghawat A, Morency LP, Xing EP (2017) Select-additive learning: Improving generalization in multimodal sentiment analysis. In: 2017 IEEE International conference on multimedia and expo (ICME), IEEE, pp 949\u2013954","DOI":"10.1109\/ICME.2017.8019301"},{"key":"8796_CR17","doi-asserted-by":"crossref","unstructured":"Williams J, Kleinegesse S, Comanescu R, Radu O (2018) Recognizing emotions in video using multimodal dnn feature fusion. In: Proceedings of Grand Challenge and Workshop on Human Multimodal Language (Challenge-HML), pp 11\u201319","DOI":"10.18653\/v1\/W18-3302"},{"key":"8796_CR18","doi-asserted-by":"crossref","unstructured":"Zadeh A, Chen M, Poria S, Cambria E, Morency LP (2017) Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250","DOI":"10.18653\/v1\/D17-1115"},{"issue":"6","key":"8796_CR19","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MIS.2016.94","volume":"31","author":"A Zadeh","year":"2016","unstructured":"Zadeh A, Zellers R, Pincus E, Morency LP (2016) Multimodal sentiment intensity analysis in videos: Facial gestures and verbal messages. IEEE Intell Syst 31 (6):82\u201388","journal-title":"IEEE Intell Syst"},{"key":"8796_CR20","doi-asserted-by":"crossref","unstructured":"Zhao S, Ding G, Gao Y, Han J (2017) Learning visual emotion distributions via multi-modal features fusion. In: Proceedings of the 2017 ACM on Multimedia Conference, ACM, pp 369\u2013377","DOI":"10.1145\/3123266.3130858"},{"key":"8796_CR21","unstructured":"Zhao S, Gao Y, Ding G, Chua TS (2017) Real-time multimedia social event detection in microblog. IEEE Transactions on Cybernetics (99), pp 1\u201314"},{"key":"8796_CR22","doi-asserted-by":"crossref","unstructured":"Zhao S, Yao H, Gao Y, Ding G, Chua TS (2016) Predicting personalized image emotion perceptions in social networks. IEEE Transactions on Affective Computing","DOI":"10.1145\/2964284.2964289"},{"key":"8796_CR23","doi-asserted-by":"crossref","unstructured":"Zhao S, Zhao X, Ding G, Keutzer K (2018) Emotiongan: Unsupervised domain adaptation for learning discrete probability distributions of image emotions. In: 2018 ACM Multimedia conference on multimedia conference, ACM, pp 1319\u20131327","DOI":"10.1145\/3240508.3240591"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-08796-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-020-08796-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-08796-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,28]],"date-time":"2023-09-28T03:17:28Z","timestamp":1695871048000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-020-08796-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3,4]]},"references-count":23,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2021,5]]}},"alternative-id":["8796"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-08796-8","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,3,4]]},"assertion":[{"value":"4 May 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 November 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 February 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 March 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}