{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,17]],"date-time":"2026-01-17T11:07:59Z","timestamp":1768648079305,"version":"3.49.0"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2020,10,31]],"date-time":"2020-10-31T00:00:00Z","timestamp":1604102400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,10,31]],"date-time":"2020-10-31T00:00:00Z","timestamp":1604102400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s11042-020-10030-4","type":"journal-article","created":{"date-parts":[[2020,10,31]],"date-time":"2020-10-31T02:02:46Z","timestamp":1604109766000},"page":"8213-8240","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":46,"title":["Video multimodal emotion recognition based on Bi-GRU and attention fusion"],"prefix":"10.1007","volume":"80","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2555-343X","authenticated-orcid":false,"given":"Ruo-Hong","family":"Huan","sequence":"first","affiliation":[]},{"given":"Jia","family":"Shu","sequence":"additional","affiliation":[]},{"given":"Sheng-Lin","family":"Bao","sequence":"additional","affiliation":[]},{"given":"Rong-Hua","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Peng","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Kai-Kai","family":"Chi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,31]]},"reference":[{"key":"10030_CR1","unstructured":"Bairaju SPR, Ari S, Garimella RM (2019) Emotion detection using visual information with deep 
auto-encoders[C]\/\/2019 IEEE 5th international conference for convergence in technology (I2CT). IEEE:1\u20135"},{"issue":"12","key":"10030_CR2","first-page":"107","volume":"5","author":"YH Byeon","year":"2014","unstructured":"Byeon YH, Kwak KC (2014) Facial expression recognition using 3D convolutional neural network[J]. Int J Adv Comput Sci Appl 5(12):107\u2013112","journal-title":"Int J Adv Comput Sci Appl"},{"key":"10030_CR3","first-page":"960","volume-title":"COVAREP\u2014A collaborative voice analysis repository for speech technologies[C]\/\/ IEEE international conference on acoustics, speech and signal processing","author":"G Degottex","year":"2014","unstructured":"Degottex G, Kane J, Drugman T et al (2014) COVAREP\u2014A collaborative voice analysis repository for speech technologies[C]\/\/ IEEE international conference on acoustics, speech and signal processing. IEEE, Florence, pp 960\u2013964"},{"key":"10030_CR4","doi-asserted-by":"crossref","unstructured":"Drugman T, Alwan A (2011) Joint robust voicing detection and pitch estimation based on residual harmonics[C]\/\/ Twelfth Annual Conference of the International Speech Communication Association : 1973\u20131976.","DOI":"10.21437\/Interspeech.2011-519"},{"issue":"3","key":"10030_CR5","doi-asserted-by":"publisher","first-page":"994","DOI":"10.1109\/TASL.2011.2170835","volume":"20","author":"T Drugman","year":"2012","unstructured":"Drugman T, Thomas M, Gudnason J, Naylor P, Dutoit T (2012) Detection of glottal closure instants from speech signals: a quantitative review[J]. 
IEEE Trans Audio Speech Lang Process 20(3):994\u20131006","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"10030_CR6","doi-asserted-by":"crossref","unstructured":"Ebrahimi Kahou S, Michalski V, Konda K, et al (2015) Recurrent neural networks for emotion recognition in video[C]\/\/ Proceedings of the 2015 ACM on international conference on multimodal interaction, ACM, Seattle, Washington, USA, Nov 09-13: 467\u2013474.","DOI":"10.1145\/2818346.2830596"},{"issue":"3\u20134","key":"10030_CR7","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1080\/02699939208411068","volume":"6","author":"P Ekman","year":"1992","unstructured":"Ekman P (1992) An argument for basic emotions[J]. Cognit Emot 6(3\u20134):169\u2013200","journal-title":"Cognit Emot"},{"issue":"6","key":"10030_CR8","doi-asserted-by":"publisher","first-page":"1125","DOI":"10.1037\/h0077722","volume":"39","author":"P Ekman","year":"1980","unstructured":"Ekman P, Freisen WV, Ancoli S (1980) Facial signs of emotional experience[J]. J Pers Soc Psychol 39(6):1125\u20131134","journal-title":"J Pers Soc Psychol"},{"key":"10030_CR9","unstructured":"Fujisaki H, Ljungqvist M (1986) Proposal and evaluation of models for the glottal source waveform[C]\/\/ ICASSP'86. IEEE International Conference on Acoustics, Speech, and Signal Processing. IEEE, Tokyo, Japan, 11: 1605-1608."},{"key":"10030_CR10","doi-asserted-by":"crossref","unstructured":"Ghosh S, Laksana E, Morency L P, et al. (2016) Representation Learning for Speech Emotion Recognition[C]\/\/ Interspeech : 3603\u20133607.","DOI":"10.21437\/Interspeech.2016-692"},{"key":"10030_CR11","unstructured":"Hatzivassiloglou V, McKeown K R (1997) Predicting the semantic orientation of adjectives[C]\/\/ proceedings of the 35th annual meeting of the association for computational linguistics and eighth conference of the european chapter of the association for computational linguistics. 
Assoc Comput Linguist Madrid, Spain, July 07 : 174\u2013181."},{"key":"10030_CR12","doi-asserted-by":"crossref","unstructured":"Iyyer M, Manjunatha V, Boyd-Graber J, et al (2015) Deep unordered composition rivals syntactic methods for text classification[C]\/\/ Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing, 1: 1681-1691.","DOI":"10.3115\/v1\/P15-1162"},{"key":"10030_CR13","doi-asserted-by":"crossref","unstructured":"Kalchbrenner N, Grefenstette E, Blunsom P (2014) A convolutional neural network for modelling sentences[J]. arXiv preprint arXiv:1404.2188.","DOI":"10.3115\/v1\/P14-1062"},{"issue":"6","key":"10030_CR14","doi-asserted-by":"publisher","first-page":"1170","DOI":"10.1109\/TASL.2013.2245653","volume":"21","author":"J Kane","year":"2013","unstructured":"Kane J, Gobl C (2013) Wavelet maxima dispersion for breathy to tense voice discrimination[J]. IEEE Trans Audio Speech Lang Process 21(6):1170\u20131179","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"10030_CR15","unstructured":"Kingma D P, Ba J (2014) Adam: a method for stochastic optimization[J]. 
arXiv preprint arXiv:1412.6980."},{"key":"10030_CR16","doi-asserted-by":"crossref","unstructured":"Kumawat S, Verma M, Raman S (2019) LBVCNN: local binary volume convolutional neural network for facial expression recognition from image sequences[C]\/\/proceedings of the IEEE conference on computer vision and pattern recognition workshops.","DOI":"10.1109\/CVPRW.2019.00030"},{"key":"10030_CR17","doi-asserted-by":"crossref","unstructured":"Lee J, Tashev I (2015) High-level feature representation using recurrent neural network for speech emotion recognition[C]\/\/ Sixteenth Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2015-336"},{"issue":"6","key":"10030_CR18","doi-asserted-by":"publisher","first-page":"616","DOI":"10.1002\/tee.21905","volume":"8","author":"J Li","year":"2013","unstructured":"Li J, Ren F (2013) A hybrid approach for word emotion recognition[J]. IEEJ Trans Electr Electron Eng 8(6):616\u2013626","journal-title":"IEEJ Trans Electr Electron Eng"},{"key":"10030_CR19","first-page":"1","volume-title":"Speech emotion recognition using convolutional and recurrent neural networks[C]\/\/ Asia-Pacific signal and information processing association annual summit and conference (APSIPA)","author":"W Lim","year":"2016","unstructured":"Lim W, Jang D, Lee T (2016) Speech emotion recognition using convolutional and recurrent neural networks[C]\/\/ Asia-Pacific signal and information processing association annual summit and conference (APSIPA). 
IEEE, Jeju, pp 1\u20134"},{"key":"10030_CR20","doi-asserted-by":"crossref","unstructured":"Liu Z, Shen Y, Lakshminarasimhan V B, et al (2018) Efficient low-rank multimodal fusion with modality-specific factors[C]\/\/ proceedings of the 56th annual meeting of the Association for Computational Linguistics (volume 1: long papers).","DOI":"10.18653\/v1\/P18-1209"},{"issue":"3\u20134","key":"10030_CR21","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1007\/s00779-019-01232-1","volume":"23","author":"L Ma","year":"2019","unstructured":"Ma L, Ju F, Wan J, Shen X (2019) Emotional computing based on cross-modal fusion and edge network data incentive[J]. Pers Ubiquit Comput 23(3\u20134):363\u2013372","journal-title":"Pers Ubiquit Comput"},{"key":"10030_CR22","unstructured":"Moilanen K, Pulman S (2007) Sentiment composition[C]\/\/ Proceedings of RANLP , 7: 378\u2013382."},{"key":"10030_CR23","unstructured":"Morency LP, Mihalcea R, Doshi P (2011) Towards multimodal sentiment analysis: harvesting opinions from the web[C]\/\/ proceedings of the 13th international conference on multimodal interfaces, ICMI 2011, Alicante, Spain, Nov 14-18, 2011. ACM:169\u2013176"},{"key":"10030_CR24","doi-asserted-by":"crossref","unstructured":"Nojavanasghari B, Gopinath D, Koushik J, et al (2016) Deep multimodal fusion for persuasiveness prediction[C]\/\/ international conference on multimodal interfaces (ICMI). ACM.","DOI":"10.1145\/2993148.2993176"},{"key":"10030_CR25","unstructured":"Orjesek R, Jarina R, Chmulik M, et al (2019) DNN based music emotion recognition from raw audio signal[C]\/\/2019 29th international conference RADIOELEKTRONIKA (RADIOELEKTRONIKA). 
IEEE: 1\u20134."},{"key":"10030_CR26","first-page":"50","volume":"12-16","author":"S Park","year":"2014","unstructured":"Park S, Shim HS, Chatterjee M et al (2014) Computational analysis of persuasiveness in social multimedia: a novel dataset and multimodal prediction approach[C]\/\/ proceedings of the 16th international conference on multimodal interaction. ACM, Istanbul, Turkey, Nov 12-16:50\u201357","journal-title":"ACM, Istanbul, Turkey, Nov"},{"key":"10030_CR27","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning C (2014) Glove: Global vectors for word representation[C]\/\/ Proceedings of the 2014 conference on empirical methods in natural language processing : 1532\u20131543.","DOI":"10.3115\/v1\/D14-1162"},{"key":"10030_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/1-4020-4102-0_1","volume-title":"Contextual valence shifters[M]\/\/ computing attitude and affect in text: theory and applications","author":"L Polanyi","year":"2006","unstructured":"Polanyi L, Zaenen A (2006) Contextual valence shifters[M]\/\/ computing attitude and affect in text: theory and applications. Springer, Dordrecht, pp 1\u201310"},{"key":"10030_CR29","doi-asserted-by":"crossref","unstructured":"Poria S, Cambria E, Gelbukh A (2015) Deep convolutional neural network textual features and multiple kernel learning for utterance-level multimodal sentiment analysis[C]\/\/ Proceedings of the 2015 conference on empirical methods in natural language processing : 2539\u20132544.","DOI":"10.18653\/v1\/D15-1303"},{"key":"10030_CR30","unstructured":"Poria S, Cambria E, Howard N, et al (2015) Fusing audio, visual and textual clues for sentiment analysis from multimodal content[J]. Neurocomputing: S0925231215011297."},{"key":"10030_CR31","doi-asserted-by":"crossref","unstructured":"Rajagopalan S S, Morency L P (2016) Tadas Baltru\u0161aitis, et al. 
Extending Long Short-Term Memory for Multi-View Structured Learning[M]\/\/ Computer Vision \u2013 ECCV 2016. Springer International Publishing.","DOI":"10.1007\/978-3-319-46478-7_21"},{"key":"10030_CR32","unstructured":"Seyeditabari A, Tabari N, Gholizadeh S, et al (2019) Emotion Detection in Text: Focusing on Latent Representation[J]. arXiv preprint arXiv:1907.09369."},{"issue":"20","key":"10030_CR33","doi-asserted-by":"publisher","first-page":"29607","DOI":"10.1007\/s11042-019-07813-9","volume":"78","author":"K Shrivastava","year":"2019","unstructured":"Shrivastava K, Kumar S, Jain DK et al (2019) An effective approach for emotion detection in multimedia text data using sequence based convolutional neural network[J]. Multimed Tools Appl 78(20):29607\u201329639","journal-title":"Multimed Tools Appl"},{"key":"10030_CR34","unstructured":"Socher R, Perelygin A, Wu J, et al (2013) Recursive deep models for semantic compositionality over a sentiment treebank[C]\/\/ proceedings of the 2013 conference on empirical methods in natural language processing, Seattle, WA, USA, Oct 18-21: 1631-1642."},{"key":"10030_CR35","unstructured":"Socher, R, et al (2013) Recursive deep models for semantic compositionality over a sentiment treebank[C]\/\/ Proceedings of the 2013 conference on empirical methods in natural language processing."},{"issue":"2","key":"10030_CR36","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1162\/COLI_a_00049","volume":"37","author":"M Taboada","year":"2011","unstructured":"Taboada M, Brooke J, Tofiloski M, Voll K, Stede M (2011) Lexicon-based methods for sentiment analysis[J]. Comput Linguist 37(2):267\u2013307","journal-title":"Comput Linguist"},{"key":"10030_CR37","unstructured":"Takamura H, Inui T, Okumura M (2006) Latent variable models for semantic orientations of phrases[C]\/\/ 11th conference of the European chapter of the Association for Computational Linguistics: 201\u2013208"},{"key":"10030_CR38","first-page":"5200","volume-title":"Adieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network[C]\/\/ IEEE international conference on acoustics, speech and signal processing (ICASSP)","author":"G Trigeorgis","year":"2016","unstructured":"Trigeorgis G, Ringeval F, Brueckner R et al (2016) Adieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network[C]\/\/ IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, Shanghai, pp 5200\u20135204"},{"key":"10030_CR39","unstructured":"Wang H, Meghawat A, Morency L P, et al (2017) Select-additive learning: improving generalization in multimodal sentiment analysis[C]\/\/2017 IEEE international conference on multimedia and expo (ICME). IEEE : 949\u2013954."},{"key":"10030_CR40","doi-asserted-by":"crossref","unstructured":"Wu X, et al (2019) Speech Emotion Recognition Using Capsule Networks[C]\/\/ ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE.","DOI":"10.1109\/ICASSP.2019.8683163"},{"key":"10030_CR41","unstructured":"Yang B, Cardie C (2012) Extracting opinion expressions with semi-Markov conditional random fields[C]\/\/ proceedings of the 2012 joint conference on empirical methods in natural language processing and computational natural language learning. Assoc Comput Linguist, Jeju Island, Korea, July 12-14 : 1335\u20131345."},{"key":"10030_CR42","doi-asserted-by":"crossref","unstructured":"Zadeh A, Chen M, Poria S, et al (2017) Tensor fusion network for multimodal sentiment analysis[J]. 
arXiv preprint arXiv:1707.07250.","DOI":"10.18653\/v1\/D17-1115"},{"key":"10030_CR43","doi-asserted-by":"crossref","unstructured":"Zadeh A, Liang P P, Mazumder N, et al (2018) Memory fusion network for multi-view sequential learning[C]\/\/thirty-second AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"10030_CR44","doi-asserted-by":"crossref","unstructured":"Zadeh A, Liang P, Poria S, et al (2018) Multi-attention recurrent network for human communication comprehension[C]\/\/thirty-second AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v32i1.12024"},{"key":"10030_CR45","unstructured":"Zadeh A, Zellers R, Pincus E, et al (2016) MOSI: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos[J]. arXiv preprint arXiv:1606.06259."},{"key":"10030_CR46","unstructured":"Zhao J, Chen S, Wang S, et al (2018) Emotion recognition using multimodal features[C]\/\/2018 first Asian conference on affective computing and intelligent interaction (ACII Asia). 
IEEE: 1\u20136."}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-10030-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-020-10030-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-10030-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,25]],"date-time":"2022-11-25T23:16:52Z","timestamp":1669418212000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-020-10030-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,31]]},"references-count":46,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["10030"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-10030-4","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,10,31]]},"assertion":[{"value":"22 March 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 August 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 October 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 October 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare 
that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Custom code is available without restriction.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Code availability"}}]}}