{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T22:51:24Z","timestamp":1754261484220,"version":"3.37.3"},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"14","license":[{"start":{"date-parts":[[2019,2,11]],"date-time":"2019-02-11T00:00:00Z","timestamp":1549843200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004543","name":"Chinese Scholarship Council","doi-asserted-by":"crossref","award":["201706290115"],"award-info":[{"award-number":["201706290115"]}],"id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"crossref"}]},{"name":"the Shaanxi Provincial International Science and Technology Collaboration Project","award":["2017KW-ZD-14"],"award-info":[{"award-number":["2017KW-ZD-14"]}]},{"name":"the Shaanxi Provincial International Science and Technology Collaboration Project","award":["2017KW-ZD-14"],"award-info":[{"award-number":["2017KW-ZD-14"]}]},{"name":"VUB Interdisciplinary Research Program through the EMO-App project"},{"name":"VUB Interdisciplinary Research Program through the EMO-App project"},{"name":"VUB Interdisciplinary Research Program through the EMO-App project"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2019,7]]},"DOI":"10.1007\/s11042-019-7313-1","type":"journal-article","created":{"date-parts":[[2019,2,11]],"date-time":"2019-02-11T12:08:20Z","timestamp":1549886900000},"page":"19387-19412","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Continuous affect recognition with weakly supervised learning"],"prefix":"10.1007","volume":"78","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3582-6809","authenticated-orcid":false,"given":"Ercheng","family":"Pei","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongmei","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mitchel","family":"Alioscha-Perez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hichem","family":"Sahli","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,2,11]]},"reference":[{"key":"7313_CR1","doi-asserted-by":"crossref","unstructured":"Baltru\u0161aitis T, Banda N, Robinson P (2013) Dimensional affect recognition using continuous conditional random fields. In: Proceedings of the 10th IEEE international conference and workshops on automatic face and gesture recognition (FG 2013). IEEE, pp 1\u20138","DOI":"10.1109\/FG.2013.6553785"},{"key":"7313_CR2","doi-asserted-by":"crossref","DOI":"10.1093\/oso\/9780198538493.001.0001","volume-title":"Neural networks for pattern recognition","author":"CM Bishop","year":"1995","unstructured":"Bishop CM (1995) Neural networks for pattern recognition. Oxford University Press, London"},{"key":"7313_CR3","doi-asserted-by":"crossref","unstructured":"Brady K, Gwon Y, Khorrami P, Godoy E, Campbell W, Dagli C, Huang TS (2016) Multi-modal audio, video and physiological sensor learning for continuous emotion prediction. In: Proceedings of the 6th international workshop on audio\/visual emotion challenge. ACM, pp 97\u2013104","DOI":"10.1145\/2988257.2988264"},{"key":"7313_CR4","doi-asserted-by":"crossref","unstructured":"Chao L, Tao J, Yang M, Li Y, Wen Z (2014) Multi-scale temporal modeling for dimensional emotion recognition in video. In: Proceedings of the 4th international workshop on audio\/visual emotion challenge. ACM, pp 11\u201318","DOI":"10.1145\/2661806.2661811"},{"key":"7313_CR5","doi-asserted-by":"crossref","unstructured":"Chao L, Tao J, Yang M, Li Y, Wen Z (2015) Long short term memory recurrent neural network based multimodal dimensional emotion recognition. In: Proceedings of the 5th international workshop on audio\/visual emotion challenge. ACM, pp 65\u201372","DOI":"10.1145\/2808196.2811634"},{"key":"7313_CR6","doi-asserted-by":"crossref","unstructured":"Chen S, Jin Q (2015) Multi-modal dimensional emotion recognition using recurrent neural networks. In: Proceedings of the 5th international workshop on audio\/visual emotion challenge. ACM, pp 49\u201356","DOI":"10.1145\/2808196.2811638"},{"key":"7313_CR7","doi-asserted-by":"crossref","unstructured":"Chen S, Jin Q, Zhao J, Wang S (2017) Multimodal multi-task learning for dimensional and continuous emotion recognition. In: Proceedings of the 7th annual workshop on audio\/visual emotion challenge. ACM, pp 19\u201326","DOI":"10.1145\/3133944.3133949"},{"key":"7313_CR8","doi-asserted-by":"crossref","unstructured":"Dhall A, Goecke R, Joshi J, Wagner M, Gedeon T (2013) Emotion recognition in the wild challenge 2013. In: Proceedings of the 15th ACM on International conference on multimodal interaction. ACM, pp 509\u2013516","DOI":"10.1145\/2522848.2531739"},{"key":"7313_CR9","doi-asserted-by":"crossref","unstructured":"Dhall A, Goecke R, Joshi J, Sikka K, Gedeon T (2014) Emotion recognition in the wild challenge 2014: Baseline, data and protocol. In: Proceedings of the 16th international conference on multimodal interaction. ACM, pp 461\u2013466","DOI":"10.1145\/2663204.2666275"},{"key":"7313_CR10","doi-asserted-by":"crossref","unstructured":"Dhall A, Ramana Murthy O, Goecke R, Joshi J, Gedeon T (2015) Video and image based emotion recognition challenges in the wild: Emotiw 2015. In: Proceedings of the 2015 international conference on multimodal interaction. ACM, pp 423\u2013426","DOI":"10.1145\/2818346.2829994"},{"key":"7313_CR11","doi-asserted-by":"crossref","unstructured":"Dhall A, Goecke R, Joshi J, Hoey J, Gedeon T (2016) Emotiw 2016: Video and group-level emotion recognition challenges. In: Proceedings of the 18th ACM international conference on multimodal interaction. ACM, pp 427\u2013432","DOI":"10.1145\/2993148.2997638"},{"key":"7313_CR12","doi-asserted-by":"crossref","unstructured":"Dhall A, Goecke R, Ghosh S, Joshi J, Hoey J, Gedeon T (2017) From individual to group-level emotion recognition: Emotiw 5.0. In: Proceedings of the 19th ACM international conference on multimodal interaction. ACM, pp 524\u2013528","DOI":"10.1145\/3136755.3143004"},{"key":"7313_CR13","volume-title":"Pattern classification","author":"RO Duda","year":"1973","unstructured":"Duda RO, Hart PE, Stork DG (1973) Pattern classification. Wiley, New York"},{"key":"7313_CR14","volume-title":"Unmasking the face: a guide to recognizing emotions from facial clues","author":"P Ekman","year":"2003","unstructured":"Ekman P, Friesen WV (2003) Unmasking the face: a guide to recognizing emotions from facial clues. Ishk, Los Altos"},{"issue":"18","key":"7313_CR15","doi-asserted-by":"publisher","first-page":"7429","DOI":"10.1007\/s11042-014-1986-2","volume":"74","author":"CE Erdem","year":"2015","unstructured":"Erdem CE, Turan C, Aydin Z (2015) Baum-2: a multilingual audio-visual affective face database. Multimed Tools Appl 74(18):7429\u20137459","journal-title":"Multimed Tools Appl"},{"key":"7313_CR16","doi-asserted-by":"crossref","unstructured":"Gers FA, Schmidhuber J, Cummins F (1999) Learning to forget: Continual prediction with lstm. In: Proceedings ICANN 1999, 9th international conference on artificial neural networks. IET, pp 850\u2013855","DOI":"10.1049\/cp:19991218"},{"issue":"6","key":"7313_CR17","doi-asserted-by":"publisher","first-page":"7803","DOI":"10.1007\/s11042-016-3418-y","volume":"76","author":"D Ghimire","year":"2017","unstructured":"Ghimire D, Jeong S, Lee J, Park SH (2017) Facial expression recognition based on local region specific features and support vector machines. Multimed Tools Appl 76(6):7803\u20137821","journal-title":"Multimed Tools Appl"},{"issue":"6","key":"7313_CR18","doi-asserted-by":"publisher","first-page":"7921","DOI":"10.1007\/s11042-016-3428-9","volume":"76","author":"D Ghimire","year":"2017","unstructured":"Ghimire D, Lee J, Li ZN, Jeong S (2017) Recognition of facial expressions based on salient geometric features and support vector machines. Multimed Tools Appl 76(6):7921\u20137946","journal-title":"Multimed Tools Appl"},{"key":"7313_CR19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24797-2","volume-title":"Supervised sequence labelling with recurrent neural networks","author":"A Graves","year":"2012","unstructured":"Graves A (2012) Supervised sequence labelling with recurrent neural networks. Springer, Berlin"},{"issue":"5-6","key":"7313_CR20","doi-asserted-by":"publisher","first-page":"602","DOI":"10.1016\/j.neunet.2005.06.042","volume":"18","author":"A Graves","year":"2005","unstructured":"Graves A, Schmidhuber J (2005) Framewise phoneme classification with bidirectional lstm and other neural network architectures. Neural Netw 18(5-6):602\u2013610","journal-title":"Neural Netw"},{"key":"7313_CR21","doi-asserted-by":"crossref","unstructured":"Graves A, Jaitly N, Mohamed A (2013) Hybrid speech recognition with deep bidirectional lstm. In: 2013 IEEE Workshop on automatic speech recognition and understanding (ASRU). IEEE, pp 273\u2013278","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"7313_CR22","doi-asserted-by":"crossref","unstructured":"Graves A, Mohamed A, Hinton G (2013) Speech recognition with deep recurrent neural networks. In: Proceedings of the 2013 IEEE international conference on acoustics, speech and signal processing (ICASSP 2013). IEEE, pp 6645\u20136649","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"7313_CR23","doi-asserted-by":"crossref","unstructured":"Han J, Zhang Z, Ringeval F, Schuller B (2017) Reconstruction-error-based learning for continuous emotion recognition in speech. In: Proceedings of the 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP 2017). IEEE, pp 2367\u20132371","DOI":"10.1109\/ICASSP.2017.7952580"},{"key":"7313_CR24","doi-asserted-by":"crossref","unstructured":"He L, Jiang D, Yang L, Pei E, Wu P, Sahli H (2015) Multimodal affective dimension prediction using deep bidirectional long short-term memory recurrent neural networks. In: Proceedings of the 5th international workshop on audio\/visual emotion challenge. ACM, pp 73\u201380","DOI":"10.1145\/2808196.2811641"},{"key":"7313_CR25","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1016\/j.patrec.2015.10.008","volume":"69","author":"J Hern\u00e1ndez-Gonz\u00e1lez","year":"2016","unstructured":"Hern\u00e1ndez-Gonz\u00e1lez J, Inza I, Lozano JA (2016) Weak supervision and other non-standard classification problems: a taxonomy. Pattern Recogn Lett 69:49\u201355","journal-title":"Pattern Recogn Lett"},{"issue":"8","key":"7313_CR26","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"7313_CR27","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, van der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the 2017 IEEE international conference on computer vision and pattern recognition (CVPR). IEEE, pp 2261\u20132269","DOI":"10.1109\/CVPR.2017.243"},{"key":"7313_CR28","doi-asserted-by":"crossref","unstructured":"Kaya H, \u00c7illi F, Salah AA (2014) Ensemble cca for continuous emotion prediction. In: Proceedings of the 4th international workshop on audio\/visual emotion challenge. ACM, pp 19\u201326","DOI":"10.1145\/2661806.2661814"},{"key":"7313_CR29","doi-asserted-by":"crossref","unstructured":"Le D, Aldeneh Z, Provost EM (2017) Discretized continuous speech emotion recognition with multi-task deep recurrent neural network. In: Proceedings of the 17th annual conference of the international speech communication association (INTERSPEECH 2017)","DOI":"10.21437\/Interspeech.2017-94"},{"issue":"1","key":"7313_CR30","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1007\/BF01238028","volume":"1","author":"C Lisetti","year":"1998","unstructured":"Lisetti C (1998) Affective computing. Pattern Anal Applic 1(1):71\u201373","journal-title":"Pattern Anal Applic"},{"issue":"2","key":"7313_CR31","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1007\/s11042-009-0344-2","volume":"49","author":"M Mansoorizadeh","year":"2010","unstructured":"Mansoorizadeh M, Charkari NM (2010) Multimodal information fusion application to human emotion recognition from face and speech. Multimed Tools Appl 49(2):277\u2013297","journal-title":"Multimed Tools Appl"},{"key":"7313_CR32","unstructured":"Mathieu B, Essid S, Fillon T, Prado J, Richard G (2010) Yaafe, an easy to use and efficient audio feature extraction software. In: Proceedings of the 11th international society for music information retrieval conference (ISMIR 2010), pp 441\u2013446"},{"key":"7313_CR33","doi-asserted-by":"crossref","unstructured":"Nguyen MH, Torresani L, De La Torre F, Rother C (2009) Weakly supervised discriminative localization and classification: a joint learning process. In: Proceedings of the 12th international conference on computer vision (ICCV 2009). IEEE, pp 1925\u20131932","DOI":"10.1109\/ICCV.2009.5459426"},{"key":"7313_CR34","unstructured":"Nicolaou MA, Gunes H, Pantic M (2010) Automatic segmentation of spontaneous data using dimensional labels from multiple coders. In: Proceedings of LREC int. workshop on multimodal corpora: advances in capturing, coding and analyzing multimodality. Citeseer, pp 43\u201348"},{"issue":"2","key":"7313_CR35","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1109\/T-AFFC.2011.9","volume":"2","author":"MA Nicolaou","year":"2011","unstructured":"Nicolaou MA, Gunes H, Pantic M (2011) Continuous prediction of spontaneous affect from multiple cues and modalities in valence-arousal space. IEEE Trans Affect Comput 2(2):92\u2013105","journal-title":"IEEE Trans Affect Comput"},{"key":"7313_CR36","doi-asserted-by":"crossref","unstructured":"Nicolle J, Rapp V, Bailly K, Prevost L, Chetouani M (2012) Robust continuous prediction of human emotions using multiscale dynamic cues. In: Proceedings of the 14th ACM international conference on multimodal interaction. ACM, pp 501\u2013508","DOI":"10.1145\/2388676.2388783"},{"key":"7313_CR37","doi-asserted-by":"crossref","unstructured":"Ozkan D, Scherer S, Morency LP (2012) Step-wise emotion recognition using concatenated-hmm. In: Proceedings of the 14th ACM international conference on multimodal interaction. ACM, pp 477\u2013484","DOI":"10.1145\/2388676.2388780"},{"key":"7313_CR38","doi-asserted-by":"crossref","unstructured":"Pei E, Yang L, Jiang D, Sahli H (2015) Multimodal dimensional affect recognition using deep bidirectional long short-term memory recurrent neural networks. In: Proceedings of the 2015 international conference on affective computing and intelligent interaction (ACII 2015). IEEE, pp 208\u2013214","DOI":"10.1109\/ACII.2015.7344573"},{"key":"7313_CR39","doi-asserted-by":"crossref","unstructured":"Povolny F, Matejka P, Hradis M, Popkov\u00e1 A, Otrusina L, Smrz P, Wood I, Robin C, Lamel L (2016) Multimodal emotion recognition for avec 2016 challenge. In: Proceedings of the 6th international workshop on audio\/visual emotion challenge. ACM, pp 75\u201382","DOI":"10.1145\/2988257.2988268"},{"key":"7313_CR40","unstructured":"Prenter PM, et al. (2008) Splines and variational methods, Courier Corporation, Chelmsford"},{"issue":"11","key":"7313_CR41","doi-asserted-by":"publisher","first-page":"1119","DOI":"10.1016\/0167-8655(94)90127-9","volume":"15","author":"P Pudil","year":"1994","unstructured":"Pudil P, Novovi\u010dov\u00e1 J, Kittler J (1994) Floating search methods in feature selection. Pattern Recogn Lett 15(11):1119\u20131125","journal-title":"Pattern Recogn Lett"},{"key":"7313_CR42","doi-asserted-by":"crossref","unstructured":"Ringeval F, Sonderegger A, Sauer J, Lalanne D (2013) Introducing the recola multimodal corpus of remote collaborative and affective interactions. In: Proceedings of the 10th IEEE international conference and workshops on automatic face and gesture recognition (FG 2013). IEEE, pp 1\u20138","DOI":"10.1109\/FG.2013.6553805"},{"key":"7313_CR43","doi-asserted-by":"crossref","unstructured":"Ringeval F, Schuller B, Valstar M, Cowie R, Pantic M (2015) Avec 2015: The 5th international audio\/visual emotion challenge and workshop. In: Proceedings of the 23rd ACM international conference on multimedia. ACM, pp 1335\u20131336","DOI":"10.1145\/2733373.2806408"},{"key":"7313_CR44","doi-asserted-by":"crossref","unstructured":"Ringeval F, Schuller B, Valstar M, Jaiswal S, Marchi E, Lalanne D, Cowie R, Pantic M (2015) Av+ ec 2015: The first affect recognition challenge bridging across audio, video, and physiological data. In: Proceedings of the 5th international workshop on audio\/visual emotion Challenge. ACM, pp 3\u20138","DOI":"10.1145\/2808196.2811642"},{"key":"7313_CR45","doi-asserted-by":"crossref","unstructured":"Ringeval F, Schuller B, Valstar M, Gratch J, Cowie R, Scherer S, Mozgai S, Cummins N, Schmitt M, Pantic M (2017) Avec 2017: Real-life depression, and affect recognition workshop and challenge. In: Proceedings of the 7th annual workshop on audio\/visual emotion challenge. ACM, pp 3\u20139","DOI":"10.1145\/3133944.3133953"},{"issue":"6","key":"7313_CR46","doi-asserted-by":"publisher","first-page":"1161","DOI":"10.1037\/h0077714","volume":"39","author":"JA Russell","year":"1980","unstructured":"Russell JA (1980) A circumplex model of affect. J Pers Soc Psychol 39(6):1161","journal-title":"J Pers Soc Psychol"},{"key":"7313_CR47","doi-asserted-by":"crossref","unstructured":"Schuller B, Valster M, Eyben F, Cowie R, Pantic M (2012) Avec 2012: the continuous audio\/visual emotion challenge. In: Proceedings of the 14th ACM international conference on multimodal interaction. ACM, pp 449\u2013456","DOI":"10.1145\/2388676.2388776"},{"key":"7313_CR48","doi-asserted-by":"crossref","unstructured":"Schuller B, Steidl S, Batliner A, Vinciarelli A, Scherer K, Ringeval F, Chetouani M, Weninger F, Eyben F, Marchi E, et al. (2013) The interspeech 2013 computational paralinguistics challenge: social signals, conflict, emotion, autism. In: Proceedings of the 14th annual conference of the international speech communication association (INTERSPEECH 2013)","DOI":"10.21437\/Interspeech.2013-56"},{"issue":"2","key":"7313_CR49","doi-asserted-by":"publisher","first-page":"935","DOI":"10.1007\/s11042-014-2333-3","volume":"75","author":"MH Siddiqi","year":"2016","unstructured":"Siddiqi MH, Ali R, Idris M, Khan AM, Kim ES, Whang MC, Lee S (2016) Human facial expression recognition using curvelet feature extraction and normalized mutual information feature selection. Multimed Tools Appl 75(2):935\u2013959","journal-title":"Multimed Tools Appl"},{"key":"7313_CR50","doi-asserted-by":"crossref","unstructured":"Sidorov M, Minker W (2014) Emotion recognition and depression diagnosis by acoustic and visual features: a multimodal approach. In: Proceedings of the 4th international workshop on audio\/visual emotion challenge. ACM, pp 81\u201386","DOI":"10.1145\/2661806.2661816"},{"key":"7313_CR51","doi-asserted-by":"crossref","unstructured":"Somandepalli K, Gupta R, Nasir M, Booth BM, Lee S, Narayanan SS (2016) Online affect tracking with multimodal kalman filters. In: Proceedings of the 6th international workshop on audio\/visual emotion challenge. ACM, pp 59\u201366","DOI":"10.1145\/2988257.2988259"},{"key":"7313_CR52","doi-asserted-by":"crossref","unstructured":"Sun B, Cao S, Li L, He J, Yu L (2016) Exploring multimodal visual features for continuous affect recognition. In: Proceedings of the 6th international workshop on audio\/visual emotion challenge. ACM, pp 83\u201388","DOI":"10.1145\/2988257.2988270"},{"key":"7313_CR53","doi-asserted-by":"crossref","unstructured":"Trigeorgis G, Ringeval F, Brueckner R, Marchi E, Nicolaou MA, Schuller B, Zafeiriou S (2016) Adieu features? end-to-end speech emotion recognition using a deep convolutional recurrent network. In: Proceedings of the 2016 IEEE international conference on acoustics, speech and signal processing (ICASSP 2016). IEEE, pp 5200\u20135204","DOI":"10.1109\/ICASSP.2016.7472669"},{"key":"7313_CR54","doi-asserted-by":"crossref","unstructured":"Valstar MF, Jiang B, Mehu M, Pantic M, Scherer K (2011) The first facial expression recognition and analysis challenge. In: Proceedings of the 2011 IEEE international conference on automatic face & gesture recognition and workshops (FG 2011). IEEE, pp 921\u2013926","DOI":"10.1109\/FG.2011.5771374"},{"key":"7313_CR55","doi-asserted-by":"crossref","unstructured":"Valstar M, Schuller B, Smith K, Eyben F, Jiang B, Bilakhia S, Schnieder S, Cowie R, Pantic M (2013) Avec 2013: the continuous audio\/visual emotion and depression recognition challenge. In: Proceedings of the 3rd ACM international workshop on Audio\/visual emotion challenge. ACM, pp 3\u201310","DOI":"10.1145\/2512530.2512533"},{"key":"7313_CR56","doi-asserted-by":"crossref","unstructured":"Valstar M, Schuller B, Smith K, Almaev T, Eyben F, Krajewski J, Cowie R, Pantic M (2014) Avec 2014: 3d dimensional affect and depression recognition challenge. In: Proceedings of the 4th international workshop on audio\/visual emotion challenge. ACM, pp 3\u201310","DOI":"10.1145\/2661806.2661807"},{"key":"7313_CR57","doi-asserted-by":"crossref","unstructured":"Valstar MF, Almaev T, Girard JM, McKeown G, Mehu M, Yin L, Pantic M, Cohn JF (2015) Fera 2015-second facial expression recognition and analysis challenge. In: 11Th IEEE international conference and workshops on automatic face and gesture recognition (FG 2015), vol 6. IEEE, pp 1\u20138","DOI":"10.1109\/FG.2015.7284874"},{"key":"7313_CR58","doi-asserted-by":"crossref","unstructured":"Valstar M, Gratch J, Schuller B, Ringeval F, Lalanne D, Torres Torres M, Scherer S, Stratou G, Cowie R, Pantic M (2016) Avec 2016: Depression, mood, and emotion recognition workshop and challenge. In: Proceedings of the 6th international workshop on audio\/visual emotion challenge. ACM, pp 3\u201310","DOI":"10.1145\/2988257.2988258"},{"key":"7313_CR59","doi-asserted-by":"crossref","unstructured":"Valstar MF, S\u00e1nchez-Lozano E, Cohn JF, Jeni LA, Girard JM, Zhang Z, Yin L, Pantic M (2017) Fera 2017-addressing head pose in the third facial expression recognition and analysis challenge. In: 12th IEEE international conference on automatic face & gesture recognition (FG 2017). IEEE, pp 839\u2013847","DOI":"10.1109\/FG.2017.107"},{"key":"7313_CR60","doi-asserted-by":"crossref","unstructured":"Van Der Maaten L (2012) Audio-visual emotion challenge 2012: a simple approach. In: Proceedings of the 14th ACM international conference on multimodal interaction. ACM, pp 473\u2013476","DOI":"10.1145\/2388676.2388779"},{"issue":"2","key":"7313_CR61","doi-asserted-by":"publisher","first-page":"2159","DOI":"10.1007\/s11042-015-3119-y","volume":"76","author":"GK Verma","year":"2017","unstructured":"Verma GK, Tiwary US (2017) Affect representation and recognition in 3d continuous valence\u2013arousal\u2013dominance space. Multimed Tools Appl 76(2):2159\u20132183","journal-title":"Multimed Tools Appl"},{"key":"7313_CR62","unstructured":"Ververidis D, Kotropoulos C (2006) Fast sequential floating forward selection applied to emotional speech features estimated on des and susas data collections. In: Proceedings of the 14th european signal processing conference. IEEE, pp 1\u20135"},{"issue":"22","key":"7313_CR63","doi-asserted-by":"publisher","first-page":"9983","DOI":"10.1007\/s11042-014-2319-1","volume":"74","author":"F Wang","year":"2015","unstructured":"Wang F, Sahli H, Gao J, Jiang D, Verhelst W (2015) Relevance units machine based dimensional and continuous speech emotion prediction. Multimed Tools Appl 74(22):9983\u201310000","journal-title":"Multimed Tools Appl"},{"issue":"4","key":"7313_CR64","doi-asserted-by":"publisher","first-page":"888","DOI":"10.1016\/j.csl.2014.01.001","volume":"28","author":"F Weninger","year":"2014","unstructured":"Weninger F, Geiger J, W\u00f6llmer M, Schuller B, Rigoll G (2014) Feature enhancement by deep lstm networks for asr in reverberant multisource environments. Comput Speech Lang 28(4):888\u2013902","journal-title":"Comput Speech Lang"},{"issue":"3","key":"7313_CR65","first-page":"547","volume":"16","author":"F Weninger","year":"2015","unstructured":"Weninger F, Bergmann J, Schuller BW (2015) Introducing currennt: the munich open-source cuda recurrent neural network toolkit. J Mach Learn Res 16(3):547\u2013551","journal-title":"J Mach Learn Res"},{"key":"7313_CR66","unstructured":"Weninger F, Ringeval F, Marchi E, Schuller B (2016) Discriminatively trained recurrent neural networks for continuous dimensional emotion recognition from audio. In: Proceedings of the twenty-fifth international joint conference on artificial intelligence. AAAI Press, pp 2196\u20132202"},{"issue":"10","key":"7313_CR67","doi-asserted-by":"publisher","first-page":"1550","DOI":"10.1109\/5.58337","volume":"78","author":"PJ Werbos","year":"1990","unstructured":"Werbos PJ (1990) Backpropagation through time: what it does and how to do it. Proc IEEE 78(10):1550\u20131560","journal-title":"Proc IEEE"},{"key":"7313_CR68","first-page":"433","volume":"1","author":"RJ Williams","year":"1995","unstructured":"Williams RJ, Zipser D (1995) Gradient-based learning algorithms for recurrent networks and their computational complexity. Backpropagation: Theory, architectures, and applications 1:433\u2013486","journal-title":"Backpropagation: Theory, architectures, and applications"},{"key":"7313_CR69","doi-asserted-by":"crossref","unstructured":"W\u00f6llmer M, Eyben F, Reiter S, Schuller B, Cox C, Douglas-Cowie E, Cowie R (2008) Abandoning emotion classes-towards continuous emotion recognition with modelling of long-range dependencies. In: Proceedings of the ninth annual conference of the international speech communication association (INTERSPEECH 2008), pp 597\u2013600","DOI":"10.21437\/Interspeech.2008-192"},{"issue":"5","key":"7313_CR70","doi-asserted-by":"publisher","first-page":"867","DOI":"10.1109\/JSTSP.2010.2057200","volume":"4","author":"M Wollmer","year":"2010","unstructured":"Wollmer M, Schuller B, Eyben F, Rigoll G (2010) Combining long short-term memory and dynamic bayesian networks for incremental emotion-sensitive artificial listening. IEEE J Sel Top Sign Proces 4(5):867\u2013881","journal-title":"IEEE J Sel Top Sign Proces"},{"issue":"2","key":"7313_CR71","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1016\/j.imavis.2012.03.001","volume":"31","author":"M W\u00f6llmer","year":"2013","unstructured":"W\u00f6llmer M, Kaiser M, Eyben F, Schuller B, Rigoll G (2013) Lstm-modeling of continuous emotions in an audiovisual affect recognition framework. Image Vis Comput 31(2):153\u2013163","journal-title":"Image Vis Comput"},{"key":"7313_CR72","doi-asserted-by":"crossref","unstructured":"Zhang Z, Ringeval F, Han J, Deng J, Marchi E, Schuller B (2016) Facing realism in spontaneous emotion recognition from speech: Feature enhancement by autoencoder with lstm neural networks. In: Proceedings of the 17th annual conference of the international speech communication association (INTERSPEECH 2016), pp 3593\u20133597","DOI":"10.21437\/Interspeech.2016-998"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-019-7313-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-019-7313-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-019-7313-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,14]],"date-time":"2024-07-14T18:14:09Z","timestamp":1720980849000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-019-7313-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2,11]]},"references-count":72,"journal-issue":{"issue":"14","published-print":{"date-parts":[[2019,7]]}},"alternative-id":["7313"],"URL":"https:\/\/doi.org\/10.1007\/s11042-019-7313-1","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2019,2,11]]},"assertion":[{"value":"30 June 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 December 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 January 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 February 2019","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}