{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T17:14:02Z","timestamp":1740158042617,"version":"3.37.3"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2021,4,5]],"date-time":"2021-04-05T00:00:00Z","timestamp":1617580800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,4,5]],"date-time":"2021-04-05T00:00:00Z","timestamp":1617580800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Ambient Intell Human Comput"],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1007\/s12652-021-03200-1","type":"journal-article","created":{"date-parts":[[2021,4,5]],"date-time":"2021-04-05T18:02:44Z","timestamp":1617645764000},"page":"4787-4797","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Training universal background models with restricted data for speech emotion recognition"],"prefix":"10.1007","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5781-8730","authenticated-orcid":false,"given":"Imen","family":"Trabelsi","sequence":"first","affiliation":[]},{"given":"Filipo Studzinski","family":"Perotto","sequence":"additional","affiliation":[]},{"given":"Usman","family":"Malik","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,4,5]]},"reference":[{"key":"3200_CR1","doi-asserted-by":"crossref","unstructured":"Alhasan K, Aliyu S, Chen L, Chen F (2019) Ica-based eeg feature analysis and classification of learning styles. 2019 IEEE international conference on dependable. autonomic and secure computing, international conference on pervasive intelligence and computing, international conference on cloud and big data computing, international conference on cyber science and technology congress (DASC\/PiCom\/CBDCom\/CyberSciTech), IEEE, pp 271\u2013276","DOI":"10.1109\/DASC\/PiCom\/CBDCom\/CyberSciTech.2019.00057"},{"issue":"1","key":"3200_CR2","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1016\/j.ipm.2015.03.004","volume":"52","author":"A Aljanaki","year":"2016","unstructured":"Aljanaki A, Wiering F, Veltkamp RC (2016) Studying emotion induced by music through a crowdsourcing game. Inf Process Manag 52(1):115\u2013128","journal-title":"Inf Process Manag"},{"key":"3200_CR3","doi-asserted-by":"publisher","first-page":"1161","DOI":"10.1037\/a0025827","volume":"12","author":"T B\u00e4nziger","year":"2012","unstructured":"B\u00e4nziger T, Mortillaro M, Scherer KR (2012) Introducing the Geneva multimodal expression corpus for experimental research on emotion perception. Emotion 12:1161\u20131179","journal-title":"Emotion"},{"key":"3200_CR4","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1007\/s10919-013-0165-x","volume":"38","author":"T Banziger","year":"2014","unstructured":"Banziger T, Patel S, Scherer K (2014) The role of perceived voice and speech characteristics in vocal emotion communication. J Nonverbal Behav 38:31\u201352","journal-title":"J Nonverbal Behav"},{"key":"3200_CR5","doi-asserted-by":"crossref","unstructured":"Burkhardt F, Paeschke A, Rolfes M, Sendlmeier WF, Weiss B (2005) A database of German emotional speech. In: Ninth European conference on speech communication and technology","DOI":"10.21437\/Interspeech.2005-446"},{"key":"3200_CR6","doi-asserted-by":"publisher","DOI":"10.4324\/9780429493898","volume-title":"Empathy: a social psychological approach","author":"MH Davis","year":"2018","unstructured":"Davis MH (2018) Empathy: a social psychological approach. Routledge"},{"key":"3200_CR7","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"A Dempster","year":"1977","unstructured":"Dempster A, Laid N, Durbin D (1977) Maximum likelihood from incomplete data via the EM algorithm. J R Stat Soc 39:1\u201338","journal-title":"J R Stat Soc"},{"key":"3200_CR8","doi-asserted-by":"crossref","unstructured":"Desplanques B, Demuynck K (2018) Cross-lingual speech emotion recognition through factor analysis. In: Interspeech2018, ISCA, pp 3648\u20133652","DOI":"10.21437\/Interspeech.2018-1778"},{"key":"3200_CR9","first-page":"526","volume":"2020","author":"V Dissanayake","year":"2020","unstructured":"Dissanayake V, Zhang H, Billinghurst M, Nanayakkara S (2020) Speech emotion recognition\u2014in the wild-using an autoencoder. Proc Interspeech 2020:526\u2013530","journal-title":"Proc Interspeech"},{"issue":"1","key":"3200_CR10","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1177\/1745691615596992","volume":"11","author":"P Ekman","year":"2016","unstructured":"Ekman P (2016) What scientists who study emotion agree about. Perspect Psychol Sci 11(1):31\u201334","journal-title":"Perspect Psychol Sci"},{"issue":"2","key":"3200_CR11","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TAFFC.2015.2457417","volume":"7","author":"F Eyben","year":"2016","unstructured":"Eyben F, Scherer KR, Schuller BW, Sundberg J, Andr\u00e9 E, Busso C, Devillers LY, Epps J, Laukka P, Narayanan SS et al (2016) The Geneva minimalistic acoustic parameter set (GEMAPS) for voice research and affective computing. IEEE Trans Affect Comput 7(2):190\u2013202","journal-title":"IEEE Trans Affect Comput"},{"key":"3200_CR12","doi-asserted-by":"crossref","unstructured":"Gangamohan P, Kadiri SR, Yegnanarayana B (2016) Analysis of emotional speech\u2013a review. In: Toward robotic socially believable behaving systems-Volume I, Springer, pp 205\u2013238","DOI":"10.1007\/978-3-319-31056-5_11"},{"key":"3200_CR13","unstructured":"Garofolo JS (1993) Timit acoustic phonetic continuous speech corpus. Linguistic Data Consortium"},{"key":"3200_CR14","unstructured":"Haq S, Jackson P (2009) Speaker-dependent audio-visual emotion recognition. In: International conference on auditory-visual speech processing, pp 53\u201358"},{"issue":"1","key":"3200_CR15","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/s10772-016-9386-9","volume":"20","author":"A Haque","year":"2017","unstructured":"Haque A, Rao KS (2017) Modification of energy spectra, epoch parameters and prosody for emotion conversion in speech. Int J Speech Technol 20(1):15\u201325","journal-title":"Int J Speech Technol"},{"issue":"7","key":"3200_CR16","doi-asserted-by":"publisher","first-page":"1890","DOI":"10.1109\/TASL.2010.2102753","volume":"19","author":"T Hasan","year":"2011","unstructured":"Hasan T, Hansen JH (2011) A study on universal background model training in speaker verification. IEEE Trans Audio Speech Lang Process 19(7):1890\u20131899","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"3200_CR17","first-page":"3","volume":"26","author":"M Hofmann","year":"2006","unstructured":"Hofmann M (2006) Support vector machines-kernels and the kernel trick. Notes 26:3","journal-title":"Notes"},{"issue":"2","key":"3200_CR18","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1109\/72.991427","volume":"13","author":"CW Hsu","year":"2002","unstructured":"Hsu CW, Lin CJ (2002) A comparison of methods for multiclass support vector machines. IEEE Trans Neural Netw 13(2):415\u2013425","journal-title":"IEEE Trans Neural Netw"},{"issue":"1","key":"3200_CR19","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1016\/j.acha.2016.09.001","volume":"43","author":"X Huang","year":"2017","unstructured":"Huang X, Maier A, Hornegger J, Suykens JA (2017) Indefinite kernels in least squares support vector machines and principal component analysis. Appl Comput Harmon Anal 43(1):162\u2013172","journal-title":"Appl Comput Harmon Anal"},{"issue":"5","key":"3200_CR20","doi-asserted-by":"publisher","first-page":"1787","DOI":"10.1007\/s12652-017-0644-8","volume":"10","author":"Y Huang","year":"2019","unstructured":"Huang Y, Tian K, Wu A, Zhang G (2019) Feature fusion methods research based on deep belief networks for speech emotion recognition under noise condition. J Ambient Intell Human Comput 10(5):1787\u20131798","journal-title":"J Ambient Intell Human Comput"},{"key":"3200_CR21","doi-asserted-by":"publisher","first-page":"101894","DOI":"10.1016\/j.bspc.2020.101894","volume":"59","author":"D Issa","year":"2020","unstructured":"Issa D, Demirci MF, Yazici A (2020) Speech emotion recognition with deep convolutional neural networks. Biomed Signal Process Control 59:101894","journal-title":"Biomed Signal Process Control"},{"key":"3200_CR22","first-page":"57","volume":"20","author":"D Keltner","year":"2017","unstructured":"Keltner D, Cordaro DT (2017) Understanding multimodal emotional expressions: recent advances in basic emotion theory. Sci Facial Exp 20:57\u201376","journal-title":"Sci Facial Exp"},{"key":"3200_CR23","volume-title":"Social media and machine learning","author":"L Kerkeni","year":"2019","unstructured":"Kerkeni L, Serrestou Y, Mbarki M, Raoof K, Mahjoub MA, Cleder C (2019) Automatic speech emotion recognition using machine learning. Social media and machine learning. IntechOpen"},{"issue":"2","key":"3200_CR24","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s10772-011-9125-1","volume":"15","author":"SG Koolagudi","year":"2012","unstructured":"Koolagudi SG, Rao KS (2012) Emotion recognition from speech: a review. Int J Speech Technol 15(2):99\u2013117","journal-title":"Int J Speech Technol"},{"issue":"6","key":"3200_CR25","doi-asserted-by":"publisher","first-page":"444","DOI":"10.1016\/j.tics.2016.03.011","volume":"20","author":"PA Kragel","year":"2016","unstructured":"Kragel PA, LaBar KS (2016) Decoding the nature of emotion in the brain. Trends Cogn Sci 20(6):444\u2013455","journal-title":"Trends Cogn Sci"},{"key":"3200_CR26","doi-asserted-by":"crossref","unstructured":"Latif S, Rana R, Younis S, Qadir J, Epps J (2018) Transfer learning for improving speech emotion classification accuracy. In: Interspeech, pp 257\u2013261","DOI":"10.21437\/Interspeech.2018-1625"},{"key":"3200_CR27","doi-asserted-by":"crossref","unstructured":"Lee L, Rose RC (1996) Speaker normalization using efficient frequency warping procedures. In: 1996 IEEE international conference on acoustics, speech, and signal processing conference proceedings, IEEE, vol\u00a01, pp 353\u2013356","DOI":"10.1109\/ICASSP.1996.541105"},{"issue":"1","key":"3200_CR28","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/s12652-011-0086-7","volume":"3","author":"HCK Lin","year":"2012","unstructured":"Lin HCK, Hsieh MC, Loh LC, Wang CH (2012) An emotion recognition mechanism based on the combination of mutual information and semantic clues. J Ambient Intell Human Comput 3(1):19\u201329","journal-title":"J Ambient Intell Human Comput"},{"issue":"4","key":"3200_CR29","doi-asserted-by":"publisher","first-page":"575","DOI":"10.1016\/j.ipm.2010.09.001","volume":"47","author":"I Lopatovska","year":"2011","unstructured":"Lopatovska I, Arapakis I (2011) Theories, methods and current research on emotions in library and information science, information retrieval and human-computer interaction. Inf Process Manag 47(4):575\u2013592","journal-title":"Inf Process Manag"},{"issue":"4","key":"3200_CR30","doi-asserted-by":"publisher","first-page":"567","DOI":"10.1007\/s12652-017-0464-x","volume":"8","author":"E Lozano-Monasor","year":"2017","unstructured":"Lozano-Monasor E, L\u00f3pez MT, Vigo-Bustos F, Fern\u00e1ndez-Caballero A (2017) Facial expression recognition in ageing adults: from lab to ambient assisted living. J Ambient Intell Human Comput 8(4):567\u2013578","journal-title":"J Ambient Intell Human Comput"},{"key":"3200_CR31","doi-asserted-by":"crossref","unstructured":"McLaughlin J, Reynolds DA, Gleason T (1999) A study of computation speed-ups of the GMM-UBM speaker recognition system. In: Sixth European conference on speech communication and technology","DOI":"10.21437\/Eurospeech.1999-284"},{"key":"3200_CR32","unstructured":"Meyer D, Wien FT (2015) Support vector machines: the interface to libsvm in package e1071. Tech. rep, FH Technikum Wien, Austria"},{"key":"3200_CR33","unstructured":"Pols LC, et\u00a0al. (1977) Spectral analysis and identification of dutch vowels in monosyllabic words"},{"key":"3200_CR34","unstructured":"Rabiner L (1993) Fundamentals of speech recognition. Fundamentals of speech recognition"},{"issue":"11","key":"3200_CR35","doi-asserted-by":"publisher","first-page":"1147","DOI":"10.1016\/0167-8655(95)00075-R","volume":"16","author":"H Ralambondrainy","year":"1995","unstructured":"Ralambondrainy H (1995) A conceptual version of the k-means algorithm. Pattern Recogn Lett 16(11):1147\u20131157","journal-title":"Pattern Recogn Lett"},{"issue":"3","key":"3200_CR36","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1016\/j.ipm.2008.09.003","volume":"45","author":"J Rong","year":"2009","unstructured":"Rong J, Li G, Chen YPP (2009) Acoustic feature selection for automatic emotion recognition from speech. Inf Process Manag 45(3):315\u2013328","journal-title":"Inf Process Manag"},{"key":"3200_CR37","unstructured":"Schmitt M, Janott C, Pandit V, Qian K, Heiser C, Hemmert W, Schuller B (2016) A bag-of-audio-words approach for snore sounds\u2019 excitation localisation. Speech Communication, 12. ITG Symposium, VDE, pp 1\u20135"},{"issue":"2","key":"3200_CR38","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1109\/T-AFFC.2010.8","volume":"1","author":"B Schuller","year":"2010","unstructured":"Schuller B, Vlasenko B, Eyben F, Wollmer M, Stuhlsatz A, Wendemuth A, Rigoll G (2010) Cross-corpus acoustic emotion recognition: variances and strategies. IEEE Trans Affect Comput 1(2):119\u2013131","journal-title":"IEEE Trans Affect Comput"},{"key":"3200_CR39","doi-asserted-by":"crossref","unstructured":"Schuller B, Steidl S, Batliner A, Vinciarelli A, Scherer K, Ringeval F, Chetouani M, Weninger F, Eyben F, Marchi E et\u00a0al (2013) The interspeech 2013 computational paralinguistics challenge: social signals, conflict, emotion, autism. In: Interspeech, pp 122\u2013126","DOI":"10.21437\/Interspeech.2013-56"},{"issue":"1","key":"3200_CR41","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1016\/j.csl.2014.08.003","volume":"29","author":"B Schuller","year":"2015","unstructured":"Schuller B, Steidl S, Batliner A, N\u00f6th E, Vinciarelli A, Burkhardt F, van Son R, Weninger F, Eyben F, Bocklet T, Mohammadi G, Weiss B (2015) A survey on perceived speaker traits: personality, likability, pathology, and the first challenge. Comput Speech Lang 29(1):100\u2013131","journal-title":"Comput Speech Lang"},{"key":"3200_CR42","doi-asserted-by":"crossref","unstructured":"Schuller BW, Steidl S, Batliner A, Marschik PB, Baumeister H, Dong F, Hantke S, Pokorny FB, Rathner EM, Bartl-Pokorny KD et\u00a0al (2018) The interspeech 2018 computational paralinguistics challenge: atypical and self-assessed affect, crying and heart beats. In: Interspeech, pp 122\u2013126","DOI":"10.21437\/Interspeech.2018-51"},{"key":"3200_CR43","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1016\/j.csl.2018.02.004","volume":"53","author":"B Schuller","year":"2019","unstructured":"Schuller B, Weninger F, Zhang Y, Ringeval F, Batliner A, Steidl S, Eyben F, Marchi E, Vinciarelli A, Scherer K et al (2019) Affective and behavioural computing: lessons learnt from the first computational paralinguistics challenge. Comput Speech Lang 53:156\u2013180","journal-title":"Comput Speech Lang"},{"issue":"4","key":"3200_CR44","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1023\/A:1023237014909","volume":"28","author":"C Sobin","year":"1999","unstructured":"Sobin C, Alpert M (1999) Emotion in speech: the acoustic attributes of fear, anger, sadness, and joy. J Psycholinguist Res 28(4):347\u2013365","journal-title":"J Psycholinguist Res"},{"issue":"1","key":"3200_CR45","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/97.736233","volume":"6","author":"J Sohn","year":"1999","unstructured":"Sohn J, Kim NS, Sung W (1999) A statistical model-based voice activity detection. IEEE Signal Process Lett 6(1):1\u20133","journal-title":"IEEE Signal Process Lett"},{"issue":"1","key":"3200_CR46","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1007\/s10772-018-9491-z","volume":"21","author":"M Swain","year":"2018","unstructured":"Swain M, Routray A, Kabisatpathy P (2018a) Databases, features and classifiers for speech emotion recognition: a review. Int J Speech Technol 21(1):93\u2013120","journal-title":"Int J Speech Technol"},{"issue":"1","key":"3200_CR47","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1007\/s10772-018-9491-z","volume":"21","author":"M Swain","year":"2018","unstructured":"Swain M, Routray A, Kabisatpathy P (2018b) Databases, features and classifiers for speech emotion recognition: a review. Int J Speech Technol 21(1):93\u2013120","journal-title":"Int J Speech Technol"},{"key":"3200_CR48","doi-asserted-by":"crossref","unstructured":"Tzinis E, Paraskevopoulos G, Baziotis C, Potamianos A (2018) Integrating recurrence dynamics for speech emotion recognition. In: Interspeech, pp 927\u2013931","DOI":"10.21437\/Interspeech.2018-1377"},{"key":"3200_CR49","unstructured":"Vafeiadis A, Kalatzis D, Votis K, Giakoumis D, Tzovaras D, Chen L, Hamzaoui R (2017) Acoustic scene classification: from a hybrid classifier to deep learning"},{"key":"3200_CR50","doi-asserted-by":"publisher","first-page":"103226","DOI":"10.1016\/j.engappai.2019.08.020","volume":"89","author":"A Vafeiadis","year":"2020","unstructured":"Vafeiadis A, Votis K, Giakoumis D, Tzovaras D, Chen L, Hamzaoui R (2020) Audio content analysis for unobtrusive event detection in smart homes. Eng Appl Artif Intell 89:103226","journal-title":"Eng Appl Artif Intell"},{"key":"3200_CR51","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-2440-0","volume-title":"The nature of statistical learning theory","author":"V Vapnik","year":"1995","unstructured":"Vapnik V (1995) The nature of statistical learning theory, vol 2. Spring, New York"},{"issue":"2","key":"3200_CR52","doi-asserted-by":"publisher","first-page":"2159","DOI":"10.1007\/s11042-015-3119-y","volume":"76","author":"GK Verma","year":"2017","unstructured":"Verma GK, Tiwary US (2017) Affect representation and recognition in 3d continuous valence-arousal-dominance space. Multimed Tools Appl 76(2):2159\u20132183","journal-title":"Multimed Tools Appl"},{"issue":"1","key":"3200_CR53","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1016\/j.csl.2014.09.003","volume":"30","author":"CH You","year":"2015","unstructured":"You CH, Li H, Lee KA (2015) Relevance factor of maximum a posteriori adaptation for GMM-NAP-SVM in speaker and language recognition. Comput Speech Lang 30(1):116\u2013134","journal-title":"Comput Speech Lang"},{"key":"3200_CR54","first-page":"1","volume":"20","author":"J Zhang","year":"2020","unstructured":"Zhang J, Zhou Y, Liu Y (2020) EEG-based emotion recognition using an improved radial basis function neural network. J Ambient Intell Human Comput 20:1\u201312","journal-title":"J Ambient Intell Human Comput"}],"container-title":["Journal of Ambient Intelligence and Humanized Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-021-03200-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12652-021-03200-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-021-03200-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,27]],"date-time":"2024-08-27T16:07:10Z","timestamp":1724774830000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12652-021-03200-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,4,5]]},"references-count":53,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2022,10]]}},"alternative-id":["3200"],"URL":"https:\/\/doi.org\/10.1007\/s12652-021-03200-1","relation":{},"ISSN":["1868-5137","1868-5145"],"issn-type":[{"type":"print","value":"1868-5137"},{"type":"electronic","value":"1868-5145"}],"subject":[],"published":{"date-parts":[[2021,4,5]]},"assertion":[{"value":"3 December 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 March 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 April 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}