{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:24:36Z","timestamp":1775229876210,"version":"3.50.1"},"reference-count":114,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2014,8,23]],"date-time":"2014-08-23T00:00:00Z","timestamp":1408752000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cogn Comput"],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1007\/s12559-014-9296-6","type":"journal-article","created":{"date-parts":[[2014,8,22]],"date-time":"2014-08-22T08:50:21Z","timestamp":1408697421000},"page":"892-913","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Investigation of Speaker Group-Dependent Modelling for Recognition of Affective States from Speech"],"prefix":"10.1007","volume":"6","author":[{"given":"Ingo","family":"Siegert","sequence":"first","affiliation":[]},{"given":"David","family":"Philippou-H\u00fcbner","sequence":"additional","affiliation":[]},{"given":"Kim","family":"Hartmann","sequence":"additional","affiliation":[]},{"given":"Ronald","family":"B\u00f6ck","sequence":"additional","affiliation":[]},{"given":"Andreas","family":"Wendemuth","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,8,23]]},"reference":[{"issue":"3","key":"9296_CR1","doi-asserted-by":"crossref","first-page":"556","DOI":"10.1016\/j.csl.2010.10.001","volume":"25","author":"EM Albornoz","year":"2011","unstructured":"Albornoz EM, Milone DH, Rufiner HL. Spoken emotion recognition using hierarchical classifiers. Comput Speech Lang. 2011;25(3):556\u201370.","journal-title":"Comput Speech Lang"},{"issue":"6","key":"9296_CR2","doi-asserted-by":"crossref","first-page":"1304","DOI":"10.1121\/1.1914702","volume":"55","author":"B Atal","year":"1974","unstructured":"Atal B. Effectiveness of linear prediction characteristics of the speech wave for automatic speaker identification and verification. J Acoust Soc Am. 1974;55(6):1304\u201312.","journal-title":"J Acoust Soc Am"},{"key":"9296_CR3","doi-asserted-by":"crossref","unstructured":"Bahari M, Van Hamme H. Speaker age estimation using hidden markov model weight supervectors. In: Proceedings of the 11th ISSPA; 2012. p. 517\u2013521.","DOI":"10.1109\/ISSPA.2012.6310606"},{"key":"9296_CR4","unstructured":"Batliner A, Fischer K, Huber R, Spiker J, North E. Desperately seeking emotions: actors, wizards and human beings. In: Proceedings of the ISCA workshop on speech and emotion; 2000. p. 195\u2013200."},{"issue":"1\u20132","key":"9296_CR5","doi-asserted-by":"crossref","first-page":"117","DOI":"10.1016\/S0167-6393(02)00079-1","volume":"40","author":"A Batliner","year":"2003","unstructured":"Batliner A, Fischer K, Huber R, Spilker J, N\u00f6th E. How to find trouble in communication. Speech Commun. 2003;40(1\u20132):117\u201343.","journal-title":"Speech Commun"},{"key":"9296_CR6","unstructured":"Batliner A, Hacker C, Steidl S, N\u00f6th E, Russell M, Wong M. \u201cYou stupid tin box\u201d- children interacting with the AIBO robot: a cross-linguistic emotional speech corpus. In: Proceedings of LREC; 2004. p. 865\u2013868."},{"issue":"1","key":"9296_CR7","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1016\/j.csl.2009.12.003","volume":"25","author":"A Batliner","year":"2011","unstructured":"Batliner A, Steidl S, Schuller B, Seppi D, Vogt T, Wagner J, Devillers L, Vidrascu L, Aharonson V, Kessous L, Amir N. Whodunnit\u2014searching for the most important feature types signalling emotion-related user states in speech. Comput Speech Lang. 2011;25(1):4\u201328.","journal-title":"Comput Speech Lang"},{"key":"9296_CR8","unstructured":"Becker-Asano C. WASABI : Affect simulation for agents with believable interactivity. Ph.D. thesis, Universit\u00e4t Bielefeld; 2008."},{"key":"9296_CR9","doi-asserted-by":"crossref","unstructured":"B\u00f6ck R, H\u00fcbner D, Wendemuth A. Determining optimal signal features and parameters for hmm-based emotion classification. In: Proceedings of the 15th IEEE mediterranean electrotechnical conference; 2010. p. 1586\u20131590.","DOI":"10.1109\/MELCON.2010.5476295"},{"key":"9296_CR10","doi-asserted-by":"crossref","unstructured":"B\u00f6ck R, Limbrecht K, Walter S, Hrabal D, Traue HC, Gl\u00fcge S, Wendemuth A. Intraindividual and interindividual multimodal emotion analyses in human\u2013machine interaction. In: IEEE interantional multi-disciplinary conference on cognitive methods in situation awareness and decision support; 2012. p. 59\u201364.","DOI":"10.1109\/CogSIMA.2012.6188409"},{"key":"9296_CR11","doi-asserted-by":"crossref","unstructured":"Bocklet T, Maier A, Bauer J, Burkhardt F, Noth E. Age and gender recognition for telephone applications based on GMM supervectors and support vector machines. In: Proceedings of IEEE ICASSP\u201908; 2008. p. 1605\u20131608.","DOI":"10.1109\/ICASSP.2008.4517932"},{"key":"9296_CR12","unstructured":"Burkhardt F, Eckert M, Johannsen W, Stegmann J. A database of age and gender annotated telephone speech. In: Proceedings of the 7th LREC. ELRA; 2010."},{"key":"9296_CR13","doi-asserted-by":"crossref","unstructured":"Burkhardt F, Paeschke A, Rolfes M, Sendlmeier W, Weiss B. A database of german emotional speech. In: Proceedings of interspeech; 2005. p. 1516\u20131520.","DOI":"10.21437\/Interspeech.2005-446"},{"key":"9296_CR14","doi-asserted-by":"crossref","unstructured":"Busso C, Deng Z, Yildirim S, Bulut M, Lee C, Kazemzadeh A, Lee S, Neumann U, Narayanan S. Analysis of emotion recognition using facial expressions, speech and multimodal information. In: Proceedings of the 6th ICMI. New York, USA: ACM; 2004. p. 205\u2013211.","DOI":"10.1145\/1027933.1027968"},{"issue":"5\u20136","key":"9296_CR15","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1007\/BF01420597","volume":"30","author":"LD Butler","year":"1994","unstructured":"Butler LD, Nolen-Hoeksema S. Gender differences in responses to depressed mood in a college sample. Sex Roles. 1994;30(5\u20136):331\u201346.","journal-title":"Sex Roles"},{"issue":"5","key":"9296_CR16","doi-asserted-by":"crossref","first-page":"3246","DOI":"10.1121\/1.411700","volume":"97","author":"J Cohen","year":"1995","unstructured":"Cohen J, Kamm T, Andreou AG. Vocal tract normalization in speech recognition: compensating for systematic speaker variability. J Acoust Soc Am. 1995;97(5):3246\u20137.","journal-title":"J Acoust Soc Am"},{"issue":"1\u20132","key":"9296_CR17","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/S0167-6393(02)00071-7","volume":"40","author":"R Cowie","year":"2003","unstructured":"Cowie R, Cornelius RR. Describing the emotional states that are expressed in speech. Speech Commun. 2003;40(1\u20132):5\u201332.","journal-title":"Speech Commun"},{"key":"9296_CR18","doi-asserted-by":"crossref","unstructured":"Cullen A, Harte N. Feature sets for automatic classification of dimensional affect. In: IET Irish signals and systems conference (ISSC 2012); 2012. p. 1\u20136.","DOI":"10.1049\/ic.2012.0211"},{"issue":"4","key":"9296_CR19","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis S, Mermelstein P. Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans Acoust Speech Signal Process. 1980;28(4):357\u201366.","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"key":"9296_CR20","doi-asserted-by":"crossref","unstructured":"Dellwo V, Leemann A, Kolly MJ. Speaker idiosyncratic rhythmic features in the speech signal. In: Proceedings of Interspeech; 2012. Portland, Oregon.","DOI":"10.21437\/Interspeech.2012-342"},{"issue":"3\u20134","key":"9296_CR21","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1016\/S0167-6393(02)00030-4","volume":"39","author":"J Veth de","year":"2003","unstructured":"de Veth J, Boves L. On the efficiency of classical rasta filtering for continuous speech recognition: keeping the balance between acoustic pre-processing and acoustic modelling. Speech Commun. 2003;39(3\u20134):269\u201386.","journal-title":"Speech Commun"},{"key":"9296_CR22","doi-asserted-by":"crossref","first-page":"920","DOI":"10.1007\/s11055-012-9658-z","volume":"42","author":"E Dmitrieva","year":"2012","unstructured":"Dmitrieva E, Gelman V. The relationship between the perception of emotional intonation of speech in conditions of interference and the acoustic parameters of speech signals in adults of different gender and age. Neurosci Behav Physiol. 2012;42:920\u20138.","journal-title":"Neurosci Behav Physiol"},{"key":"9296_CR23","doi-asserted-by":"crossref","unstructured":"Dobri\u0161ek S, Gaj\u0161ek R, Miheli\u010d F, Pave\u0161i\u0107 N, \u0160truc V. Towards efficient multi-modal emotion recognition. Int J Adv Robot Syst. 2013;10:53. doi: 10.5772\/54002 .","DOI":"10.5772\/54002"},{"key":"9296_CR24","doi-asserted-by":"crossref","unstructured":"Douglas-Cowie E, Cowie R, Sneddon I, Cox C, Lowry O, McRorie M, Martin JC, Devillers L, Abrilian S, Batliner A, Amir N, Karpouzis K. The HUMAINE database: addressing the collection and annotation of naturalistic and induced emotional data. In: Proceedings of ACII\u201907; 2007. p. 488\u2013500.","DOI":"10.1007\/978-3-540-74889-2_43"},{"key":"9296_CR25","doi-asserted-by":"crossref","unstructured":"Douglas-Cowie E, Devillers L, Martin JC, Cowie R, Savvidou S, Abrilian S, Cox C. Multimodal databases of everyday emotion: facing up to complexity. In: European conference on speech communication and technology; 2005. p. 813\u2013816.","DOI":"10.21437\/Interspeech.2005-381"},{"key":"9296_CR26","doi-asserted-by":"crossref","unstructured":"Dumouchel P, Dehak N, Attabi Y, Dehak R, Boufaden N. Cepstral and long-term features for emotion recognition. In: Proceedings of Interspeech;2009. p. 344\u2013347.","DOI":"10.21437\/Interspeech.2009-111"},{"key":"9296_CR27","doi-asserted-by":"crossref","unstructured":"Ekman P. Handbook of cognition and emotion, chap. basic emotions. Sussex, UK: Wiley; 2005. p. 45\u201360.","DOI":"10.1002\/0470013494.ch3"},{"key":"9296_CR28","doi-asserted-by":"crossref","unstructured":"Emori T, Shinoda K. Rapid vocal tract length normalization using maximum likelihood estimation. In: Proceedings of EUROSPEECH 2001, 7th European conference on speech communication and technology. Denmark: Aalborg; 2001. p. 1649\u20131652.","DOI":"10.21437\/Eurospeech.2001-204"},{"key":"9296_CR29","doi-asserted-by":"crossref","unstructured":"Engberg IS, Hansen AV. Documentation of the danish emotional speech database (DES). Technical report. Denmark: Center for Person, Kommunikation, Aalborg University; 1996. Internal aau report.","DOI":"10.21437\/Eurospeech.1997-482"},{"key":"9296_CR30","unstructured":"Frommer J, Michaelis B, R\u00f6sner D, Wendemuth A, Friesen R, Haase, M, Kunze M, Andrich R, Lange J, Panning A, Siegert I. Towards emotion and affect detection in the multimodal last minute corpus. In: Proceedings of the 8th LREC; 2012."},{"key":"9296_CR31","unstructured":"Frommer J, R\u00f6sner D, Haase M, Lange J, Friesen R, Otto M. Detection and avoidance of failures in dialogues\u2014wizard of oz experiment operator\u2019s manual. Pabst Science Publishers; 2012."},{"key":"9296_CR32","doi-asserted-by":"crossref","unstructured":"Gajsek R, Zibert J, Justin T, Struc V, Vesnicer B, Mihelic F. Gender and affect recognition based on gmm and gmm-ubm modeling with relevance map estimation. In: Proceedings of interspeech; 2010. p. 2810\u20132813.","DOI":"10.21437\/Interspeech.2010-743"},{"key":"9296_CR33","doi-asserted-by":"crossref","unstructured":"Giuliani D, Gerosa M. Investigating recognition of children\u2019s speech. In: Proceedings of IEEE international conference on acoustics, speech, and signal processing (ICASSP \u201903), vol. 2; 2003. p. II-137-40.","DOI":"10.1109\/ICASSP.2003.1202313"},{"key":"9296_CR34","doi-asserted-by":"crossref","unstructured":"Gl\u00fcge S, B\u00f6ck R, Wendemuth A. Segmented-memory recurrent neural networks versus hidden markov models in emotion recognition from speech. In: Proceedings of the 3rd international joint conference on computational intelligence. Paris, France; 2011. p. 308\u2013315.","DOI":"10.5220\/0003644003080315"},{"key":"9296_CR35","unstructured":"Gnjatovi\u0107 M, R\u00f6sner D. On the role of the NIMITEK corpus in developing an emotion adaptive spoken dialogue system. In: Proceedings of the 7th LREC. Marrakech, Morocco; 2008."},{"key":"9296_CR36","doi-asserted-by":"crossref","unstructured":"Grimm M, Kroschel K, Narayanan S. The vera am mittag german audio\u2013visual emotional speech database. In: Proceedings of ICME; 2008. p. 865\u2013868.","DOI":"10.1109\/ICME.2008.4607572"},{"issue":"4","key":"9296_CR37","doi-asserted-by":"crossref","first-page":"590","DOI":"10.1037\/0882-7974.12.4.590","volume":"12","author":"J Gross","year":"1997","unstructured":"Gross J, Carstensen L, Pasupathi M, Tsai J, Skorpen C, Hsu A. Emotion and aging: experience, expression, and control. Psychol Aging. 1997;12(4):590\u20139.","journal-title":"Psychol Aging"},{"issue":"1","key":"9296_CR38","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1145\/1656274.1656278","volume":"11","author":"M Hall","year":"2009","unstructured":"Hall M, Frank E, Holmes G, Pfahringer B, Reutemann P, Witten IH. The weka data mining software: an update. SIGKDD Explor Newsl. 2009;11(1):10\u20138.","journal-title":"SIGKDD Explor Newsl"},{"key":"9296_CR39","doi-asserted-by":"crossref","unstructured":"Hartmann K, Siegert I, Philippou-H\u00fcbner D, Wendemuth A. Emotion detection in HCI: from speech features to emotion space. In: Proceedings of the 12th IFAC, IFIP, IFORS, IEA symposium on analysis, design, and evaluation of human\u2013machine systems. Las Vegas, USA; 2013.","DOI":"10.3182\/20130811-5-US-2037.00049"},{"key":"9296_CR40","doi-asserted-by":"crossref","unstructured":"Hassan A, Damper RI, Niranjan M. On acoustic emotion recognition: compensating for covariate shift. IEEE Trans Audio Speech Lang Process. 2013;21(7):1458\u20131468.","DOI":"10.1109\/TASL.2013.2255278"},{"issue":"4","key":"9296_CR41","doi-asserted-by":"crossref","first-page":"578","DOI":"10.1109\/89.326616","volume":"2","author":"H Hermansky","year":"1994","unstructured":"Hermansky H, Morgan N. Rasta processing of speech. IEEE Trans Speech Audio Process. 1994;2(4):578\u201389.","journal-title":"IEEE Trans Speech Audio Process"},{"key":"9296_CR42","unstructured":"Ho CH. Speaker modelling for voice conversion. Ph.D. thesis, Department of Electronic and Computer Engineering, Brunel University, London; 2001."},{"key":"9296_CR43","unstructured":"Hubeika V. Estimation of gender and age from recorded speech. In: Proceedings of the ACM student research competition. Czech Technical University; 2006. p. 25\u201332."},{"key":"9296_CR44","doi-asserted-by":"crossref","unstructured":"Kelly F, Harte N. Effects of long-term ageing on speaker verification. In: Proceedings of the COST 2101 European conference on Biometrics and ID management. Berlin: Springer; 2011. p. 113\u2013124.","DOI":"10.1007\/978-3-642-19530-3_11"},{"issue":"1","key":"9296_CR45","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1016\/j.specom.2009.08.009","volume":"52","author":"T Kinnunen","year":"2010","unstructured":"Kinnunen T, Li H. An overview of text-independent speaker recognition: from features to supervectors. Speech Commun. 2010;52(1):12\u201340.","journal-title":"Speech Commun"},{"issue":"9\u201310","key":"9296_CR46","doi-asserted-by":"crossref","first-page":"1172","DOI":"10.1016\/j.specom.2011.01.007","volume":"53","author":"M Kockmann","year":"2011","unstructured":"Kockmann M, Burget L, \u010cernock\u00fd JH. Application of speaker- and language identification state-of-the-art techniques for emotion recognition. Speech Commun. 2011;53(9\u201310):1172\u201385.","journal-title":"Speech Commun"},{"key":"9296_CR47","first-page":"353","volume":"1","author":"L Lee","year":"1996","unstructured":"Lee L, Rose R. Speaker normalization using efficient frequency warping procedures. IEEE Int Conf Acoust Speech Signal Process. 1996;1:353\u20136.","journal-title":"IEEE Int Conf Acoust Speech Signal Process"},{"issue":"1","key":"9296_CR48","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1109\/89.650310","volume":"6","author":"L Lee","year":"1998","unstructured":"Lee L, Rose R. A frequency warping approach to speaker normalization. IEEE Trans Speech Audio Process. 1998;6(1):49\u201360.","journal-title":"IEEE Trans Speech Audio Process"},{"key":"9296_CR49","first-page":"207","volume":"3","author":"MW Lee","year":"2012","unstructured":"Lee MW, Kwak KC. Performance comparison of gender and age group recognition for human-robot interaction. IJACSA. 2012;3:207\u201311.","journal-title":"IJACSA"},{"key":"9296_CR50","doi-asserted-by":"crossref","unstructured":"Lee S, Potamianos A, Narayanan S. Analysis of children\u2019s speech: duration, pitch and formants. In: Proceedings of interspeech, vol 1; 1997. p. 473\u2013476.","DOI":"10.21437\/Eurospeech.1997-161"},{"key":"9296_CR51","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1016\/j.csl.2012.01.008","volume":"27","author":"M Li","year":"2012","unstructured":"Li M, Han K, Narayanan S. Automatic speaker age and gender recognition using acoustic and prosodic level information fusion. Comput Speech Lang. 2012;27:151\u201367.","journal-title":"Comput Speech Lang"},{"key":"9296_CR52","doi-asserted-by":"crossref","unstructured":"Li M, Jung CS, Han KJ. Combining five acoustic level modeling methods for automatic speaker age and gender recognition. In: Proceedings of interspeech; 2010. p. 2826\u20132829.","DOI":"10.21437\/Interspeech.2010-747"},{"issue":"6","key":"9296_CR53","first-page":"1075","volume":"18","author":"LK Lipov\u010dan","year":"2009","unstructured":"Lipov\u010dan LK, Prizmi\u0107 Z, Franc R. Age and gender differences in affect regulation strategies. Drustvena istrazivanja: J Gen Social Issues. 2009;18(6):1075\u201388.","journal-title":"Drustvena istrazivanja: J Gen Social Issues"},{"key":"9296_CR54","doi-asserted-by":"crossref","unstructured":"Martin O, Kotsia I, Macq B, Pitas I. The enterface\u201905 audio\u2013visual emotion database. In: Proceedings of the 22nd international conference on data engineering workshops. Washington, DC, USA: IEEE Computer Society; 2006.","DOI":"10.1109\/ICDEW.2006.145"},{"issue":"2","key":"9296_CR55","doi-asserted-by":"crossref","first-page":"215","DOI":"10.3758\/BF03212421","volume":"3","author":"D Massaro","year":"1996","unstructured":"Massaro D, Egan P. Perceiving affect from the voice and the face. Psychon Bull Rev. 1996;3(2):215\u201321.","journal-title":"Psychon Bull Rev"},{"key":"9296_CR56","unstructured":"McDougall, W.: An introduction to social psychology. Adamant Media Corporation, Chestnut Hill, USA, Facsimile reprint of a 1912 edition by John W. Boston: Luce & Co.; 2001."},{"key":"9296_CR57","doi-asserted-by":"crossref","unstructured":"McKeown G, Valstar M, Cowie R, Pantic M. The semaine corpus of emotionally coloured character interactions. In: Proceedings of ICME; 2010. p. 1079\u20131084.","DOI":"10.1109\/ICME.2010.5583006"},{"issue":"2","key":"9296_CR58","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1177\/1368430207088035","volume":"11","author":"K McRae","year":"2008","unstructured":"McRae K, Ochsner KN, Mauss IB, Gabrieli JJD, Gross JJ. Gender differences in emotion regulation: an fMRI study of cognitive reappraisal. Group Process Intergroup Relat. 2008;11(2):143\u201362.","journal-title":"Group Process Intergroup Relat"},{"issue":"4","key":"9296_CR59","doi-asserted-by":"crossref","first-page":"13:1","DOI":"10.1145\/1998384.1998387","volume":"7","author":"H Meinedo","year":"2011","unstructured":"Meinedo H, Trancoso I. Age and gender detection in the i-dash project. ACM Trans Speech Lang Process. 2011;7(4):13:1\u201316.","journal-title":"ACM Trans Speech Lang Process"},{"key":"9296_CR60","unstructured":"Mengistu KT. Robust acoustic and semantic modeling in a telephone-based spoken dialog system. Ph.D. thesis, Otto von Guericke University Magdeburg; 2009."},{"issue":"6","key":"9296_CR61","first-page":"63","volume":"35","author":"JD Morris","year":"1995","unstructured":"Morris JD. SAM: the self-assessment manikin an efficient cross-cultural measurement of emotional response. J Advert Res. 1995;35(6):63\u20138.","journal-title":"J Advert Res"},{"key":"9296_CR62","doi-asserted-by":"crossref","unstructured":"Mower E, Metallinou A, Lee C, Kazemzadeh A, Busso C, Lee S, Narayanan S. Interpreting ambiguous emotional expressions. In: 3rd Internatinal conference on affective computing and intelligent interaction and workshops (ACII); 2009.","DOI":"10.1109\/ACII.2009.5349500"},{"key":"9296_CR63","unstructured":"Neiberg D, Elenius K, Laskowski K. A database of german emotional speech. In: Proceedings of interspeech; 2006. p. 809\u2013812."},{"key":"9296_CR64","volume-title":"Advanced data mining techniques","author":"DL Olson","year":"2008","unstructured":"Olson DL, Delen D. Advanced data mining techniques. Berlin: Springer; 2008."},{"key":"9296_CR65","doi-asserted-by":"crossref","unstructured":"Paleari M, Huet B, Chellali R. Towards multimodal emotion recognition: a new approach. In: Proceedings of the ACM international conference on image and video retrieval. New York, NY, USA: ACM; 2010. p. 174\u2013181.","DOI":"10.1145\/1816041.1816069"},{"key":"9296_CR66","doi-asserted-by":"crossref","unstructured":"Palm G, Glodek M. Towards emotion recognition in human computer interaction. In: Neural nets and surroundings, smart innovation, systems and technologies, vol. 19. Berlin: Springer; 2013. p. 323\u201336.","DOI":"10.1007\/978-3-642-35467-0_32"},{"key":"9296_CR67","doi-asserted-by":"crossref","unstructured":"Panning A, Siegert I, Al-Hamadi A, Wendemuth A, R\u00f6sner D, Frommer J, Krell G, Michaelis B. Multimodal affect recognition in spontaneous HCI environment. In: IEEE international conference on signal processing, communications and computing; 2012. p. 430\u2013435.","DOI":"10.1109\/ICSPCC.2012.6335662"},{"key":"9296_CR68","volume-title":"Emotion, a psychoevolutionary synthesis","author":"R Plutchik","year":"1980","unstructured":"Plutchik R. Emotion, a psychoevolutionary synthesis. New York: Harper & Row; 1980."},{"key":"9296_CR69","doi-asserted-by":"crossref","unstructured":"Potamianos A, Narayanan S. A review of the acoustic and linguistic properties of children\u2019s speech. In: IEEE 9th workshop on multimedia signal processing (MMSP 2007); 2007. p. 22\u201325.","DOI":"10.1109\/MMSP.2007.4412809"},{"key":"9296_CR70","doi-asserted-by":"crossref","first-page":"399","DOI":"10.1109\/TASSP.1976.1162846","volume":"24","author":"L Rabiner","year":"1976","unstructured":"Rabiner L, Cheng MJ, Rosenberg AE, McGonegal CA. A comparative performance study of several pitch detection algorithms. IEEE Trans ASSP. 1976;24:399\u2013417.","journal-title":"IEEE Trans ASSP"},{"issue":"2","key":"9296_CR71","first-page":"143","volume":"16","author":"K Rao","year":"2013","unstructured":"Rao K, Koolagudi S, Vempada R. Emotion recognition from speech using global and local prosodic features. IJST. 2013;16(2):143\u201360.","journal-title":"IJST"},{"key":"9296_CR72","doi-asserted-by":"crossref","unstructured":"Rosenberg A. Classifying skewed data: importance weighting to optimize average recall. In: Proceedings of Interspeech; 2012.","DOI":"10.21437\/Interspeech.2012-131"},{"key":"9296_CR73","doi-asserted-by":"crossref","unstructured":"R\u00f6sner D, Friesen R, Otto M, Lange J, Haase M, Frommer J. Intentionality in interacting with companion systems\u2014an empirical approach. In: Human\u2013computer interaction, towards mobile and intelligent interaction environments, LNCS, vol. 6763. Berlin, Heidelberg: Springer; 2011. p. 593\u2013602.","DOI":"10.1007\/978-3-642-21616-9_67"},{"key":"9296_CR74","unstructured":"R\u00f6sner D, Frommer J, Andrich R, Friesen R, Haase M, Kunze M, Lange J, Otto M. LAST MINUTE: a novel corpus to support emotion, sentiment and social signal processing. In: 4th International workshop on corpora for research on emotion sentiment and social signals\u2014ES3. ELRA; 2012. p. 82\u201389."},{"key":"9296_CR75","unstructured":"R\u00f6sner D, Frommer J, Friesen R, Haase M, Lange J, Otto M. LAST MINUTE: a multimodal corpus of speech-based user-companion interactions. In: Proceedings of the 8th LREC; 2012. p. 96\u2013103."},{"issue":"12","key":"9296_CR76","doi-asserted-by":"crossref","first-page":"1535","DOI":"10.1016\/j.patrec.2009.12.036","volume":"31","author":"P Ruvolo","year":"2010","unstructured":"Ruvolo P, Fasel I, Movellan JR. A learning approach to hierarchical feature selection and aggregation for audio classification. Pattern Recogn Lett. 2010;31(12):1535\u201342.","journal-title":"Pattern Recogn Lett"},{"key":"9296_CR77","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1093\/oso\/9780195130072.003.0005","volume-title":"Appraisal processes in emotion: theory, methods, research","author":"K Scherer","year":"2001","unstructured":"Scherer K. Appraisal considered as a process of multilevel sequential checking. In: Scherer KR, Schorr A, Johnstone T, editors. Appraisal processes in emotion: theory, methods, research. Oxford: Oxford University Press; 2001. p. 92\u2013120."},{"key":"9296_CR78","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1080\/02699930500305016","volume":"20","author":"K Scherer","year":"2006","unstructured":"Scherer K, Dan E, Flykt A. What determines a feeling\u2019s position in affective space? A case for appraisal. Cogn Emot. 2006;20:92\u2013113.","journal-title":"Cogn Emot"},{"key":"9296_CR79","unstructured":"Schiel F. Automatic phonetic transcription of non-prompted speech. In: Proceedings of the XIVth international congress of phonetic sciences, ICPhS99. San Francisco; 1999. p. 607\u2013610."},{"issue":"9\u201310","key":"9296_CR80","doi-asserted-by":"crossref","first-page":"1062","DOI":"10.1016\/j.specom.2011.01.011","volume":"53","author":"B Schuller","year":"2011","unstructured":"Schuller B, Batliner A, Steidl S, Seppi D. Recognising realistic emotions and affect in speech: state of the art and lessons learnt from the first challenge. Speech Commun. 2011;53(9\u201310):1062\u201387.","journal-title":"Speech Commun"},{"issue":"12","key":"9296_CR81","doi-asserted-by":"crossref","first-page":"1760","DOI":"10.1016\/j.imavis.2009.02.013","volume":"27","author":"B Schuller","year":"2009","unstructured":"Schuller B, M\u00fcller R, Eyben F, Gast J, H\u00f6rnler B, W\u00f6llmer M, Rigoll G, H\u00f6thker A, Konosu H. Being bored? Recognising natural interest by extensive audiovisual integration for real-life application. Image Vis Comput. 2009;27(12):1760\u201374.","journal-title":"Image Vis Comput"},{"key":"9296_CR82","doi-asserted-by":"crossref","unstructured":"Schuller B, Seppi D, Batliner A, Maier A, Steidl S. Towards more reality in the recognition of emotional speech. In: IEEE international conference on acoustics, speech and signal processing, vol. 4; 2007. p. IV-941\u2013IV-944.","DOI":"10.1109\/ICASSP.2007.367226"},{"key":"9296_CR83","doi-asserted-by":"crossref","unstructured":"Schuller B, Steidl S, Batliner A. The interspeech 2009 emotion challenge. In: Proceedings of INTERSPEECH\u20192009. Brighton, UK: ISCA; 2009. p. 312\u2013315.","DOI":"10.21437\/Interspeech.2009-103"},{"key":"9296_CR84","doi-asserted-by":"crossref","unstructured":"Schuller B, Vlasenko B, Eyben F, Rigoll G, Wendemuth A. Acoustic emotion recognition: a benchmark comparison of performances. In: Proceedings of the IEEE automatic speech recognition and understanding workshop, ASRU 2009. Merano, Italy; 2009 . p. 552\u2013557.","DOI":"10.1109\/ASRU.2009.5372886"},{"key":"9296_CR85","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1109\/T-AFFC.2010.8","volume":"I","author":"B Schuller","year":"2010","unstructured":"Schuller B, Vlasenko B, Eyben F, Wollmer M, Stuhlsatz A, Wendemuth A, Rigoll G. Cross-corpus acoustic emotion recognition: variances and strategies. IEEE Trans Affect Comput. 2010;I:119\u201331.","journal-title":"IEEE Trans Affect Comput"},{"key":"9296_CR86","first-page":"285","volume-title":"The role of prosody in affective speech, linguistic insights, studies in language and communication","author":"B Schuller","year":"2009","unstructured":"Schuller B, W\u00f6llmer M, Eyben F, Rigoll G. Spectral or voice quality? Feature type relevance for the discrimination of emotion pairs. In: Hancil S, editor. The role of prosody in affective speech, linguistic insights, studies in language and communication, vol. 97. Frankfurt am Main: Peter Lang Publishing Group; 2009. p. 285\u2013307."},{"key":"9296_CR87","doi-asserted-by":"crossref","first-page":"315","DOI":"10.1007\/978-3-642-12127-2_33","volume-title":"Multiple classifier systems, LNCS","author":"F Schwenker","year":"2010","unstructured":"Schwenker F, Scherer S, Schmidt M, Schels M, Glodek M. Multiple classifier systems for the recogonition of human emotions. In: Gayar N, Kittler J, Roli F, editors. Multiple classifier systems, LNCS, vol. 5997. Berlin: Springer; 2010. p. 315\u201324."},{"issue":"2","key":"9296_CR88","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1007\/s10772-012-9170-4","volume":"16","author":"I Shahin","year":"2013","unstructured":"Shahin I. Gender-dependent emotion recognition based on HMMs and SPHMMs. Int J Speech Technol. 2013;16(2):133\u201341.","journal-title":"Int J Speech Technol"},{"key":"9296_CR89","unstructured":"Siegert I, B\u00f6ck R, Philippou-H\u00fcbner D, Wendemuth A. Investigation of hierarchical classification for simultaneous gender and age recognitions. In: Proceedings of the 23. ESSV; 2012."},{"key":"9296_CR90","doi-asserted-by":"crossref","unstructured":"Siegert I, B\u00f6ck R, Wendemuth A. Inter-rater reliability for emotion annotation in human\u2013computer interaction\u2014comparison and methodological improvements. J Multimodal User Interfaces. 2013;8(1):17\u201328.","DOI":"10.1007\/s12193-013-0129-9"},{"key":"9296_CR91","doi-asserted-by":"crossref","unstructured":"Siegert I, B\u00f6ck R, Philippou-H\u00fcbner D, Vlasenko B, Wendemuth A. Appropriate emotional labeling of non-acted speech using basic emotions, Geneva emotion wheel and self assessment manikins. In: Proceedings of ICME; 2011.","DOI":"10.1109\/ICME.2011.6011929"},{"key":"9296_CR92","unstructured":"Siegert I, B\u00f6ck R, Wendemuth A. The influence of context knowledge for multimodal annotation on natural material. In: Joint Proceedings of the IVA 2012 workshops. Otto von Guericke University Magdeburg; 2012. p. 25\u201332."},{"key":"9296_CR93","doi-asserted-by":"crossref","unstructured":"Siegert I, Glodek M, Panning A, Krell G, Schwenker F, Al-Hamadi A, Wendemuth A. Using speaker group-dependent modelling to improve fusion of fragmentary classifier decisions. In: IEEE international conference on cybernetics (CYBCONF); 2013. p. 132\u2013137.","DOI":"10.1109\/CYBConf.2013.6617458"},{"key":"9296_CR94","doi-asserted-by":"crossref","unstructured":"Siegert I, Hartmann K, B\u00f6ck R, Wendemuth A. Speaker group-dependent modelling for affect recognition from speech. In: ERM4HCI 2013: The 1st workshop on emotion representation and modelling in human\u2013computer-interaction-systems; 2013.","DOI":"10.1145\/2522848.2535891"},{"key":"9296_CR95","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"525","DOI":"10.1007\/978-3-540-87391-4_67","volume-title":"Text, speech and dialogue","author":"S Steidl","year":"2008","unstructured":"Steidl S, Batliner A, N\u00f6th E, Hornegger J. Quantification of segmentation and f0 errors and their effect on emotion recognition. In: Sojka P, Hor\u00e1k A, Kope\u010dek I, Pala K, editors. Text, speech and dialogue, vol. 5246., Lecture Notes in Computer ScienceBerlin: Springer; 2008. p. 525\u201334."},{"key":"9296_CR96","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"456","DOI":"10.1007\/978-3-642-23851-2_47","volume-title":"Knowledge-based and intelligent information and engineering systems","author":"M Suzuki","year":"2011","unstructured":"Suzuki M, Tsuchiya S, Ren F. A novel emotion recognizer from speech using both prosodic and linguistic features. In: K\u00f6nig A, Dengel A, Hinkelmann K, Kise K, Howlett R, Jain L, editors. Knowledge-based and intelligent information and engineering systems, vol. 6881., Lecture Notes in Computer ScienceBerlin: Springer; 2011. p. 456\u201365."},{"key":"9296_CR97","unstructured":"Takahashi K. Remarks on emotion recognition from biopotential signals. In: 2nd International conference on autonomous robots and agents; 2004. p. 186\u2013191."},{"key":"9296_CR98","unstructured":"Tan L, Karnjanadecha M. Pitch detection algorithm: autocorrelation method and AMDF. In: Proceedings of the 3rd international symposium on communications and information technology; 2003. pp. 551\u2013556."},{"issue":"9","key":"9296_CR99","doi-asserted-by":"crossref","first-page":"1049","DOI":"10.1016\/j.specom.2012.04.006","volume":"54","author":"KP Truong","year":"2012","unstructured":"Truong KP, van Leeuwen DA, de Jong FM. speech-based recognition of self-reported and observed emotion in a dimensional space. Speech Commun. 2012;54(9):1049\u201363.","journal-title":"Speech Commun"},{"key":"9296_CR100","doi-asserted-by":"crossref","unstructured":"Truong KP, Neerincx MA, van Leeuwen DA. Assessing agreement of observer- and self-annotations in spontaneous multimodal emotion data. In: Proceedings of interspeech; 2008. p. 318\u2013321.","DOI":"10.21437\/Interspeech.2008-95"},{"key":"9296_CR101","unstructured":"Vaughan B, Kosidis S, Cullen C, Wang Y. Task-based mood induction procedures for the elicitation of natural emotional responses. In: The 4th international conference on cybernetics and information technologies, systems and applications. Orlando, Florida; 2007."},{"key":"9296_CR102","doi-asserted-by":"crossref","unstructured":"Vergin R, Farhat A, O\u2019Shaughnessy D. Robust gender-dependent acoustic\u2013phonetic modelling in continuous speech recognition based on a new automatic male\/female classification. In: 4th International conference on spoken language processing; 1996. p. 1081\u20131084.","DOI":"10.21437\/ICSLP.1996-284"},{"issue":"2","key":"9296_CR103","doi-asserted-by":"crossref","first-page":"260","DOI":"10.1109\/TIT.1967.1054010","volume":"13","author":"A Viterbi","year":"1967","unstructured":"Viterbi A. Error bounds for convolutional codes and an asymptotically optimum decoding algorithm. IEEE Trans Inf Theory. 1967;13(2):260\u20139.","journal-title":"IEEE Trans Inf Theory"},{"key":"9296_CR104","unstructured":"Vogt T, Andr\u00e9 E. Improving automatic emotion recognition from speech via gender differentiation. In: Proceedings of the 5th LREC; 2006."},{"key":"9296_CR105","doi-asserted-by":"crossref","unstructured":"Walter S, Scherer S, Schels M, Glodek M, Hrabal D, Schmidt M, B\u00f6ck R, Limbrecht K, Traue H, Schwenker F. Multimodal emotion classification in naturalistic user behavior. In: Human\u2013computer interaction, towards mobile and intelligent interaction environments, LNCS, vol. 6763. Berlin, Heidelberg: Springer; 2011. p. 603\u201311.","DOI":"10.1007\/978-3-642-21616-9_68"},{"key":"9296_CR106","doi-asserted-by":"crossref","unstructured":"Wegmann S, McAllaster D, Orloff J, Peskin B. Speaker normalization on conversational telephone speech. In: Proceedings of IEEE ICASSP\u201996, vol. 1; 1996. p. 339\u2013341.","DOI":"10.1109\/ICASSP.1996.541101"},{"key":"9296_CR107","doi-asserted-by":"crossref","unstructured":"Wendemuth A, Biundo S. A companion technology for cognitive technical systems. In: Cognitive behavioural systems, LNCS, vol. 7403. Berlin, Heidelberg: Springer; 2012. p. 89\u2013103.","DOI":"10.1007\/978-3-642-34584-5_7"},{"key":"9296_CR108","unstructured":"Wong E, Sridharan S. Utilise vocal tract length normalisation for robust automatic language identification. In: Proceedings of the 9th Australian international conference on speech science and technology. Melbourne, Victoria, Australia; 2002."},{"key":"9296_CR109","volume-title":"Vorlesungen \u00fcber die Menschen- und Tierseele","author":"W Wundt","year":"1906","unstructured":"Wundt W. Vorlesungen \u00fcber die Menschen- und Tierseele. 4th ed. Leipzig: L. Voss; 1906.","edition":"4"},{"key":"9296_CR110","unstructured":"Young S, Evermann G, Gales M, Hain T, Kershaw D, Liu X, Moore G, Odell J, Ollason D, Povey D, Valtchev V, Woodland P. The HTK book (for HTK Version 3.4). Cambridge: Cambridge University Press; 2006."},{"key":"9296_CR111","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1109\/TPAMI.2008.52","volume":"31","author":"Z Zeng","year":"2009","unstructured":"Zeng Z, Pantic M, Roisman GI, Huang TS. A survey of affect recognition methods: audio, visual, and spontaneous expressions. IEEE Trans Pattern Anal Mach Intell. 2009;31:39\u201358.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"9296_CR112","doi-asserted-by":"crossref","first-page":"570","DOI":"10.1109\/TMM.2008.921737","volume":"10","author":"Z Zeng","year":"2008","unstructured":"Zeng Z, Tu J, Pianfetti BM, Huang TS. Audio\u2013visual affective expression recognition through multistream fused HMM. Trans Multimed. 2008;10(4):570\u20137.","journal-title":"Trans Multimed"},{"key":"9296_CR113","doi-asserted-by":"crossref","unstructured":"Zhan P, Waibel A. Vocal tract length normalization for large vocabulary continuous speech recognition. Technical report, CMU-CS-97-148. Carnegie Mellon University; 1997.","DOI":"10.21236\/ADA333514"},{"key":"9296_CR114","first-page":"46","volume-title":"Multimedia and signal processing, communications in computer and information science","author":"S Zhang","year":"2012","unstructured":"Zhang S, Li L, Zhao Z. Audio\u2013visual emotion recognition based on facial expression and affective speech. In: Wang F, Lei J, Lau R, Zhang J, editors. Multimedia and signal processing, communications in computer and information science, vol. 346. Berlin: Springer; 2012. p. 46\u201352."}],"container-title":["Cognitive Computation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-014-9296-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s12559-014-9296-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12559-014-9296-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T09:24:05Z","timestamp":1746350645000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12559-014-9296-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,8,23]]},"references-count":114,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2014,12]]}},"alternative-id":["9296"],"URL":"https:\/\/doi.org\/10.1007\/s12559-014-9296-6","relation":{},"ISSN":["1866-9956","1866-9964"],"issn-type":[{"value":"1866-9956","type":"print"},{"value":"1866-9964","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,8,23]]}}}