{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:49:29Z","timestamp":1772909369104,"version":"3.50.1"},"reference-count":83,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2016,1]]},"DOI":"10.1109\/taslp.2015.2487051","type":"journal-article","created":{"date-parts":[[2015,10,5]],"date-time":"2015-10-05T18:45:38Z","timestamp":1444070738000},"page":"16-28","source":"Crossref","is-referenced-by-count":79,"title":["Towards a Small Set of Robust Acoustic Features for Emotion Recognition: Challenges"],"prefix":"10.1109","volume":"24","author":[{"given":"Marie","family":"Tahon","sequence":"first","affiliation":[]},{"given":"Laurence","family":"Devillers","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","first-page":"2830","article-title":"The Interspeech 2010paralinguistic challenge","author":"schuller","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref72","doi-asserted-by":"crossref","first-page":"344","DOI":"10.21437\/Interspeech.2009-111","article-title":"Cepstral and long-term features for emotion recognition","author":"dumouchel","year":"2009","journal-title":"Proc INTERSPEECH"},{"key":"ref71","author":"rabiner","year":"1993","journal-title":"Fundamentals of speech recognition"},{"key":"ref70","author":"xiao","year":"2007","journal-title":"?A dimensional emotion model driven multi-stage classification of emotional speech ?"},{"key":"ref76","first-page":"681","article-title":"Articulation degree as a prosodic dimension of expressive speech","author":"beller","year":"2008","journal-title":"Speech Prosody"},{"key":"ref77","first-page":"1022","article-title":"Muli-interval discretization of continuous-valued attributes for classification learning","author":"fayyad","year":"1993","journal-title":"Proc Int Joint Conf Artif Intell"},{"key":"ref74","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-474","article-title":"Vowels formants analysis allows straightforward detection of high arousal acted and spontaneous emotions","author":"vlasenko","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref39","article-title":"Cinemo - a French spoken language resource for complex emotions: Facts and baselines","author":"schuller","year":"2010","journal-title":"Proc LREC"},{"key":"ref75","first-page":"889","article-title":"Emotion recognition based on phoneme classes","author":"lee","year":"2004","journal-title":"Proc ICSLP"},{"key":"ref38","first-page":"449","article-title":"Features importance analysis for emotional speech classification","volume":"13784 2005","author":"tao","year":"2005","journal-title":"ACII"},{"key":"ref78","first-page":"412","article-title":"A comparative study on feature selection in text categorization","author":"yang","year":"1997","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1979.11327"},{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2010-365","article-title":"A quick sequential forward floating 
feature selection algorithm for emotion detection from speech","author":"brendel","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15760-8_43"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.21437\/SpeechProsody.2010-123","article-title":"Approaching multi-lingual emotion recognition from speech - on language dependency of acoustic\/prosodic features for anger recognition","author":"polzehl","year":"2010","journal-title":"Speech Prosody"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1109\/TSA.2004.838534","article-title":"Toward detecting emotions in spoken dialogs","volume":"13","author":"lee","year":"2005","journal-title":"IEEE Trans Speech Audio Process"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2008.09.003"},{"key":"ref36","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2010-285","article-title":"Acoustic feature analysis in speech emotion primitives estimation","author":"wu","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-9868.2007.00627.x"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2008.10.005"},{"key":"ref60","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/S0167-6393(00)00067-4","article-title":"Transcriber: Development and use of a tool assisting speech corpora production","volume":"33","author":"barras","year":"2000","journal-title":"Speech Commun"},{"key":"ref62","doi-asserted-by":"crossref","DOI":"10.21437\/SpeechProsody.2012-173","article-title":"Usual voice quality features for emotional valence detection","author":"tahon","year":"2012","journal-title":"Speech Prosody"},{"key":"ref61","first-page":"161","author":"scherer","year":"1994","journal-title":"Affect Bursts"},{"key":"ref63","article-title":"Building a system for emotions detection from speech to control an affective avatar","author":"brendel","year":"2010","journal-title":"Proc LREC"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2010.03.003"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2008.03.012"},{"key":"ref27","first-page":"240","article-title":"Ceices: Combining efforts for improving automatic classification of emotional user states: A 'forced co-operation' 
initiative","author":"batliner","year":"2006","journal-title":"Proc Lang Technol Conf"},{"key":"ref65","article-title":"A large set of audio features for sound description (similarity and classification) in the CUIDADO project","author":"peeters","year":"2004","journal-title":"Proc IRCAM"},{"key":"ref66","first-page":"97","article-title":"Accurate short-term analysis of the fundamental frequency and the harmonics-to-noise ratio of a sampled sound","volume":"17","author":"boersma","year":"1993","journal-title":"Inst Phonetic Sci Univ of Amsterdam Report"},{"key":"ref29","article-title":"The relevance of feature type for the automatic classification of emotional user states: Low level descriptors and functionals","author":"schuller","year":"2007","journal-title":"Proc INTERSPEECH"},{"key":"ref67","article-title":"Efficient spectral envelope estimation and its application to pitch shifting and envelope preservation","author":"r\ufffdbel","year":"2005","journal-title":"Proc Digital Audio Effects (DAFx)"},{"key":"ref68","article-title":"Hilbert-Huang transform for non-linear characterization of speech rhythm","author":"ringeval","year":"2009","journal-title":"Proc Workshop Non Linear Speech Process (ISCA)"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.3813\/AAA.918307"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2006-275","article-title":"Real-life emotions detection with lexical and paralinguistic cues on human-human call center dialogs","author":"devillers","year":"2006","journal-title":"Proc INTERSPEECH"},{"key":"ref1","author":"devillers","year":"2010","journal-title":"Automatic Detection of Emotion from Vocal Expression"},{"key":"ref20","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-781","article-title":"Real-life emotion detection from speech in human-robot interaction: Experiments across diverse corpora with child and adult voices","author":"tahon","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref22","article-title":"Affective links in a child-robot interaction","author":"delaborde","year":"2010","journal-title":"Proc LREC"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1655260.1655265"},{"key":"ref24","author":"ekman","year":"1999","journal-title":"Handbook of Cognition and Emotion"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1155\/2010\/782802"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2012.02.005"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2013.26"},{"key":"ref51","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-89","article-title":"Personality traits detection using a parallelized modified SFFS algorithm","author":"chastagnol","year":"2012","journal-title":"Proc INTERSPEECH"},{"key":"ref59","article-title":"Coding emotional events in audiovisual corpora","author":"devillers","year":"2008","journal-title":"Proc LREC"},{"key":"ref58","article-title":"Impact of the social behaviors of the robot on the user?s emotions: Importance of the task and the subject?s age","author":"delaborde","year":"2012","journal-title":"Proc Workshop Affect Compagnons Artif Interact"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/1877826.1877846"},{"key":"ref56","article-title":"A corpus for identification of speakers and their emotions","author":"tahon","year":"2010","journal-title":"Proc LREC"},{"key":"ref55","first-page":"835","article-title":"An 
analytic distance metric for Gaussian mixture models with application in image retrieval","volume":"ii","author":"sfikas","year":"2005","journal-title":"Proc 15th Int Conf Artif Neural Netw Formal Models And Their Applicat (ICANN'05)"},{"key":"ref54","article-title":"Phone clustering using the Bhattacharyya distance","volume":"4","author":"mak","year":"1996","journal-title":"Proc ICSLP"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1121\/1.3647301"},{"key":"ref52","doi-asserted-by":"crossref","DOI":"10.21437\/SpeechProsody.2010-134","article-title":"Acoustic measures characterizing anger across corpora collected in artificial or natural context","author":"tahon","year":"2010","journal-title":"Speech Prosody"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"1695","DOI":"10.21437\/Eurospeech.1997-482","article-title":"Design, recording and verification of a Danish emotional speech database","author":"engberg","year":"1997","journal-title":"Proc EUROSPEECH"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"1517","DOI":"10.21437\/Interspeech.2005-446","article-title":"A database of German emotional speech","author":"burkhardt","year":"2005","journal-title":"Proc INTERSPEECH"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2010.8"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2011.01.011"},{"key":"ref13","first-page":"17","article-title":"Laryngealizations and emotions: How many babushkas?","author":"batliner","year":"2007","journal-title":"Proc Int Workshop Paralinguistic Speech - Between Models and Data (ParaLing'07)"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.03.007"},{"key":"ref15","first-page":"71","author":"batliner","year":"2011","journal-title":"Automatic emotion recognition from speech"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1007\/s12369-015-0297-8"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2009.5349500"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-83476-9_37"},{"key":"ref17","first-page":"171","article-title":"\u00ab You stupid tin box \u00bb - children interacting with the Aibo robot: A cross-linguistic emotional speech corpus","author":"batliner","year":"2004","journal-title":"Proc LREC"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2011.20"},{"key":"ref83","author":"young","year":"2006","journal-title":"The HTK Book (Version 3.4)"},{"key":"ref19","article-title":"Speech & multimodal resources: The HERME database of spontaneous multimodal human-robot dialogues","author":"han","year":"2012","journal-title":"Proc LREC"},{"key":"ref80","article-title":"Comparative study of speaker personality traits recognition in conversational and broadcast news speech","author":"alam","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2005.09.024"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2009.12.003"},{"key":"ref6","first-page":"695","volume":"44","author":"scherer","year":"2005","journal-title":"What Are Emotions? 
and How Can They Be Measured?"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(02)00084-5"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"488","DOI":"10.1007\/978-3-540-74889-2_43","article-title":"The HUMAINE database: Addressing the collection and annotation of naturalistic and induced emotional data","volume":"4638","author":"douglas-cowie","year":"2007","journal-title":"Lecture Notes in Comput Sci Affect Comput Intell Interact"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/0092-6566(77)90037-X"},{"key":"ref49","doi-asserted-by":"crossref","first-page":"2350","DOI":"10.21437\/Interspeech.2010-643","article-title":"Real-life emotion-related states detection in call centers: A cross-corpora study","author":"devillers","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15184-2_14"},{"key":"ref46","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2009-103","article-title":"The Interspeech 2009 emotion challenge","author":"schuller","year":"2009","journal-title":"Proc INTERSPEECH"},{"key":"ref45","doi-asserted-by":"crossref","first-page":"1628","DOI":"10.21437\/Interspeech.2012-447","article-title":"A preliminary study on cross-databases emotion recognition using the glottal features in speech","author":"sun","year":"2012","journal-title":"Proc INTERSPEECH"},{"key":"ref48","article-title":"An acoustic study of emotions expressed in speech","author":"yildirim","year":"2004","journal-title":"Proc ICSLP"},{"key":"ref47","article-title":"Selecting training data for cross-corpus speech emotion recognition: Prototypicality vs. generalization","author":"schuller","year":"2011","journal-title":"Proc AVIOS Speech Process"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163986"},{"key":"ref41","first-page":"77","article-title":"Cross-corpus classification of realistic emotions: Some pilot experiments","author":"eyben","year":"2010","journal-title":"Proc LREC Workshop EMOTION Corpora for Res Emot and Affect"},{"key":"ref44","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-468","article-title":"Using multiple databases for training emotion recognition: To unite or to vote?","author":"schuller","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/SocialCom-PASSAT.2012.97"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/7322312\/7289391.pdf?arnumber=7289391","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,30]],"date-time":"2025-05-30T22:37:09Z","timestamp":1748644629000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7289391\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,1]]},"references-count":83,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/taslp.2015.2487051","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,1]]}}}
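
The envelope of the record above ("status", "message-type":"work", "message-version") matches the response shape of the public Crossref REST API, which appears to be its source. As a minimal sketch under that assumption, the snippet below re-fetches the same work by its DOI using only the Python standard library and prints a few of the fields shown above; the endpoint URL is the standard Crossref works route, and the field keys ("title", "author", "container-title", "reference-count") are taken directly from the record.

# Minimal sketch (assumption: the record above is a Crossref REST API "work" response).
# Re-fetch the same work by DOI and print a few of the fields it contains.
import json
import urllib.request

DOI = "10.1109/taslp.2015.2487051"
url = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(url, timeout=30) as resp:
    work = json.load(resp)["message"]          # the "message" object is the record above

print(work["title"][0])                        # article title
print(", ".join(f'{a.get("given", "")} {a["family"]}'.strip()
                for a in work.get("author", [])))   # "Marie Tahon, Laurence Devillers"
print("journal:", work["container-title"][0])  # IEEE/ACM Trans. Audio, Speech, and Language Processing
print("references:", work["reference-count"])  # 83 for this record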