{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T12:57:06Z","timestamp":1770814626153,"version":"3.50.1"},"publisher-location":"London","reference-count":49,"publisher":"Springer London","isbn-type":[{"value":"9781848003057","type":"print"},{"value":"9781848003064","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-1-84800-306-4_6","type":"book-chapter","created":{"date-parts":[[2008,12,1]],"date-time":"2008-12-01T22:01:16Z","timestamp":1228168876000},"page":"93-110","source":"Crossref","is-referenced-by-count":14,"title":["Emotion Perception and Recognition from Speech"],"prefix":"10.1007","author":[{"given":"Chung-Hsien","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jui-Feng","family":"Yeh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ze-Jing","family":"Chuang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"6_CR1_6","first-page":"181","volume-title":"IEEE International Symposium on Circuits and Systems","author":"M. W. Bhatti","year":"2004","unstructured":"Bhatti, M. W., Wang, Y., & Guan, L. (2004). A neural network approach for human emotion recognition in speech. IEEE International Symposium on Circuits and Systems, Vancouver, Canada (pp. 181\u2013184)."},{"key":"6_CR2_6","first-page":"3","volume-title":"The Neuropsychology of Emotion","author":"J. C. Borod","year":"2000","unstructured":"Borod, J. C., & Madigan, N. K. (2000). Neuropsychology of emotion and emotional disorders: An overview and research directions. In J. C. Borod (Ed.), The Neuropsychology of Emotion (pp. 3\u201328). New York: Oxford University Press."},{"key":"6_CR3_6","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1023\/A:1013215010749","volume":"12","author":"C. Breazeal","year":"2002","unstructured":"Breazeal, C., & Aryananda, L. (2002). Recognition of affective communicative intent in robot-directed speech. Autonomic Robots, 12, 83\u2013104.","journal-title":"Autonomic Robots"},{"key":"6_CR4_6","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L. Breiman","year":"2001","unstructured":"Breiman, L. (2001). Random forests. Machine Learning, 45, 5\u201332.","journal-title":"Machine Learning"},{"issue":"2","key":"6_CR5_6","first-page":"1","volume":"9","author":"Z. J. Chuang","year":"2004","unstructured":"Chuang, Z. J., & Wu, C. H. (2004). Multi-modal emotion recognition from speech and text. International Journal of Computational Linguistics and Chinese Language Processing, 9(2), 1\u201318","journal-title":"International Journal of Computational Linguistics and Chinese Language Processing"},{"key":"6_CR6_6","doi-asserted-by":"crossref","first-page":"477","DOI":"10.21437\/Interspeech.2005-320","volume-title":"Interspeech 2005","author":"J. Cichosz","year":"2005","unstructured":"Cichosz, J., & Slot, K. (2005). Low-dimensional feature space derivation for emotion recognition. In Interspeech 2005, Lisbon, Portugal (pp. 477\u2013480)."},{"key":"6_CR7_6","unstructured":"Cichosz, J., & Slot, K. (2007). Emotion recognition in speech signal using emotion-extracting binary decision trees. ACII2007. http:\/\/www.di.uniba.it\/intint\/DC-ACII07\/Chicosz.pdf"},{"key":"6_CR8_6","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/TIT.1967.1053964","volume":"13","author":"T. T. Cover","year":"1967","unstructured":"Cover, T. T., & Hart, P. E. (1967). Nearest neighbour pattern classification. IEEE Transactions on Information Theory, 13, 21\u201327.","journal-title":"IEEE Transactions on Information Theory"},{"key":"6_CR9_6","volume-title":"Descartes' error: Emotion, reason and the human brain","author":"A. Damasio","year":"1994","unstructured":"Damasio, A. (1994). Descartes' error: Emotion, reason and the human brain. New York: Grosset\/Putnam."},{"issue":"6","key":"6_CR10_6","doi-asserted-by":"publisher","first-page":"568","DOI":"10.1109\/TSA.2003.818076","volume":"11","author":"L. Deng","year":"2003","unstructured":"Deng, L., Droppo, J., & Acero, A. (2003). Recursive estimation of nonstationary noise using iterative stochastic approximation for robust speech recognition. IEEE Transactions on Speech and Au-dio, 11(6), 568\u2013580.","journal-title":"IEEE Transactions on Speech and Au-dio"},{"key":"6_CR11_6","doi-asserted-by":"publisher","first-page":"407","DOI":"10.1016\/j.neunet.2005.03.007","volume":"18","author":"L. Devillers","year":"2005","unstructured":"Devillers, L., Vidrascu, L., & Lamel, L. (2005). Challenges in real-life emotion annotation and machine learning based detection. Neural Networks, 18, 407\u2013422.","journal-title":"Neural Networks"},{"key":"6_CR12_6","doi-asserted-by":"crossref","unstructured":"D'Mello, S. Picard, R. W. & Graesser, A. (2007). Towards an affect-sensitive autotutor. IEEE Intelligent Systems, Special issue on intelligent educational systems, 53\u201361.","DOI":"10.1109\/MIS.2007.79"},{"key":"6_CR13_6","volume-title":"Documentation of the Danish emotional speech database (DES). Internal AAU Report","author":"I. S. Engberg","year":"1996","unstructured":"Engberg, I. S., & Hansen, A. V. (1996). Documentation of the Danish emotional speech database (DES). Internal AAU Report, Center for Person Kommunikation, Denmark."},{"key":"6_CR14_6","doi-asserted-by":"crossref","first-page":"473","DOI":"10.21437\/Interspeech.2005-319","volume-title":"Interspeech 2005","author":"R. Fernandez","year":"2005","unstructured":"Fernandez, R., & Picard, R. W. (2005). Classical and novel discriminant features for affect recognition from speech. In Interspeech 2005, Lisbon, Portugal (pp. 473\u2013476)."},{"issue":"4","key":"6_CR15_6","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1016\/j.neunet.2005.03.006","volume":"18","author":"N. Fragopanagos","year":"2005","unstructured":"Fragopanagos, N. & Taylor, J. G. (2005). Emotion recognition in human-computer interaction. Neural Networks, 18(4), 389\u2013405.","journal-title":"Neural Networks"},{"key":"6_CR16_6","unstructured":"Huber, R., Noth, E., Batliner, A., Buckow, J., Warnke, V., & Niemann, H. (1998). You beep machine - emotion in automatic speech understanding systems. In Proceedings of the Workshop on Text, Speech, and Dialog. Masark University (pp. 223\u2013228)."},{"key":"6_CR17_6","unstructured":"Inanoglu, Z., & Caneel, R. (2005). Emotive alertHMM-based emotion detection in voicemail messages. In IEEE Intelligent User Interfaces '05, San Diego (pp. 251\u2013253)."},{"key":"6_CR18_6","first-page":"205","volume":"67","author":"G. Katz","year":"1996","unstructured":"Katz, G., Cohn, J., & Moore, C. (1996). A combination of vocal F0 dynamic and summary features discriminates between pragmatic categories of infant-directed speech. Child Development, 67, 205\u2013217.","journal-title":"Child Development"},{"key":"6_CR19_6","doi-asserted-by":"crossref","unstructured":"Kwon O., Chan K., Hao J., & Lee T. (2003). Emotion recognition by speech signals. In Proceedings of Eurospeech 2003, Geneva (pp. 125\u2013128).","DOI":"10.21437\/Eurospeech.2003-80"},{"key":"6_CR20_6","doi-asserted-by":"crossref","first-page":"1825","DOI":"10.21437\/Interspeech.2007-509","volume-title":"Proceedings of Interspeech 2007","author":"C.-H. Lee","year":"2007","unstructured":"Lee, C.-H., Clements, M., Dusan, S., Fosler-Lussier, E., Johnson, K., Juang, B.-H., & Rabiner, L. (2007). An overview on automatic speech attribute transcription (ASAT). In Proceedings of Interspeech 2007, August 27\u201331, Antwerp, Belgium (pp. 1825\u20131828)."},{"issue":"2","key":"6_CR21_6","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1109\/TSA.2004.838534","volume":"13","author":"C. M. Lee","year":"2005","unstructured":"Lee, C. M., & Narayanan, S. S. (2005). Toward detecting emotions in spoken dialogs. IEEE Transactions on Speech and Audio Processing, 13(2), 293\u2013303.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"6_CR22_6","volume-title":"Prosody in Speech Recognition and Understanding","author":"M. Levity","year":"2001","unstructured":"Levity, M., Huberz, R., Batlinery, A., & Noeth, E. (2001). Use of prosodic speech characteristics for automated detection of alcohol intoxication. In Prosody in Speech Recognition and Understanding, Molly Pitcher Inn, Red Bank, NJ."},{"key":"6_CR23_6","doi-asserted-by":"crossref","first-page":"1845","DOI":"10.21437\/Interspeech.2005-583","volume-title":"Proceedings of Interspeech","author":"J. Liscombe","year":"2005","unstructured":"Liscombe, J., Riccardi, G., & Hakkani-Tr, D. (2005). Using context to improve emotion detection in spoken dialogue systems. In Proceedings of Interspeech, Lisbon, Portugal (pp. 1845\u20131848)."},{"key":"6_CR24_6","doi-asserted-by":"crossref","unstructured":"Litman D., & Silliman, S. (2004). Itspoke. An intelligent tutoring spoken dialogue system. In Proceedings of the 4th Meeting of HLT\/NAACL (Companion Proceedings), Boston, May (pp. 233\u2013236).","DOI":"10.3115\/1614025.1614027"},{"key":"6_CR25_6","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1016\/j.specom.2006.11.004","volume":"49","author":"D. Morrison","year":"2007","unstructured":"Morrison, D., Wang, R., & De Silva, L. C. (2007). Ensemble methods for spoken emotion recognition in call-centres. Speech Communication, 49, 98\u2013112.","journal-title":"Speech Communication"},{"issue":"2","key":"6_CR26_6","doi-asserted-by":"publisher","first-page":"1097","DOI":"10.1121\/1.405558","volume":"93","author":"I. R. Murray","year":"1993","unstructured":"Murray, I. R., & Arnott, J. L. (1993). Towards the simulation of emotion in synthetic speech: A review of the literature on human vocal emotion. Journal of the Acoustic Society of America, 93(2), 1097\u20131108.","journal-title":"Journal of the Acoustic Society of America"},{"key":"6_CR27_6","doi-asserted-by":"crossref","unstructured":"Nakatsu, R., Solomides, A., & Tosa, N. (1999). Emotion recognition and its application to computer agents with spontaneous interactive capabilities. In Proceedings of the IEEE International Conference on Multimedia Computing and Systems (ICMCS '99) (vol. 2; pp. 804\u2013808).","DOI":"10.1109\/MMCS.1999.778589"},{"issue":"4","key":"6_CR28_6","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1016\/S0167-6393(03)00099-2","volume":"41","author":"T. Nwe","year":"2003","unstructured":"Nwe, T., Foo, S., & De Silva, L. (2003). Speech emotion recognition using hidden Markov models. Speech Communications, 41(4), 603\u2013623.","journal-title":"Speech Communications"},{"key":"6_CR29_6","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1037\/0033-295X.97.3.315","volume":"97","author":"A. Ortony","year":"1990","unstructured":"Ortony, A., & Turner, T. J. (1990). What's basic about basic emotions? Psychological Review, 97, 315\u2013331.","journal-title":"Psychological Review"},{"key":"6_CR30_6","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1016\/S1071-5819(02)00141-6","volume":"59","author":"P. Oudeyer","year":"2003","unstructured":"Oudeyer, P. (2003). The production and recognition of emotions in speech: features and algorithms. International Journal of Human\u2014Computer Studies, 59, 157\u2013183.","journal-title":"International Journal of Human\u2014Computer Studies"},{"key":"6_CR31_6","unstructured":"Paeschke, A., & Sendlmeier, W. (2000). Prosodic characteristics of emotional speech: Measurements of fundamental frequency movements. In Proceedings of ISCA ITRW on Speech and Emotion, Belfast (pp. 75\u201380)."},{"issue":"9","key":"6_CR32_6","doi-asserted-by":"publisher","first-page":"1370","DOI":"10.1109\/JPROC.2003.817122","volume":"91","author":"M. Pantic","year":"2003","unstructured":"Pantic, M., & Rothkrantz, L. J. K. (2003). Toward an affect-sensitive multimodal human\u2014computer interaction. Proceedings of the IEEE, 91(9), 1370\u20131390","journal-title":"Proceedings of the IEEE"},{"key":"6_CR33_6","volume-title":"Emotions in social psychology","author":"W. Parrott","year":"2001","unstructured":"Parrott, W. (2001). Emotions in social psychology, Philadelphia: Psychology Press."},{"key":"6_CR34_6","doi-asserted-by":"crossref","unstructured":"Petrushin, V. (2000). Emotion recognition in speech signal: Experimental study, development, and application. In Proceedings of the Sixth International Conference on Spoken Language Processing (ICSLP 2000), Beijing (pp. 222\u2013225).","DOI":"10.21437\/ICSLP.2000-791"},{"key":"6_CR35_6","unstructured":"Petrushin, V. A. (1999). Emotion in speech recognition and application to call centers. In Proceedings of Artificial Neural Networks In Engineering (ANNIE 99) (pp. 7\u201310)."},{"issue":"10","key":"6_CR36_6","doi-asserted-by":"publisher","first-page":"1175","DOI":"10.1109\/34.954607","volume":"23","author":"R. W. Picard","year":"2001","unstructured":"Picard, R. W., Vyzas, E., & Healey, J. (2001). Toward machine emotional intelligence: analysis of affective physiological state. IEEE Transactions on Pattern Analysis and Machine Intelligence, 23(10), 1175\u20131191","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"6_CR37_6","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1016\/S1071-5819(02)00141-6","volume":"59","author":"O. Pierre-Yves","year":"2003","unstructured":"Pierre-Yves, O. (2003). The production and recognition of emotions in speech: Features and algorithms. International Journal of Human-Computer Studies, 59, 157\u2013183","journal-title":"International Journal of Human-Computer Studies"},{"key":"6_CR38_6","doi-asserted-by":"crossref","unstructured":"Rahurkar, M. A., & Hansen, J. H. L.(2003). Frequency distribution based weighted sub-band approach for classification of emotional\/stressful content in speech. In Eighth European Conference on Speech Communication and Technology, Geneva (pp. 721\u2013724).","DOI":"10.21437\/Eurospeech.2003-305"},{"key":"6_CR39_6","volume-title":"The media equation: How people treat computers, television, and new media like real people and places","author":"B. Reeves","year":"1996","unstructured":"Reeves, B., & Nass, C. (1996). The media equation: How people treat computers, television, and new media like real people and places. University of Chicago Press, Chicago."},{"key":"6_CR40_6","doi-asserted-by":"publisher","first-page":"637","DOI":"10.1002\/0470013494.ch30","volume-title":"Handbook of cognition and emotion","author":"K. R. Scherer","year":"1999","unstructured":"Scherer, K. R. (1999). Appraisal theory. In T. Dalgleish, & M. Power (Eds.), Handbook of cognition and emotion (pp. 637\u2013663). New York: John Wiley."},{"key":"6_CR41_6","doi-asserted-by":"crossref","unstructured":"Shami, M., & Kamel, M. (2005). Segment-based approach to the recognition of emotions in speech. In IEEE Conference on Multimedia and Expo (ICME05), Amsterdam, The Netherlands. http:\/\/ieeexplore.ieee.org\/iel5\/10203\/32544\/01521436.pdf?tp=&isnumber= &arnumber=1521436","DOI":"10.1109\/ICME.2005.1521436"},{"key":"6_CR42_6","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1016\/j.specom.2007.01.006","volume":"49","author":"M. Shami","year":"2007","unstructured":"Shami, M., & Verhelst, W. (2007). An evaluation of the robustness of existing supervised machine learning approaches to the classification of emotions in speech. Speech Communication 49, 201\u2013212.","journal-title":"Speech Communication"},{"key":"6_CR43_6","volume-title":"Eurospeech 2005","author":"E. Shriberg","year":"2005","unstructured":"Shriberg, E. (2005). Spontaneous speech: How people really talk and why engineers should care. In Eurospeech 2005, Lisbon, Portugal."},{"key":"6_CR44_6","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1016\/S0167-6393(02)00049-3","volume":"39","author":"M. Slaney","year":"2003","unstructured":"Slaney, M., & McRoberts, G. (2003). A recognition system for affective vocalization. Speech Communication 39, 367\u2013384.","journal-title":"Speech Communication"},{"issue":"1\u20132","key":"6_CR45_6","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1016\/S0167-6393(02)00083-3","volume":"40","author":"L. Ten Bosch","year":"2003","unstructured":"Ten Bosch, L. (2003). Emotions, speech and the ASR framework. Speech Communication 40(1\u20132), 213\u2013225.","journal-title":"Speech Communication"},{"key":"6_CR46_6","volume-title":"The nature of statistical learning theory","author":"V. Vapnik","year":"2005","unstructured":"Vapnik, V. (2005). The nature of statistical learning theory. New York: Springer-Verlag."},{"key":"6_CR47_6","unstructured":"Ververidis, D., Kotropoulos, C., & Pitas, I. (2005). Automatic emotional speech classification. In IEEE International Conference on Acoustics, Speech, and Signal Processing, Montreal (pp. 593\u2013596)."},{"key":"6_CR48_6","doi-asserted-by":"crossref","unstructured":"Wu, J., & Huo, Q. (2002). An environment compensated minimum classification error training approach and its evaluation on aurora2 database. In Seventh International Conference on Spoken Language, Denver (pp. 453\u2013456).","DOI":"10.21437\/ICSLP.2002-17"},{"key":"6_CR49_6","unstructured":"Yacoub, S., Simske, S., Lin, X., & Burns, J. (2003). Recognition of emotion in interactive voice systems. In: Proceedings of Eurospeech 2003, Eighth European Conference on Speech Communication and Technology, Geneva (pp. 729\u2013732)."}],"container-title":["Affective Information Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-1-84800-306-4_6.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,21]],"date-time":"2023-05-21T21:24:20Z","timestamp":1684704260000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-1-84800-306-4_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9781848003057","9781848003064"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-1-84800-306-4_6","relation":{},"subject":[]}}