{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T21:09:18Z","timestamp":1774127358051,"version":"3.50.1"},"reference-count":74,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2012,3,20]],"date-time":"2012-03-20T00:00:00Z","timestamp":1332201600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2012,6]]},"DOI":"10.1007\/s10772-012-9139-3","type":"journal-article","created":{"date-parts":[[2012,3,19]],"date-time":"2012-03-19T10:37:14Z","timestamp":1332153434000},"page":"265-289","source":"Crossref","is-referenced-by-count":66,"title":["Emotion recognition from speech using source, system, and prosodic features"],"prefix":"10.1007","volume":"15","author":[{"given":"Shashidhar G.","family":"Koolagudi","sequence":"first","affiliation":[]},{"given":"K. Sreenivasa","family":"Rao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,3,20]]},"reference":[{"key":"9139_CR1","unstructured":"Anjani, A. V. N. S. (2000). Autoassociate neural network models for processing degraded speech. Master\u2019s thesis, Department of Computer Science and Engineering, Indian Institute of Technology Madras, Chennai 600 036, India."},{"issue":"6","key":"9139_CR2","doi-asserted-by":"crossref","first-page":"1687","DOI":"10.1121\/1.1913303","volume":"52","author":"B. S. Atal","year":"1972","unstructured":"Atal, B. S. (1972). Automatic speaker recognition based on pitch contours. The Journal of the Acoustical Society of America, 52(6), 1687\u20131697.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9139_CR3","doi-asserted-by":"crossref","first-page":"305","DOI":"10.1109\/ICISIP.2004.1287672","volume-title":"The international conference on intelligent sensing and information processing 2004","author":"A. Bajpai","year":"2004","unstructured":"Bajpai, A., & Yegnanarayana, B. (2004). Exploring features for audio clip classification using LP residual and AANN models. In The international conference on intelligent sensing and information processing 2004 (ICISIP 2004), Chennai, India, Jan. 2004 (pp. 305\u2013310)."},{"key":"9139_CR4","doi-asserted-by":"crossref","first-page":"252","DOI":"10.1016\/j.specom.2005.02.016","volume":"46","author":"T. Banziger","year":"2005","unstructured":"Banziger, T., & Scherer, K. R. (2005). The role of intonation in emotional expressions. Speech Communication, 46, 252\u2013267.","journal-title":"Speech Communication"},{"key":"9139_CR5","doi-asserted-by":"crossref","first-page":"1091","DOI":"10.21437\/Interspeech.2009-34","volume-title":"INTERSPEECH-09","author":"G. Bapineedu","year":"2009","unstructured":"Bapineedu, G., Avinash, B., Gangashetty, S. V., & Yegnanarayana, B. (2009). Analysis of lombard speech using excitation source information. In INTERSPEECH-09, Brighton, UK, September 6\u201310 (pp. 1091\u20131094)."},{"issue":"7\u20138","key":"9139_CR6","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1016\/j.specom.2010.02.010","volume":"52","author":"D. Bitouk","year":"2010","unstructured":"Bitouk, D., Verma, R., & Nenkova, A. (2010). Class-level spectral features for emotion recognition. Speech Communication, 52(7\u20138), 613\u2013625.","journal-title":"Speech Communication"},{"key":"9139_CR7","first-page":"151","volume-title":"ITRW on speech and emotion","author":"F. Burkhardt","year":"2000","unstructured":"Burkhardt, F., & Sendlmeier, W. F. (2000). Verification of acoustical correlates of emotional speech using formant synthesis. In ITRW on speech and emotion, Newcastle, Northern Ireland, UK, Sept. 2000 (pp. 151\u2013156)."},{"key":"9139_CR8","volume-title":"Interspeech","author":"F. Burkhardt","year":"2005","unstructured":"Burkhardt, F., Paeschke, A., Rolfes, M., Sendlmeier, W., & Weiss, B. (2005). A database of German emotional speech. In Interspeech."},{"key":"9139_CR9","first-page":"1","volume-title":"JAVIOS","author":"J. E. Cahn","year":"1990","unstructured":"Cahn, J. E. (1990). The generation of affect in synthesized speech. In JAVIOS, Jul. 1990 (pp. 1\u201319)."},{"key":"9139_CR10","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/S0167-6393(02)00071-7","volume":"40","author":"R. Cowie","year":"2003","unstructured":"Cowie, R., & Cornelius, R. R. (2003). Describing the emotional states that are expressed in speech. Speech Communication, 40, 5\u201332.","journal-title":"Speech Communication"},{"key":"9139_CR11","doi-asserted-by":"crossref","first-page":"88","DOI":"10.1121\/1.413664","volume":"98","author":"K. E. Cummings","year":"1995","unstructured":"Cummings, K. E., & Clements, M. A. (1995). Analysis of the glottal excitation of emotionally styled and stressed speech. The Journal of the Acoustical Society of America, 98, 88\u201398.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9139_CR12","volume-title":"ICSLP 96","author":"F. Dellaert","year":"1996","unstructured":"Dellaert, F., Polzin, T., & Waibel, A. (1996). Recognising emotions in speech. In ICSLP 96, Oct. 1996."},{"key":"9139_CR13","doi-asserted-by":"crossref","first-page":"1970","DOI":"10.1109\/ICSLP.1996.608022","volume-title":"4th international conference on spoken language processing","author":"F. Dellert","year":"1996","unstructured":"Dellert, F., Polzin, T., & Waibel, A. (1996). Recognizing emotion in speech. In 4th international conference on spoken language processing, Philadelphia, PA, USA, Oct. 1996 (pp. 1970\u20131973)."},{"key":"9139_CR14","doi-asserted-by":"crossref","first-page":"954","DOI":"10.1109\/TASL.2010.2047683","volume":"18","author":"S. Desai","year":"2010","unstructured":"Desai, S., Black, A. W., Yegnanarayana, B., & Prahallad, K. (2010). Spectral mapping using artificial neural networks for voice conversion. IEEE Transactions on Audio, Speech, and Language Processing, 18, 954\u2013964.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9139_CR15","volume-title":"Principal component neural networks: Theory and applications","author":"K. I. Diamantaras","year":"1996","unstructured":"Diamantaras, K. I., & Kung, S. Y. (1996). Principal component neural networks: Theory and applications. New York: Wiley."},{"key":"9139_CR16","unstructured":"Gupta, C. S. (2003). Significance of source features for speaker recognition. Master\u2019s thesis, Department of Computer Science and Engineering, Indian Institute of Technology Madras, Chennai 600 036, India."},{"key":"9139_CR17","volume-title":"Int. joint conf. neural networks","author":"C. S. Gupta","year":"2002","unstructured":"Gupta, C. S., Prasanna, S. R. M., & Yegnanarayana, B. (2002). Autoassociative neural network models for online speaker verification using source features from vowels. In Int. joint conf. neural networks, Honolulu, Hawaii, USA, May 2002."},{"key":"9139_CR18","first-page":"1814","volume-title":"INTERSPEECH\u2014ICSLP","author":"Y. hao\u00a0Kao","year":"2006","unstructured":"hao\u00a0Kao, Y., & shan\u00a0Lee, L. (2006). Feature analysis for emotion recognition from Mandarin speech considering the special characteristics of Chinese language. In INTERSPEECH\u2014ICSLP, Pittsburgh, Pennsylvania, Sept. 2006 (pp. 1814\u20131817)."},{"key":"9139_CR19","volume-title":"Neural networks: A comprehensive foundation","author":"S. Haykin","year":"1999","unstructured":"Haykin, S. (1999) Neural networks: A comprehensive foundation. New Delhi: Pearson Education Asia, Inc."},{"key":"9139_CR20","volume-title":"A novel source analysis method by matching spectral characters of LF model with STRAIGHT spectrum","author":"L. Z. Hua","year":"2005","unstructured":"Hua, L. Z., Yu, H., & Hua, W. R. (2005). A novel source analysis method by matching spectral characters of LF model with STRAIGHT spectrum. Berlin: Springer."},{"key":"9139_CR21","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1016\/S0167-6393(02)00081-X","volume":"40","author":"A. Iida","year":"2003","unstructured":"Iida, A., Campbell, N., Higuchi, F., & Yasumura, M. (2003). A corpus-based speech synthesis system with emotion. Speech Communication, 40, 161\u2013187.","journal-title":"Speech Communication"},{"key":"9139_CR22","first-page":"854","volume-title":"Int. joint conf. neural networks","author":"M. S. Ikbal","year":"1999","unstructured":"Ikbal, M. S., Misra, H., & Yegnanarayana, B. (1999). Analysis of autoassociative mapping neural networks. In Int. joint conf. neural networks, USA (pp. 854\u2013858)."},{"key":"9139_CR23","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1109\/ICDT.2009.30","volume-title":"Fourth international conference on digital telecommunications","author":"T. Iliou","year":"2009","unstructured":"Iliou, T., & Anagnostopoulos, C. N. (2009). Statistical evaluation of speech features for emotion recognition. In Fourth international conference on digital telecommunications, Colmar, France, July 2009 (pp. 121\u2013126)."},{"issue":"9","key":"9139_CR24","first-page":"1","volume":"9","author":"N. Kamaruddin","year":"2009","unstructured":"Kamaruddin, N., & Wahab, A. (2009). Features extraction for speech emotion. Journal of Computational Methods in Science and Engineering, 9(9), 1\u201312.","journal-title":"Journal of Computational Methods in Science and Engineering"},{"key":"9139_CR25","first-page":"1548","volume-title":"Int. joint conf. neural networks","author":"S. P. Kishore","year":"2001","unstructured":"Kishore, S. P., & Yegnanarayana, B. (2001). Online text-independent speaker verification system using autoassociative neural network models. In Int. joint conf. neural networks, Washington, USA, Aug. 2001 (Vol. 2, pp. 1548\u20131553)."},{"key":"9139_CR26","series-title":"Communications in computer and information science, LNCS","volume-title":"IITKGP-SESC: Speech database for emotion analysis","author":"S. G. Koolagudi","year":"2009","unstructured":"Koolagudi, S. G., Maity, S., Kumar, V. A., Chakrabarti, S., & Rao, K. S. (2009). IITKGP-SESC: Speech database for emotion analysis. Communications in computer and information science, LNCS. Berlin: Springer."},{"key":"9139_CR27","series-title":"LNCS","doi-asserted-by":"crossref","first-page":"537","DOI":"10.1007\/978-3-642-11164-8_87","volume-title":"The 3rd international conference on pattern recognition and machine intelligence","author":"S. G. Koolagudi","year":"2009","unstructured":"Koolagudi, S. G., & Rao, K. S. (2009). Exploring speech features for classifying emotions along valence dimension. In S. Chandhury, et al. (Eds.), LNCS. The 3rd international conference on pattern recognition and machine intelligence (PReMI-09), IIT Delhi, December 2009 (pp. 537\u2013542). Heidelberg: Springer."},{"key":"9139_CR28","doi-asserted-by":"crossref","first-page":"1591","DOI":"10.21437\/Interspeech.2009-473","volume-title":"INTERSPEECH-09","author":"K. S. Kumar","year":"2009","unstructured":"Kumar, K. S., Reddy, M. S. H., Murty, K. S. R., & Yegnanarayana, B. (2009). Analysis of laugh signals for detecting in continuous speech. In INTERSPEECH-09, Brighton, UK, September 6\u201310 (pp. 1591\u20131594)."},{"key":"9139_CR29","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1109\/TSA.2004.838534","volume":"13","author":"C. M. Lee","year":"2005","unstructured":"Lee, C. M., & Narayanan, S. S. (2005). Toward detecting emotions in spoken dialogs. IEEE Transactions on Speech and Audio Processing, 13, 293\u2013303.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9139_CR30","first-page":"313","volume-title":"European conf. speech processing and technology (EUROSPEECH)","author":"J. H. L. Liu","year":"1997","unstructured":"Liu, J. H. L., & Palm, G. (1997). On the use of features from prediction residual signal in speaker recognition. In European conf. speech processing and technology (EUROSPEECH) (pp. 313\u2013316)."},{"key":"9139_CR31","doi-asserted-by":"crossref","first-page":"493","DOI":"10.21437\/Interspeech.2005-324","volume-title":"INTERSPEECH","author":"I. Luengo","year":"2005","unstructured":"Luengo, I., Navas, E., Hernez, I., & Snchez, J. (2005). Automatic emotion recognition using prosodic parameters. In INTERSPEECH, Lisbon, Portugal, Sept. 2005 (pp. 493\u2013496)."},{"key":"9139_CR32","first-page":"IV17","volume-title":"ICASSP","author":"M. Lugger","year":"2007","unstructured":"Lugger, M., & Yang, B. (2007). The relevance of voice quality features in speaker independent emotion recognition. In ICASSP, Honolulu, Hawaii, USA, May 2007 (pp. IV17\u2013IV20). New York: IEEE."},{"key":"9139_CR33","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1109\/ICISIP.2004.1287674","volume-title":"International conference on intelligent sensing and information processing","author":"L. Mary","year":"2004","unstructured":"Mary, L., & Yegnanarayana, B. (2004). Autoassociative neural network models for language identification. In International conference on intelligent sensing and information processing, Aug. 24 2004 (pp. 317\u2013320). New York: IEEE."},{"key":"9139_CR34","volume-title":"ISCA workshop on speech and emotion","author":"S. McGilloway","year":"2000","unstructured":"McGilloway, S., Cowie, R., Douglas-Cowie, E., Gielen, S., Westerdijk, M., & Stroeve, S. (2000). Approaching automatic recognition of emotion from voice: A rough benchmark. In ISCA workshop on speech and emotion, Belfast."},{"key":"9139_CR35","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1007\/s11760-008-0097-9","volume":"4","author":"C. K. Mohan","year":"2008","unstructured":"Mohan, C. K., & Yegnanarayana, B. (2008). Classification of sport videos using edge-based features and autoassociative neural network models. Signal, Image and Video Processing, 4, 61\u201373.","journal-title":"Signal, Image and Video Processing"},{"key":"9139_CR36","volume-title":"8th international symposium on signal processing and its applications","author":"O. M. Mubarak","year":"2005","unstructured":"Mubarak, O. M., Ambikairajah, E., & Epps, J. (2005). Analysis of an MFCC-based audio indexing system for efficient coding of multimedia sources. In 8th international symposium on signal processing and its applications, Sydney, Australia, Aug. 2005."},{"key":"9139_CR37","doi-asserted-by":"crossref","first-page":"369","DOI":"10.1016\/0167-6393(95)00005-9","volume":"16","author":"I. R. Murray","year":"1995","unstructured":"Murray, I. R., & Arnott, J. L. (1995). Implementation and testing of a system for producing emotion by rule in synthetic speech. Speech Communication, 16, 369\u2013390.","journal-title":"Speech Communication"},{"key":"9139_CR38","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1016\/S0167-6393(96)00046-5","volume":"20","author":"I. R. Murray","year":"1996","unstructured":"Murray, I. R., Arnott, J. L., & Rohwer, E. A. (1996). Emotional stress in synthetic speech: Progress and future directions. Speech Communication, 20, 85\u201391.","journal-title":"Speech Communication"},{"key":"9139_CR39","doi-asserted-by":"crossref","first-page":"1602","DOI":"10.1109\/TASL.2008.2004526","volume":"16","author":"K. S. R. Murty","year":"2008","unstructured":"Murty, K. S. R., & Yegnanarayana, B. (2008). Epoch extraction from speech signals. IEEE Transactions on Audio, Speech, and Language Processing, 16, 1602\u20131613.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9139_CR40","doi-asserted-by":"crossref","first-page":"1292","DOI":"10.1121\/1.396628","volume":"84","author":"H. Muta","year":"1988","unstructured":"Muta, H., Baer, T., Wagatsuma, K., Muraoka, T., & Fukuda, H. (1988). Pitch synchronous analysis of hoarseness in running speech. The Journal of the Acoustical Society of America, 84, 1292\u20131301.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9139_CR41","first-page":"809","volume-title":"INTERSPEECH\u2014ICSLP","author":"D. Neiberg","year":"2006","unstructured":"Neiberg, D., Elenius, K., & Laskowski, K. (2006). Emotion recognition in spontaneous speech using GMMs. In INTERSPEECH\u2014ICSLP. Pittsburgh, Pennsylvania, 17\u201319 September 2006 (pp. 809\u2013812)."},{"key":"9139_CR42","doi-asserted-by":"crossref","first-page":"603","DOI":"10.1016\/S0167-6393(03)00099-2","volume":"41","author":"T. L. Nwe","year":"2003","unstructured":"Nwe, T. L., Foo, S. W., & Silva, L. C. D. (2003). Speech emotion recognition using hidden Markov models. Speech Communication, 41, 603\u2013623.","journal-title":"Speech Communication"},{"key":"9139_CR43","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1016\/S1071-5819(02)00141-6","volume":"59","author":"P. Y. Oudeyer","year":"2003","unstructured":"Oudeyer, P. Y. (2003). The production and recognition of emotions in speech: features and algorithms. International Journal of Human-Computer Studies, 59, 157\u2013183.","journal-title":"International Journal of Human-Computer Studies"},{"key":"9139_CR44","series-title":"LNCS","first-page":"279","volume-title":"ACII","author":"T. L. Pao","year":"2005","unstructured":"Pao, T. L., Chen, Y. T., Yeh, J. H., & Liao, W. Y. (2005). Combining acoustic features for improved emotion recognition in Mandarin speech. In J. Tao, T. Tan, & R. Picard (Eds.), LNCS. ACII, Berlin, Heidelberg (pp. 279\u2013285), Berlin: Springer."},{"key":"9139_CR45","series-title":"LNCS","volume-title":"ACII 2007","author":"T. L. Pao","year":"2007","unstructured":"Pao, T. L., Chen, Y. T., Yeh, J. H., Cheng, Y. M., & Chien, C. S. (2007). Feature combination for better differentiating anger from neutral in mandarin emotional speech. In LNCS: Vol. 4738. ACII 2007. Berlin: Springer."},{"key":"9139_CR46","doi-asserted-by":"crossref","first-page":"556","DOI":"10.1109\/TASL.2008.2010884","volume":"17","author":"S. R. M. Prasanna","year":"2009","unstructured":"Prasanna, S. R. M., Reddy, B. V. S., & Krishnamoorthy, P. (2009). Vowel onset point detection using source, spectral peaks, and modulation spectrum energies. IEEE Transactions on Audio, Speech, and Language Processing, 17, 556\u2013565.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9139_CR47","unstructured":"Rao, K. S. (2005). Acquisition and incorporation prosody knowledge for speech systems in Indian languages. PhD thesis, Dept. of Computer Science and Engineering, Indian Institute of Technology Madras, Chennai, India, May 2005."},{"key":"9139_CR48","doi-asserted-by":"crossref","first-page":"240","DOI":"10.1016\/j.csl.2008.06.005","volume":"23","author":"K. S. Rao","year":"2009","unstructured":"Rao, K. S., & Yegnanarayana, B. (2009). Intonation modeling for Indian languages. Computer Speech and Language, 23, 240\u2013256.","journal-title":"Computer Speech and Language"},{"key":"9139_CR49","doi-asserted-by":"crossref","first-page":"762","DOI":"10.1109\/LSP.2007.896454","volume":"14","author":"K. S. Rao","year":"2007","unstructured":"Rao, K. S., Prasanna, S. R. M., & Yegnanarayana, B. (2007). Determination of instants of significant excitation in speech using Hilbert envelope and group delay function. IEEE Signal Processing Letters, 14, 762\u2013765.","journal-title":"IEEE Signal Processing Letters"},{"key":"9139_CR50","volume-title":"International conference on speech prosody","author":"K. S. Rao","year":"2010","unstructured":"Rao, K. S., Reddy, R., Maity, S., & Koolagudi, S. G. (2010). Characterization of emotions using the dynamics of prosodic features. In International conference on speech prosody, Chicago, USA, May 2010."},{"key":"9139_CR51","unstructured":"Reddy, K. S. (2004). Source and system features for speaker recognition. Master\u2019s thesis, Department of Computer Science and Engineering, Indian Institute of Technology Madras, Chennai 600 036, India."},{"key":"9139_CR52","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1016\/S0167-6393(02)00084-5","volume":"40","author":"K. R. Scherer","year":"2003","unstructured":"Scherer, K. R. (2003). Vocal communication of emotion: A review of research paradigms. Speech Communication, 40, 227\u2013256.","journal-title":"Speech Communication"},{"key":"9139_CR53","volume-title":"Seventh European conference on speech communication and technology, Eurospeech","author":"M. Schroder","year":"2001","unstructured":"Schroder, M. (2001). Emoptional speech synthesis: A review. In Seventh European conference on speech communication and technology, Eurospeech, Aalborg, Denmark, Sept. 2001."},{"key":"9139_CR54","volume-title":"Workshop on emotion and computing (HUMAINE)","author":"M. Schroder","year":"2006","unstructured":"Schroder, M., & Cowie, R. (2006). Issues in emotion-oriented computing toward a shared understanding. In Workshop on emotion and computing (HUMAINE)."},{"key":"9139_CR55","doi-asserted-by":"crossref","first-page":"2061","DOI":"10.1121\/1.3203668","volume":"126","author":"G. P. Seshadri","year":"2009","unstructured":"Seshadri, G. P., & Yegnanarayana, B. (2009). Perceived loudness of speech based on the characteristics of glottal excitation source. The Journal of the Acoustical Society of America, 126, 2061\u20132071.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9139_CR56","first-page":"170","volume":"7","author":"M. Sigmund","year":"2007","unstructured":"Sigmund, M. (2007). Spectral analysis of speech under stress. International Journal of Computer Science and Network Security, 7, 170\u2013172.","journal-title":"International Journal of Computer Science and Network Security"},{"key":"9139_CR57","volume-title":"7th international conference on spoken language processing","author":"R. Tato","year":"2002","unstructured":"Tato, R., Santos, R., & Pardo, R. K. J. (2002). Emotional space improves emotion recognition. In 7th international conference on spoken language processing, Denver, Colorado, USA, Sept. 16\u201320 2002."},{"key":"9139_CR58","volume-title":"Pattern recognition","author":"S. Theodoridis","year":"2006","unstructured":"Theodoridis, S., & Koutroumbas, K. (2006). Pattern recognition (3rd ed.). New York: Elsevier, Academic Press.","edition":"3"},{"key":"9139_CR59","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1016\/0167-6393(95)00010-L","volume":"17","author":"P. Thevenaz","year":"1995","unstructured":"Thevenaz, P., & Hugli, H. (1995). Usefulness of LPC residue in textindependent speaker verification. Speech Communication, 17, 145\u2013157.","journal-title":"Speech Communication"},{"key":"9139_CR60","volume-title":"Eleventh Australasian international conference on speech science and technology","author":"D. Ververidis","year":"2006","unstructured":"Ververidis, D., & Kotropoulos, C. (2006). A state of the art review on emotional speech databases. In Eleventh Australasian international conference on speech science and technology, Auckland, New Zealand, Dec. 2006."},{"key":"9139_CR61","first-page":"I593","volume-title":"ICASSP","author":"D. Ververidis","year":"2004","unstructured":"Ververidis, D., Kotropoulos, C., & Pitas, I. (2004). Automatic emotional speech classification. In ICASSP (pp. I593\u2013I596). New York: IEEE."},{"key":"9139_CR62","doi-asserted-by":"crossref","first-page":"270","DOI":"10.1109\/TASSP.1976.1162797","volume":"24","author":"H. Wakita","year":"1976","unstructured":"Wakita, H. (1976). Residual energy of linear prediction to vowel and speaker recognition. IEEE Transactions on Acoustics, Speech, and Signal Processing, 24, 270\u2013271.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9139_CR63","doi-asserted-by":"crossref","first-page":"407","DOI":"10.1109\/ICNC.2008.713","volume-title":"Fourth international conference on natural computation","author":"Y. Wang","year":"2008","unstructured":"Wang, Y., Du, S., & Zhan, Y. (2008). Adaptive and optimal classification of speech emotion recognition. In Fourth international conference on natural computation, Oct. 2008 (pp. 407\u2013411)."},{"key":"9139_CR64","first-page":"189","volume-title":"Speech evaluation in psychiatry","author":"C. E. Williams","year":"1981","unstructured":"Williams, C. E., & Stevens, K. N. (1981). Vocal correlates of emotional states. In Speech evaluation in psychiatry (pp. 189\u2013220)."},{"key":"9139_CR65","volume-title":"Artificial neural networks","author":"B. Yegnanarayana","year":"1999","unstructured":"Yegnanarayana, B. (1999). Artificial neural networks. New Delhi: Prentice-Hall."},{"key":"9139_CR66","doi-asserted-by":"crossref","first-page":"459","DOI":"10.1016\/S0893-6080(02)00019-9","volume":"15","author":"B. Yegnanarayana","year":"2002","unstructured":"Yegnanarayana, B., & Kishore, S. P. (2002). AANN an alternative to GMM for pattern recognition. Neural Networks, 15, 459\u2013469.","journal-title":"Neural Networks"},{"key":"9139_CR67","first-page":"405","volume-title":"IEEE international conference on acoustics, speech and signal processing","author":"B. Yegnanarayana","year":"1998","unstructured":"Yegnanarayana, B., Murthy, P. S., Avendano, C., & Hermansky, H. (1998). Enhancement of reverberant speech using lp residual. In IEEE international conference on acoustics, speech and signal processing, Seattle, WA, USA, May 1998 (Vol.\u00a01, pp. 405\u2013408). New York: IEEE Xplore."},{"key":"9139_CR68","volume-title":"IEEE int. conf. acoust., speech, and signal processing","author":"B. Yegnanarayana","year":"2001","unstructured":"Yegnanarayana, B., Reddy, K. S., & Kishore, S. P. (2001a). Source and system features for speaker recognition using aann models. In IEEE int. conf. acoust., speech, and signal processing, Salt Lake City, UT, May 2001."},{"key":"9139_CR69","first-page":"409","volume-title":"Proc. IEEE int. conf. acoust., speech, signal processing","author":"B. Yegnanarayana","year":"2001","unstructured":"Yegnanarayana, B., Reddy, K. S., & Kishore, S. P. (2001b). Source and system features for speaker recognition using AANN models. In Proc. IEEE int. conf. acoust., speech, signal processing, Salt Lake City, Utah, USA, May 2001 (pp. 409\u2013412)."},{"issue":"6","key":"9139_CR70","doi-asserted-by":"crossref","first-page":"1196","DOI":"10.1109\/TASL.2009.2016230","volume":"17","author":"B. Yegnanarayana","year":"2009","unstructured":"Yegnanarayana, B., Swamy, R. K., & Murty, K. S. R. (2009). Determining mixing parameters from multispeaker data using speech-specific information. IEEE Transactions on Audio, Speech, and Language Processing, 17(6), 1196\u20131207.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9139_CR71","volume-title":"Int. conf. on spoken language processing","author":"S. Yildirim","year":"2004","unstructured":"Yildirim, S., Bulut, M., Lee, C. M., Kazemzadeh, A., Busso, C., Deng, Z., Lee, S., & Narayanan, S. (2004). An acoustic study of emotions expressed in speech. In Int. conf. on spoken language processing (ICSLP 2004), Jeju Island, Korea, Oct. 2004."},{"key":"9139_CR72","doi-asserted-by":"crossref","first-page":"691","DOI":"10.1109\/ICNC.2007.555","volume-title":"Third international conference on natural computation","author":"Y. Zeng","year":"2007","unstructured":"Zeng, Y., Wu, H., & Gao, R. (2007). Pitch synchronous analysis method and Fisher criterion based speaker identification. In Third international conference on natural computation, Washington DC, USA (pp. 691\u2013695). Washington: IEEE Computer Society."},{"key":"9139_CR73","series-title":"Lecture notes in computer science","doi-asserted-by":"crossref","first-page":"457","DOI":"10.1007\/978-3-540-87734-9_52","volume-title":"Advances in neural networks","author":"S. Zhang","year":"2008","unstructured":"Zhang, S. (2008). Emotion recognition in Chinese natural speech by combining prosody and voice quality features. In Sun, et al. (Eds.), Lecture notes in computer science. Advances in neural networks (pp. 457\u2013464). Berlin: Springer."},{"key":"9139_CR74","series-title":"LNCS","first-page":"544","volume-title":"Human computer interaction, Part III, HCII","author":"A. Zhu","year":"2007","unstructured":"Zhu, A., & Luo, Q. (2007). Study on speech emotion recognition system in E-learning. In J. Jacko (Ed.), LNCS. Human computer interaction, Part III, HCII (pp. 544\u2013552). Berlin: Springer."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-012-9139-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-012-9139-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-012-9139-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,4]],"date-time":"2022-01-04T20:48:15Z","timestamp":1641329295000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-012-9139-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,3,20]]},"references-count":74,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2012,6]]}},"alternative-id":["9139"],"URL":"https:\/\/doi.org\/10.1007\/s10772-012-9139-3","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,3,20]]}}}