{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,20]],"date-time":"2025-10-20T10:23:28Z","timestamp":1760955808320},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,5,1]],"date-time":"2018-05-01T00:00:00Z","timestamp":1525132800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,5,1]],"date-time":"2018-05-01T00:00:00Z","timestamp":1525132800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mobile Netw Appl"],"published-print":{"date-parts":[[2019,2,15]]},"DOI":"10.1007\/s11036-018-1052-9","type":"journal-article","created":{"date-parts":[[2018,5,1]],"date-time":"2018-05-01T09:58:52Z","timestamp":1525168732000},"page":"193-201","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["Application of Emotion Recognition and Modification for Emotional Telugu Speech Recognition"],"prefix":"10.1007","volume":"24","author":[{"given":"Vishnu Vidyadhara Raju","family":"Vegesna","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Krishna","family":"Gurugubelli","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anil Kumar","family":"Vuppala","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,5,1]]},"reference":[{"key":"1052_CR1","doi-asserted-by":"crossref","unstructured":"Gangamohan P, Mittal V, Yegnanarayana B (2012) Relative importance of different components of speech contributing to perception of emotion. In: Proc of Sixth international conference on speech prosody, China","DOI":"10.21437\/SpeechProsody.2012-164"},{"issue":"4","key":"1052_CR2","first-page":"301","volume":"4","author":"MK YeonWoo Lee","year":"2017","unstructured":"YeonWoo Lee MK, Cheeyong K (2017) A study on colors and emotions of video contents-focusing on depression scale through analysis of commercials. Journal of Multimedia Information Systems 4(4):301\u2013306","journal-title":"Journal of Multimedia Information Systems"},{"issue":"1-2","key":"1052_CR3","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/j.specom.2004.02.001","volume":"43","author":"L Dybkjaer","year":"2004","unstructured":"Dybkjaer L, Bernsen NO, Minker W (2004) Evaluation and usability of multimodal spoken language dialogue systems. Speech Comm 43(1-2):33\u201354","journal-title":"Speech Comm"},{"key":"1052_CR4","doi-asserted-by":"crossref","unstructured":"Busso C, Bulut M, Narayanan S, Gratch J, Marsella S (2013) Toward effective automatic recognition systems of emotion in speech. In: Social emotions in nature and artifact: emotions in human and human-computer interaction, pp 110\u2013127","DOI":"10.1093\/acprof:oso\/9780195387643.003.0008"},{"issue":"4","key":"1052_CR5","doi-asserted-by":"publisher","first-page":"582","DOI":"10.1109\/TASL.2008.2009578","volume":"17","author":"C Busso","year":"2009","unstructured":"Busso C, Lee S, Narayanan S (2009) Analysis of emotionally salient aspects of fundamental frequency for emotion detection. IEEE transactions on audio, speech, and language processing 17(4):582\u2013596","journal-title":"IEEE transactions on audio, speech, and language processing"},{"issue":"1","key":"1052_CR6","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1109\/T-AFFC.2011.20","volume":"3","author":"G McKeown","year":"2012","unstructured":"McKeown G, Valstar M, Cowie R, Pantic M, Schroder M (2012) The semaine database: Annotated multimodal records of emotionally colored conversations between a person and a limited agent. IEEE Trans Affect Comput 3(1):5\u201317","journal-title":"IEEE Trans Affect Comput"},{"key":"1052_CR7","doi-asserted-by":"crossref","unstructured":"Mariooryad S, Lotfian R, Busso C (2014) Building a naturalistic emotional speech corpus by retrieving expressive behaviors from existing speech corpora. In: Proc of Fifteenth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2014-60"},{"issue":"2","key":"1052_CR8","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1109\/T-AFFC.2010.8","volume":"1","author":"B Schuller","year":"2010","unstructured":"Schuller B, Vlasenko B, Eyben F, Wollmer M, Stuhlsatz A, Wendemuth A, Rigoll G (2010) Cross-corpus acoustic emotion recognition: variances and strategies. IEEE Trans Affect Comput 1(2):119\u2013131","journal-title":"IEEE Trans Affect Comput"},{"key":"1052_CR9","doi-asserted-by":"crossref","unstructured":"Sethu V, Ambikairajah E, Epps J (2007) Speaker normalisation for speech-based emotion detection. In: Proc of 15th international conference on digital signal processing, pp 611\u2013614","DOI":"10.1109\/ICDSP.2007.4288656"},{"key":"1052_CR10","doi-asserted-by":"crossref","unstructured":"Busso C, Metallinou A, Narayanan SS (2011) Iterative feature normalization for emotional speech detection. In: Proc of international conference on acoustics, speech and signal processing (ICASSP), IEEE, pp 5692\u20135695","DOI":"10.1109\/ICASSP.2011.5947652"},{"key":"1052_CR11","doi-asserted-by":"crossref","unstructured":"Deng J, Zhang Z, Marchi E, Schuller B (2013) Sparse autoencoder-based feature transfer learning for speech emotion recognition. In: Proc of humaine association conference on affective computing and intelligent interaction (ACII), IEEE, pp 511\u2013516","DOI":"10.1109\/ACII.2013.90"},{"key":"1052_CR12","doi-asserted-by":"crossref","unstructured":"Maeireizo B, Litman D, Hwa R (2004) Co-training for predicting emotions with spoken dialogue data. In: Proc of the interactive poster and demonstration sessions, ACL, p 28","DOI":"10.3115\/1219044.1219072"},{"key":"1052_CR13","doi-asserted-by":"crossref","unstructured":"Abdelwahab M, Busso C (2015) Supervised domain adaptation for emotion recognition from speech. In: Proc of International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, pp 5058\u20135062","DOI":"10.1109\/ICASSP.2015.7178934"},{"issue":"9","key":"1052_CR14","doi-asserted-by":"publisher","first-page":"1162","DOI":"10.1016\/j.specom.2006.04.003","volume":"48","author":"D Ververidis","year":"2006","unstructured":"Ververidis D, Kotropoulos C (2006) Emotional speech recognition: resources, features, and methods. Speech Comm 48(9):1162\u20131181","journal-title":"Speech Comm"},{"key":"1052_CR15","doi-asserted-by":"crossref","unstructured":"Schuller B, Seppi D, Batliner A, Maier A, Steidl S (2007) Towards more reality in the recognition of emotional speech. In: Proc of international conference on acoustics, speech and signal processing, vol 4. IEEE, pp IV\u2013941","DOI":"10.1109\/ICASSP.2007.367226"},{"key":"1052_CR16","doi-asserted-by":"crossref","unstructured":"Schuller B, Batliner A, Steidl S, Seppi D (2009) Emotion recognition from speech: putting asr in the loop. In: Proc of international conference on acoustics, speech and signal processing (ICASSP), IEEE, pp 4585\u20134588","DOI":"10.1109\/ICASSP.2009.4960651"},{"issue":"4","key":"1052_CR17","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1016\/j.neunet.2005.03.008","volume":"18","author":"T Athanaselis","year":"2005","unstructured":"Athanaselis T, Bakamidis S, Dologlou I, Cowie R, Douglas-Cowie E, Cox C (2005) Asr for emotional speech: clarifying the issues and enhancing performance. Neural Netw 18(4):437\u2013444","journal-title":"Neural Netw"},{"key":"1052_CR18","unstructured":"Steeneken HJ, Hansen JH (1999) Speech under stress conditions: overview of the effect on speech production and on system performance. In: Proc of international conference on acoustics, speech, and signal processing(ICASSP), vol 4. IEEE, pp 2079\u20132082"},{"issue":"10-11","key":"1052_CR19","doi-asserted-by":"publisher","first-page":"763","DOI":"10.1016\/j.specom.2007.02.006","volume":"49","author":"M Benzeghiba","year":"2007","unstructured":"Benzeghiba M, De Mori R, Deroo O, Dupont S, Erbes T, Jouvet D, Fissore L, Laface P, Mertins A, Ris C et al (2007) Automatic speech recognition and speech variability: a review. Speech Comm 49(10-11):763\u2013786","journal-title":"Speech Comm"},{"issue":"7","key":"1052_CR20","doi-asserted-by":"publisher","first-page":"1765","DOI":"10.1007\/s00521-011-0620-8","volume":"21","author":"M Sheikhan","year":"2012","unstructured":"Sheikhan M, Gharavian D, Ashoftedel F (2012) Using dtw neural-based mfcc warping to improve emotional speech recognition. Springer journal on Neural Computing and Applications 21(7):1765\u20131773","journal-title":"Springer journal on Neural Computing and Applications"},{"issue":"6","key":"1052_CR21","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1109\/TSA.2002.803435","volume":"10","author":"L Welling","year":"2002","unstructured":"Welling L, Ney H, Kanthak S (2002) Speaker adaptive modeling by vocal tract normalization. IEEE Transactions on Speech and Audio Processing 10(6):415\u2013426","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"1052_CR22","doi-asserted-by":"crossref","unstructured":"Sinha R, Umesh S (2002) Non-uniform scaling based speaker normalization. In: Proc of international conference on acoustics, speech, and signal processing (ICASSP), vol 1. IEEE, pp I\u2013589","DOI":"10.1109\/ICASSP.2002.5743786"},{"issue":"6","key":"1052_CR23","doi-asserted-by":"publisher","first-page":"830","DOI":"10.1016\/j.specom.2011.02.002","volume":"53","author":"F M\u00fcller","year":"2011","unstructured":"M\u00fcller F, Mertins A (2011) Contextual invariant-integration features for improved speaker-independent speech recognition. Speech Comm 53(6):830\u2013841","journal-title":"Speech Comm"},{"key":"1052_CR24","doi-asserted-by":"crossref","unstructured":"Vydana HK, Vidyadhara Raju V, Gangashetty SV, Vuppala AK (2015) Significance of emotionally significant regions of speech for emotive to neutral conversion. In: Proc of international conference on mining intelligence and knowledge exploration, Springer, Hyderabad, pp 287\u2013296","DOI":"10.1007\/978-3-319-26832-3_28"},{"key":"1052_CR25","unstructured":"Vidyadhara Raju V, Vydana Vhk, Gangashetty SV, Vuppala AK (2017) Importance of non-uniform prosody modification for speech recognition in emotion conditions. In: Proc of Asia-Pacific Signal and information processing association annual summit and conference (APSIPA), IEEE"},{"key":"1052_CR26","doi-asserted-by":"crossref","unstructured":"Adiga N, Govind D, Prasanna SM (2014) Significance of epoch identification accuracy for prosody modification. In: Proc of SPCOM, IEEE, Bangalore, pp 1\u20136","DOI":"10.1109\/SPCOM.2014.6984007"},{"issue":"3","key":"1052_CR27","doi-asserted-by":"publisher","first-page":"972","DOI":"10.1109\/TSA.2005.858051","volume":"14","author":"KS Rao","year":"2006","unstructured":"Rao KS, Yegnanarayana B (2006) Prosody modification using instants of significant excitation. IEEE Trans Audio Speech Lang Process 14(3):972\u2013980","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"4","key":"1052_CR28","doi-asserted-by":"publisher","first-page":"1145","DOI":"10.1109\/TASL.2006.876113","volume":"14","author":"J Tao","year":"2006","unstructured":"Tao J, Kang Y, Li A (2006) Prosody conversion from neutral speech to emotional speech. IEEE Trans Audio Speech Lang Process 14(4):1145\u20131154","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"1052_CR29","doi-asserted-by":"crossref","unstructured":"Prasanna S, Govind D, Rao KS, Yenanarayana B (2010) Fast prosody modification using instants of significant excitation. In: Proc of speech prosody, Chicago","DOI":"10.21437\/SpeechProsody.2010-126"},{"key":"1052_CR30","unstructured":"Thomas MR, Gudnason J, Naylor PA (2008) Application of dypsa algorithm to segmented time scale modification of speech. In: Proc of EUSIPCO, IEEE, Switzerland"},{"key":"1052_CR31","doi-asserted-by":"crossref","unstructured":"Vidyadhara Raju VV, Gurugubelli K, Vydana HK, Pulugandla B, Shrivastava M, Vuppala AK (2017) Dnn-hmm acoustic modeling for large vocabulary telugu speech recognition. In: Proc of international conference on mining intelligence and knowledge exploration, Springer, pp 189\u2013197","DOI":"10.1007\/978-3-319-71928-3_19"},{"key":"1052_CR32","doi-asserted-by":"crossref","unstructured":"Koolagudi SG, Maity S, Kumar VA, Chakrabarti S, Rao KS (2009) IITKGP-SESC: speech database for emotion analysis. In: Contemporary computing, Springer, pp 485\u2013492","DOI":"10.1007\/978-3-642-03547-0_46"},{"issue":"2","key":"1052_CR33","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1109\/89.824696","volume":"8","author":"LK Saul","year":"2000","unstructured":"Saul LK, Rahim MG (2000) Maximum likelihood and minimum classification error factor analysis for automatic speech recognition. IEEE Transactions on Speech and Audio Processing 8(2):115\u2013125","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"1052_CR34","first-page":"175","volume":"3","author":"S Young","year":"2002","unstructured":"Young S, Evermann G, Gales M, Hain T, Kershaw D, Liu X, Moore G, Odell J, Ollason D, Povey D et al (2002) The htk book. Cambridge university engineering department 3:175","journal-title":"Cambridge university engineering department"},{"key":"1052_CR35","unstructured":"Povey D, Ghoshal A, Boulianne G, Burget L, Glembek O, Goel N, Hannemann M, Motlicek P, Qian Y, Schwarz P et al (2011) The kaldi speech recognition toolkit. In: IEEE 2011 workshop on automatic speech recognition and understanding, IEEE Signal Processing Society"},{"issue":"4","key":"1052_CR36","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1109\/PROC.1975.9792","volume":"63","author":"J Makhoul","year":"1975","unstructured":"Makhoul J (1975) Linear prediction: a tutorial review. Proc IEEE 63(4):561\u2013580","journal-title":"Proc IEEE"},{"issue":"2","key":"1052_CR37","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/s10772-011-9125-1","volume":"15","author":"SG Koolagudi","year":"2012","unstructured":"Koolagudi SG, Rao KS (2012) Emotion recognition from speech: a review. Int J Speech Technol 15(2):99\u2013117","journal-title":"Int J Speech Technol"},{"issue":"3","key":"1052_CR38","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1016\/j.patcog.2010.09.020","volume":"44","author":"M El Ayadi","year":"2011","unstructured":"El Ayadi M, Kamel MS, Karray F (2011) Survey on speech emotion recognition: features, classification schemes, and databases. Pattern Recogn 44(3):572\u2013587","journal-title":"Pattern Recogn"},{"key":"1052_CR39","unstructured":"Rabiner LR, Juang B-H (1993) Fundamentals of speech recognition. PTR Prentice Hall Englewood Cliffs, vol 14"},{"key":"1052_CR40","unstructured":"Benesty J, Chen J, Huang Y (2008) Microphone array signal processing. Springer Science & Business Media, vol 1"},{"issue":"1-2","key":"1052_CR41","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/S0167-6393(02)00071-7","volume":"40","author":"R Cowie","year":"2003","unstructured":"Cowie R, Cornelius RR (2003) Describing the emotional states that are expressed in speech. Speech Comm 40(1-2):5\u201332","journal-title":"Speech Comm"},{"issue":"3-4","key":"1052_CR42","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1016\/j.specom.2005.02.016","volume":"46","author":"T B\u00e4nziger","year":"2005","unstructured":"B\u00e4nziger T, Scherer KR (2005) The role of intonation in emotional expressions. Speech Comm 46 (3-4):252\u2013267","journal-title":"Speech Comm"},{"issue":"2","key":"1052_CR43","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1109\/TSA.2004.838534","volume":"13","author":"CM Lee","year":"2005","unstructured":"Lee CM, Narayanan SS (2005) Toward detecting emotions in spoken dialogs. IEEE Transactions on Speech and Audio Processing 13(2):293\u2013303","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"1052_CR44","doi-asserted-by":"crossref","unstructured":"Vidyadhara Raju VV, Gurugubelli K, Vuppala AK (2018) Differenced prosody features from normal and stressed regions foremotion recognition. In: 5th international conference on signal processing and integrated networks (SPIN), IEEE","DOI":"10.1109\/SPIN.2018.8474265"},{"key":"1052_CR45","unstructured":"Werner S, Keller E (1995) Prosodic aspects of speech. In: Fundamentals of speech synthesis and speech recognition, Wiley Ltd., pp 23\u201340"},{"key":"1052_CR46","unstructured":"Govind D, Prasanna SM (2018) Prosody modification for speech recognition in emotionally mismatched conditions. Int J Speech Technol"},{"issue":"8","key":"1052_CR47","doi-asserted-by":"publisher","first-page":"1602","DOI":"10.1109\/TASL.2008.2004526","volume":"16","author":"KSR Murty","year":"2008","unstructured":"Murty KSR, Yegnanarayana B (2008) Epoch extraction from speech signals. IEEE Trans Audio Speech Lang Process 16(8):1602\u20131613","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"3","key":"1052_CR48","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1109\/LSP.2009.2038507","volume":"17","author":"N Dhananjaya","year":"2010","unstructured":"Dhananjaya N, Yegnanarayana B (2010) Voiced\/nonvoiced detection based on robustness of voiced epochs. IEEE Signal Process Lett 17(3):273\u2013276","journal-title":"IEEE Signal Process Lett"}],"container-title":["Mobile Networks and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11036-018-1052-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11036-018-1052-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11036-018-1052-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,6]],"date-time":"2024-07-06T11:59:06Z","timestamp":1720267146000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11036-018-1052-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,5,1]]},"references-count":48,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,2,15]]}},"alternative-id":["1052"],"URL":"https:\/\/doi.org\/10.1007\/s11036-018-1052-9","relation":{},"ISSN":["1383-469X","1572-8153"],"issn-type":[{"value":"1383-469X","type":"print"},{"value":"1572-8153","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,5,1]]},"assertion":[{"value":"1 May 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}