{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T14:57:21Z","timestamp":1773413841662,"version":"3.50.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2011,5,15]],"date-time":"2011-05-15T00:00:00Z","timestamp":1305417600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2012,10]]},"DOI":"10.1007\/s00521-011-0620-8","type":"journal-article","created":{"date-parts":[[2011,5,14]],"date-time":"2011-05-14T05:05:07Z","timestamp":1305349507000},"page":"1765-1773","source":"Crossref","is-referenced-by-count":36,"title":["Using DTW neural\u2013based MFCC warping to improve emotional speech recognition"],"prefix":"10.1007","volume":"21","author":[{"given":"Mansour","family":"Sheikhan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Davood","family":"Gharavian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Farhad","family":"Ashoftedel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2011,5,15]]},"reference":[{"key":"620_CR1","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1016\/S0167-6393(99)00038-2","volume":"29","author":"H Strik","year":"1999","unstructured":"Strik H, Cucchiarini C (1999) Modeling pronunciation variation for ASR: a survey of the literature. Speech Commun 29:225\u2013246","journal-title":"Speech Commun"},{"key":"620_CR2","doi-asserted-by":"crossref","unstructured":"Vlasenko B, Wendemuth A (2009) Heading toward to the natural way of human-machine interaction: the NIMITEK project. Proceedings of IEEE international conference on multimedia and expo, pp 950\u2013953","DOI":"10.1109\/ICME.2009.5202653"},{"key":"620_CR3","doi-asserted-by":"crossref","unstructured":"Ijima Y, Tachibana M, Nose T, Kobayashi T (2009) Emotional speech recognition based on style estimation and adaptation with multiple-regression HMM. Proceedings of IEEE international conference on acoustic, speech and signal processing, pp 4157\u20134160","DOI":"10.1109\/ICASSP.2009.4960544"},{"key":"620_CR4","doi-asserted-by":"crossref","first-page":"1162","DOI":"10.1016\/j.specom.2006.04.003","volume":"48","author":"D Ververidis","year":"2006","unstructured":"Ververidis D, Kotropoulos C (2006) Emotional speech recognition: resources, features, and methods. Speech Commun 48:1162\u20131181","journal-title":"Speech Commun"},{"key":"620_CR5","doi-asserted-by":"crossref","unstructured":"Schuller B, Seppi D, Batliner A, Maier A, Steidl S (2007) Towards more reality in the recognition of emotional speech. Proceedings of IEEE international conference on acoustic, speech and signal processing, vol 4, pp 941\u2013944","DOI":"10.1109\/ICASSP.2007.367226"},{"key":"620_CR6","doi-asserted-by":"crossref","unstructured":"Schuller B, Batliner A, Steidl S, Seppi D (2009) Emotion recognition from speech: putting ASR in the loop. Proceedings of IEEE international conference on acoustic, speech and signal processing, pp 4585\u20134588","DOI":"10.1109\/ICASSP.2009.4960651"},{"key":"620_CR7","doi-asserted-by":"crossref","unstructured":"Krajewski J, Batliner A, Kessel S (2010) Comparing multiple classifiers for speech-based detection of self-confidence-A pilot study. Proceedings international conference on pattern recognition, pp 3716\u20133719","DOI":"10.1109\/ICPR.2010.905"},{"key":"620_CR8","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1016\/j.neunet.2005.03.008","volume":"18","author":"T Athanaselis","year":"2005","unstructured":"Athanaselis T, Bakamidis S, Dologlou I, Cowie R, Douglas-Cowie E, Cox C (2005) ASR for emotional speech: clarifying the issues and enhancing performance. J Neural Netw 18:437\u2013444","journal-title":"J Neural Netw"},{"key":"620_CR9","unstructured":"Litman DJ, Hirschberg JB, Swerts M (2000) Predicting automatic speech recognition performance using prosodic cues. Proceedings North American chapter of the association for computational linguistics conference, pp 218\u2013225"},{"key":"620_CR10","unstructured":"Steeneken HJM, Hansen JHL (1999) Speech under stress conditions: overview of the effect of speech production and on system performance. Proceedings of IEEE international conference on acoustic, speech and signal processing, vol 4, pp 2079\u20132082"},{"key":"620_CR11","doi-asserted-by":"crossref","first-page":"763","DOI":"10.1016\/j.specom.2007.02.006","volume":"49","author":"M Benzeghiba","year":"2007","unstructured":"Benzeghiba M, De Mori R, Deroo O, Dupont S, Erbes T, Jouvet D, Fissore L, Laface P, Mertins A, Ris C, Rose R, Tyagi V, Wellekens C (2007) Automatic speech recognition and speech variability: a review. Speech Commun 49:763\u2013786","journal-title":"Speech Commun"},{"key":"620_CR12","first-page":"108","volume-title":"Speech under stress: analysis, modeling and recognition","author":"JH Hansen","year":"2007","unstructured":"Hansen JH, Patil S (2007) Speech under stress: analysis, modeling and recognition. Springer, Berlin, pp 108\u2013137"},{"key":"620_CR13","unstructured":"Gharavian D (2004) Prosody in Farsi language and its use in recognition of intonation and speech. Ph.D. Dissertation, Electrical Engineering Department, Amirkabir University of Technology, Tehran"},{"key":"620_CR14","unstructured":"Gharavian D, Ahadi SM (2006) Recognition of emotional speech and speech emotion in Farsi. Proceedings of international symposium on Chinese spoken language processing, vol 2, pp 299\u2013308"},{"key":"620_CR15","unstructured":"Gharavian D, Ahadi SM (2005) The effect of emotion on Farsi speech parameters: a statistical evaluation. Proceedings of international conference on speech and computer, pp 463\u2013466"},{"issue":"1","key":"620_CR16","first-page":"19","volume":"4","author":"D Gharavian","year":"2010","unstructured":"Gharavian D, Sheikhan M, Janipour M (2010) Pitch in emotional speech and emotional speech recognition using pitch frequency. Majlesi J Electr Eng 4(1):19\u201324","journal-title":"Majlesi J Electr Eng"},{"key":"620_CR17","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1016\/S0167-6393(02)00083-3","volume":"40","author":"LT Bosch","year":"2003","unstructured":"Bosch LT (2003) Emotions, speech and the ASR framework. Speech Commun 40:213\u2013225","journal-title":"Speech Commun"},{"key":"620_CR18","doi-asserted-by":"crossref","unstructured":"M\u00fcller F, Mertins A (2011) Contextual invariant-integration features for improved speaker-independent speech recognition. Speech Commun. doi: 10.1016\/j.specom.2011.02.002 Article in Press","DOI":"10.1016\/j.specom.2011.02.002"},{"key":"620_CR19","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1109\/TSA.2002.803435","volume":"10","author":"L Welling","year":"2002","unstructured":"Welling L, Ney H, Kanthak S (2002) Speaker adaptive modeling by vocal tract normalization. IEEE Trans Speech Audio Process 10:415\u2013426","journal-title":"IEEE Trans Speech Audio Process"},{"key":"620_CR20","unstructured":"Sinha R, Umesh S (2002) Non-uniform scaling based speaker normalization. Proceedings of IEEE international conference on acoustic, speech and signal processing, vol 1, pp 589\u2013592"},{"key":"620_CR21","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1006\/csla.1998.0043","volume":"12","author":"MJF Gales","year":"1998","unstructured":"Gales MJF (1998) Maximum likelihood linear transformations for HMM-based speech recognition. Comput Speech Lang 12:75\u201398","journal-title":"Comput Speech Lang"},{"key":"620_CR22","doi-asserted-by":"crossref","first-page":"420","DOI":"10.1109\/TSA.2004.828702","volume":"12","author":"W Byrne","year":"2004","unstructured":"Byrne W, Doermann D, Franz M, Gustman S, Haji\u010d J, Oard D, Picheny M, Psutka J, Ramabhadran B, Soergel D, Ward T, Zhu W-J (2004) Automatic recognition of spontaneous speech for access to multilingual oral history archives. IEEE Trans Speech Audio Process 12:420\u2013435","journal-title":"IEEE Trans Speech Audio Process"},{"key":"620_CR23","doi-asserted-by":"crossref","unstructured":"Godfrey J, Holliman E, McDaniel J (1992) SWITCHBOARD: telephone speech corpus for research and development. Proceedings of IEEE international conference on acoustic, speech and signal processing, pp 517\u2013520","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"620_CR24","doi-asserted-by":"crossref","unstructured":"Pan YC, Xu MX, Liu LQ, Jia PF (2006) Emotion-detecting based model selection for emotional speech recognition. Proceedings of multiconference on computational engineering in system applications, pp 2169\u20132172","DOI":"10.1109\/CESA.2006.4281997"},{"key":"620_CR25","doi-asserted-by":"crossref","unstructured":"Meng H, Pittermann J, Pittermann A, Minker W (2007) Combined speech-emotion recognition for spoken human-computer interfaces. Proceedings IEEE international conference on signal processing and communications, pp 1179\u20131182","DOI":"10.1109\/ICSPC.2007.4728535"},{"key":"620_CR26","doi-asserted-by":"crossref","unstructured":"Sun Y, Zhou Y, Zhao Q, Yan Y (2009) Acoustic feature optimization for emotion affected speech recognition. Proceedings of international conference on information engineering and computer science, pp 1\u20134. doi: 10.1109\/ICIECS.2009.5365821","DOI":"10.1109\/ICIECS.2009.5365821"},{"key":"620_CR27","doi-asserted-by":"crossref","first-page":"367","DOI":"10.1109\/LSP.2006.888364","volume":"14","author":"R Muralishankar","year":"2007","unstructured":"Muralishankar R, Sangwan A, O\u2019Shaughnessy D (2007) Theoretical complex cepstrum of DCT and warped DCT filters. IEEE Signal Process Lett 14:367\u2013370","journal-title":"IEEE Signal Process Lett"},{"key":"620_CR28","doi-asserted-by":"crossref","first-page":"535","DOI":"10.1109\/TCSII.2005.850448","volume":"52","author":"J-H Chang","year":"2005","unstructured":"Chang J-H (2005) Warped discrete cosine transform-based noisy speech enhancement. IEEE Trans Circuits Syst II 52:535\u2013539","journal-title":"IEEE Trans Circuits Syst II"},{"key":"620_CR29","doi-asserted-by":"crossref","unstructured":"Panchapagesan S (2006) Frequency warping by linear transformation of standard MFCC. Proceedings of interspeech, pp 397\u2013400","DOI":"10.21437\/Interspeech.2006-131"},{"key":"620_CR30","doi-asserted-by":"crossref","unstructured":"Pitz M, Molau S, Schlueter R, Ney H (2001) Vocal tract normalization equals linear transformation in cepstral space. Proceedings of European conference on speech communication and technology, pp 721\u2013724","DOI":"10.21437\/Eurospeech.2001-621"},{"key":"620_CR31","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1016\/j.csl.2010.03.003","volume":"25","author":"C Clavel","year":"2011","unstructured":"Clavel C, Vasilescu I, Devillers L (2011) Fiction support for realistic portrayals of fear-type emotional manifestations. Comput Speech Lang 25:63\u201383","journal-title":"Comput Speech Lang"},{"key":"620_CR32","unstructured":"Bijankhan M, Sheikhzadegan J, Roohani MR, Samareh Y, Lucas C, Tebiani M (1994) The speech database of Farsi spoken language. Proceedings of Australian international conference on speech science and technology, pp 826\u2013831"},{"key":"620_CR33","unstructured":"Young SJ, Evermann G, Kershaw D, Moore G, Odell J, Ollason D, Povey D, Valtchev V, Woodland V (2002) The HTK book (Ver.3.2). Cambridge University, Cambridge"},{"key":"620_CR34","doi-asserted-by":"crossref","first-page":"135","DOI":"10.1109\/TASSP.1974.1162559","volume":"22","author":"SS McCandless","year":"1974","unstructured":"McCandless SS (1974) An Algorithm for formant extraction using linear prediction spectra. IEEE Trans Acoustics Speech Signal Process 22:135\u2013141","journal-title":"IEEE Trans Acoustics Speech Signal Process"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-011-0620-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00521-011-0620-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-011-0620-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,6]],"date-time":"2023-06-06T19:23:46Z","timestamp":1686079426000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00521-011-0620-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,5,15]]},"references-count":34,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2012,10]]}},"alternative-id":["620"],"URL":"https:\/\/doi.org\/10.1007\/s00521-011-0620-8","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011,5,15]]}}}