{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T13:40:27Z","timestamp":1760708427005},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2012,6,8]],"date-time":"2012-06-08T00:00:00Z","timestamp":1339113600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2012,9]]},"DOI":"10.1007\/s10772-012-9148-2","type":"journal-article","created":{"date-parts":[[2012,6,7]],"date-time":"2012-06-07T14:01:49Z","timestamp":1339077709000},"page":"335-349","source":"Crossref","is-referenced-by-count":27,"title":["Neural network based feature transformation for emotion independent speaker identification"],"prefix":"10.1007","volume":"15","author":[{"given":"Sreenivasa Rao","family":"Krothapalli","sequence":"first","affiliation":[]},{"given":"Jaynath","family":"Yadav","sequence":"additional","affiliation":[]},{"given":"Sourjya","family":"Sarkar","sequence":"additional","affiliation":[]},{"given":"Shashidhar G.","family":"Koolagudi","sequence":"additional","affiliation":[]},{"given":"Anil Kumar","family":"Vuppala","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,6,8]]},"reference":[{"key":"9148_CR1","first-page":"655","volume-title":"Proc. IEEE int. conf. acoust., speech, signal processing","author":"M. Abe","year":"1988","unstructured":"Abe, M., Nakamura, S., Shikano, K., & Kuwabara, H. (1988). Voice conversion through vector quantization. In Proc. IEEE int. conf. acoust., speech, signal processing, May 1988 (Vol.\u00a01, pp.\u00a0655\u2013658)."},{"key":"9148_CR2","volume-title":"Springer handbook on speech processing","year":"2008","unstructured":"Benesty, J., Sondhi, M. M. & Huang, Y. (Eds.) (2008). Springer handbook on speech processing. Berlin: Springer."},{"key":"9148_CR3","doi-asserted-by":"crossref","first-page":"430","DOI":"10.1155\/S1110865704310024","volume":"4","author":"F. Bimbot","year":"2004","unstructured":"Bimbot, F., Bonastre, J.-F., Fredouille, C., Gravier, G., Magrin-Chagnolleau, I., Meignier, S., Merlin, T., Ortega-Garcia, J., Petrovska-Delacretaz, D., & Reynolds, D. (2004). A\u00a0tutorial on text-independent speaker verification. EURASIP Journal on Applied Signal Processing, 4, 430\u2013451.","journal-title":"EURASIP Journal on Applied Signal Processing"},{"key":"9148_CR4","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1016\/S0167-6393(96)00047-7","volume":"20","author":"S. E. Bou-Ghazale","year":"1996","unstructured":"Bou-Ghazale, S. E., & Hansen, J. H. L. (1996). Generating stressed speech from neutral speech using a modified celp vocoder. Speech Communication, 20, 93\u2013110.","journal-title":"Speech Communication"},{"key":"9148_CR5","doi-asserted-by":"crossref","first-page":"1517","DOI":"10.21437\/Interspeech.2005-446","volume-title":"INTERSPEECH-2005","author":"F. Burkhardt","year":"2005","unstructured":"Burkhardt, F., Paeschke, A., Rolfes, M., Sendlmeier, W. F., & Weiss, B. (2005). A\u00a0database of German emotional speech. In INTERSPEECH-2005 (pp.\u00a01517\u20131520)."},{"key":"9148_CR6","volume-title":"ICSLP","author":"N. Campbell","year":"2004","unstructured":"Campbell, N. (2004). Perception of affect in speech\u2014towards an automatic processing of paralinguistic information in spoken conversation. In ICSLP, Jeju, October 2004."},{"key":"9148_CR7","first-page":"2665","volume-title":"Proc. European conf. speech commun. technol.","author":"J. Campbell","year":"2003","unstructured":"Campbell, J., Reynolds, D., & Dunn, R. (2003). Fusing high- and low-level features for speaker recognition. In Proc. European conf. speech commun. technol. (pp.\u00a02665\u20132668)."},{"key":"9148_CR8","doi-asserted-by":"crossref","first-page":"954","DOI":"10.1109\/TASL.2010.2047683","volume":"18","author":"S. Desai","year":"2010","unstructured":"Desai, S., Black, A. W., Yegnanarayana, B., & Prahallad, K. (2010). Spectral mapping using artificial neural networks for voice conversion. IEEE Transactions on Audio, Speech, and Language Processing, 18, 954\u2013964.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"9148_CR9","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1006\/dspr.1999.0359","volume":"10","author":"R. Dunn","year":"2000","unstructured":"Dunn, R., Reynolds, D., & Quatieri, T. (2000). Approaches to speaker detection and tracking in multi-speaker audio. Digital Signal Processing, 10(1), 93\u2013112.","journal-title":"Digital Signal Processing"},{"key":"9148_CR10","volume-title":"Proc. IEEE int. conf. acoust., speech, signal processing","author":"S. Fine","year":"2001","unstructured":"Fine, S., Navaratil, J., & Gopinath, R. (2001). A\u00a0hybrid GMM\/SVM approach to speaker identification. In Proc. IEEE int. conf. acoust., speech, signal processing, Utah, USA, May 2001 (Vol.\u00a01)."},{"key":"9148_CR11","first-page":"73","volume-title":"Proc. INTERSPEECH 2011","author":"D. Govind","year":"2004","unstructured":"Govind, D., Prasanna, S. R. M., & Yegnanarayana, B. (2004). Neutral to target emotion conversion using source and suprasegmental information. In Proc. INTERSPEECH 2011, Florence, Italy, August 2004 (pp.\u00a073\u201376)."},{"key":"9148_CR12","unstructured":"Gupta, C. S. (2003). Significance of source features for speaker recognition. Master\u2019s thesis, Department of Computer Science and Engineering, Indian Institute of Technology Madras, Chennai 600 036, India."},{"key":"9148_CR13","doi-asserted-by":"crossref","first-page":"307","DOI":"10.1109\/89.506935","volume":"4","author":"J. H. L. Hansen","year":"1996","unstructured":"Hansen, J. H. L., & Womack, B. D. (1996). Feature analysis and neural network-based classification of speech under stress. IEEE Transactions on Speech and Audio Processing, 4, 307\u2013313.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9148_CR14","volume-title":"Neural networks: a comprehensive foundation","author":"S. Haykin","year":"1999","unstructured":"Haykin, S. (1999). Neural networks: a comprehensive foundation. New Delhi: Pearson Education Aisa, Inc."},{"key":"9148_CR15","volume-title":"Spoken language processing","author":"X. Huang","year":"2001","unstructured":"Huang, X., Acero, A., & Hon, H. W. (2001). Spoken language processing. New York: Prentice-Hall, Inc."},{"key":"9148_CR16","first-page":"1548","volume-title":"Int. joint conf. neural networks","author":"S. P. Kishore","year":"2001","unstructured":"Kishore, S. P., & Yegnanarayana, B. (2001). Online text-independent speaker verification system using autoassociative neural network models. In Int. joint conf. neural networks, Washington, USA, Aug. 2001 (pp.\u00a01548\u20131553)."},{"key":"9148_CR17","series-title":"LNCS","volume-title":"Communications in computer and information science","author":"S. G. Koolagudi","year":"2009","unstructured":"Koolagudi, S. G., Maity, S., Kumar, V. A., Chakrabarti, S., & Rao, K.\u00a0S. (2009). IITKGP-SESC: speech database for emotion analysis. In LNCS. Communications in computer and information science, Aug. 2009. Berlin: Springer."},{"key":"9148_CR18","volume-title":"International conference on devices and communication","author":"S. G. Koolagudi","year":"2011","unstructured":"Koolagudi, S. G., Reddy, R., Yadav, J., & Rao, K. S. (2011). Iitkgp-sehsc: Hindi speech corpus for emotion analysis. In International conference on devices and communication, Mesra, India, Birla Institute of Technology, Feb. 2011. New York: IEEE Press."},{"key":"9148_CR19","series-title":"Lecture notes in computer science.","isbn-type":"print","volume-title":"Emotion-state conversion for speaker recognition","author":"D. Li","year":"2005","unstructured":"Li, D., Yang, Y., Wu, Z., & Wu, T. (2005). Lecture notes in computer science.: Vol.\u00a03784. Emotion-state conversion for speaker recognition. Berlin: Springer. ISBN: 978-3-540-29621-8.","ISBN":"http:\/\/id.crossref.org\/isbn\/9783540296"},{"key":"9148_CR20","first-page":"361","volume-title":"Proceedings of EUROSPEECH 2001","author":"M. Marshimi","year":"2001","unstructured":"Marshimi, M., Toda, T., Shikano, K., & Campbell, N. (2001). Evaluation of cross-language voice conversion based on GMM and STRAIGHT. In Proceedings of EUROSPEECH 2001, Aalborg, Denmark, Sept. 2001 (pp.\u00a0361\u2013364)."},{"key":"9148_CR21","unstructured":"Mary, L. (2006). Multi level implicit features for language and speaker recognition. PhD thesis, Dept. of Computer Science and Engineering, Indian Institute of Technology, Madras, Chennai, India, June."},{"key":"9148_CR22","first-page":"917","volume-title":"Proc. int. conf. spoken language processing","author":"L. Mary","year":"2006","unstructured":"Mary, L., & Yegnanarayana, B. (2006). Prosodic features for speaker verification. In Proc. int. conf. spoken language processing, Pittsburgh, PA, USA, Sep. 2006 (pp.\u00a0917\u2013920)."},{"key":"9148_CR23","doi-asserted-by":"crossref","first-page":"782","DOI":"10.1016\/j.specom.2008.04.010","volume":"50","author":"L. Mary","year":"2008","unstructured":"Mary, L., & Yegnanarayana, B. (2008). Extraction and representation of prosodic features for language and speaker recognition. Speech Communication, 50, 782\u2013796.","journal-title":"Speech Communication"},{"key":"9148_CR24","volume-title":"Int. conf. cognitive and neural systems","author":"L. Mary","year":"2004","unstructured":"Mary, L., Rao, K. S., Gangashetty, S. V., & Yegnanarayana, B. (2004). Neural network models for capturing duration and intonation knowledge for language and speaker identification. In Int. conf. cognitive and neural systems, Boston, MA, USA, May 2004."},{"key":"9148_CR25","doi-asserted-by":"crossref","first-page":"52","DOI":"10.1109\/LSP.2005.860538","volume":"13","author":"K. S. R. Murty","year":"2006","unstructured":"Murty, K. S. R., & Yegnanarayana, B. (2006). Combining evidence from residual phase and mfcc features for speaker recognition. IEEE Signal Processing Letters, 13, 52\u201355.","journal-title":"IEEE Signal Processing Letters"},{"key":"9148_CR26","doi-asserted-by":"crossref","first-page":"1602","DOI":"10.1109\/TASL.2008.2004526","volume":"16","author":"K. S. R. Murty","year":"2008","unstructured":"Murty, K. S. R., & Yegnanarayana, B. (2008). Epoch extraction from speech signals. IEEE Transactions on Audio, Speech, and Language Processing, 16, 1602\u20131613.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9148_CR27","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1007\/s10772-011-9094-4","volume":"14","author":"N. P. Narendra","year":"2011","unstructured":"Narendra, N. P., Rao, K. S., Ghosh, K., Vempada, R. R., & Maity, S. (2011). Development of syllable-based text-to-speech synthesis system in Bengali. International Journal of Speech Technology, 14, 167\u2013182.","journal-title":"International Journal of Speech Technology"},{"key":"9148_CR28","first-page":"206","volume":"16","author":"M. Narendranadh","year":"1995","unstructured":"Narendranadh, M., Murthy, H. A., Rajendran, S., & Yegnanarayana, B. (1995). Transformation of formants for voice conversion using artificial neural networks. Speech Communication, 16, 206\u2013216.","journal-title":"Speech Communication"},{"key":"9148_CR29","doi-asserted-by":"crossref","first-page":"2552","DOI":"10.1109\/TASL.2011.2155061","volume":"19","author":"S. R. M. Prasanna","year":"2011","unstructured":"Prasanna, S. R. M., & Pradhan, G. (2011). Significance of vowel-like regions for speaker verification under degraded condition. IEEE Transactions on Speech and Audio Processing, 19, 2552\u20132565.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9148_CR30","volume-title":"Fundamentals of speech recognition","author":"L. R. Rabiner","year":"1993","unstructured":"Rabiner, L. R., & Juang, B. H. (1993). Fundamentals of speech recognition. Englewood Cliffs: Prentice-Hall."},{"key":"9148_CR31","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1007\/s10772-010-9075-z","volume":"13","author":"G. S. Raja","year":"2010","unstructured":"Raja, G. S., & Dandapat, S. (2010). Speaker recognition under stressed condition. International Journal of Speech Technology, 13, 141\u2013161.","journal-title":"International Journal of Speech Technology"},{"key":"9148_CR32","doi-asserted-by":"crossref","unstructured":"Rao, K. S. (2010). Voice conversion by mapping the speaker-specific features using pitch synchronous approach. Computer Speech and Language, 24.","DOI":"10.1016\/j.csl.2009.03.003"},{"key":"9148_CR33","doi-asserted-by":"crossref","first-page":"783","DOI":"10.1007\/s12046-011-0047-z","volume":"36","author":"K. S. Rao","year":"2011","unstructured":"Rao, K. S. (2011). Role of neural networks for developing speech systems. Sadhana, 36, 783\u2013836.","journal-title":"Sadhana"},{"key":"9148_CR34","volume-title":"15th international conference on advanced computing and communication (ADCOM-2007)","author":"K. S. Rao","year":"2007","unstructured":"Rao, K. S., & Koolagudi, S. G. (2007). Transformation of speaker characteristics in speech using support vector machines. In 15th international conference on advanced computing and communication (ADCOM-2007), Guwahati, India, Dec. 2007."},{"key":"9148_CR35","volume-title":"9th int. conf. information technology","author":"K. S. Rao","year":"2006","unstructured":"Rao, K. S., & Yegnanarayana, B. (2006a). Voice conversion by prosody and vocal tract modification. In 9th int. conf. information technology, Bhubaneswar, Orissa, India."},{"key":"9148_CR36","doi-asserted-by":"crossref","first-page":"972","DOI":"10.1109\/TSA.2005.858051","volume":"14","author":"K. S. Rao","year":"2006","unstructured":"Rao, K. S., & Yegnanarayana, B. (2006b). Prosody modification using instants of significant excitation. IEEE Transactions on Speech and Audio Processing, 14, 972\u2013980.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9148_CR37","doi-asserted-by":"crossref","first-page":"282","DOI":"10.1016\/j.csl.2006.06.003","volume":"21","author":"K. S. Rao","year":"2007","unstructured":"Rao, K. S., & Yegnanarayana, B. (2007). Modeling durations of syllables using neural networks. Computer Speech & Language, 21, 282\u2013295.","journal-title":"Computer Speech & Language"},{"key":"9148_CR38","unstructured":"Rao, K. S., & Yegnanarayana, B. (2008). Intonation modeling for Indian languages. Computer Speech and Language."},{"key":"9148_CR39","doi-asserted-by":"crossref","first-page":"1263","DOI":"10.1016\/j.specom.2009.06.004","volume":"51","author":"K. S. Rao","year":"2009","unstructured":"Rao, K. S., & Yegnanarayana, B. (2009). Duration modification using glottal closure instants and vowel onset points. Speech Communication, 51, 1263\u20131269.","journal-title":"Speech Communication"},{"key":"9148_CR40","volume-title":"2nd international conference on pattern recognition and machine intelligence (Premi-2007)","author":"K. S. Rao","year":"2007","unstructured":"Rao, K. S., Laskar, R. H., & Koolagudi, S. G. (2007a). Voice transformation by mapping the features at syllable level. In 2nd international conference on pattern recognition and machine intelligence (Premi-2007), Kolkata, India, Dec. 2007."},{"key":"9148_CR41","first-page":"479","volume-title":"LNCS","author":"K. S. Rao","year":"2007","unstructured":"Rao, K. S., Laskar, R. H., & Koolagudi, S. G. (2007b). Voice transformation by mapping the features at syllable level. In R.\u00a0K.\u00a0D.\u00a0A. Ghosh & S.\u00a0K. Pal (Eds.), LNCS, ISI Kolkata (pp.\u00a0479\u2013486). Heidelberg: Springer."},{"key":"9148_CR42","unstructured":"Reddy, K. S. (2004). Source and system features for speaker recognition. Master\u2019s thesis, Department of Computer Science and Engineering, Indian Institute of Technology Madras, Chennai 600 036, India."},{"key":"9148_CR43","doi-asserted-by":"crossref","unstructured":"Reynolds, D., & Rose, R. (1995). Robust text independent speaker identification using Gaussian mixture speaker models. IEEE Transactions Speech and Audio Processing, 72\u201383.","DOI":"10.1109\/89.365379"},{"issue":"1","key":"9148_CR44","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"D. Reynolds","year":"2000","unstructured":"Reynolds, D., Quatieri, T., & Dunn, R. (2000). Speaker verification using adapted Gaussian mixture models. Digital Signal Processing, 10(1), 19\u201341.","journal-title":"Digital Signal Processing"},{"key":"9148_CR45","volume-title":"Proceedings of the international workshop on speech and computer","author":"K. R. Scherer","year":"1998","unstructured":"Scherer, K. R., Johnstone, T., & Banziger, T. (1998). Automatic verification of emotionally stressed speakers: the problem of individual differences. In Proceedings of the international workshop on speech and computer, St. Petersburg."},{"key":"9148_CR46","doi-asserted-by":"crossref","first-page":"1047","DOI":"10.1016\/j.specom.2006.01.005","volume":"48","author":"I. Shahin","year":"2006","unstructured":"Shahin, I. (2006). Enhancing speaker identification performance under the shouted talking condition using second-order circular hidden Markov models. Speech Communication, 48, 1047\u20131055.","journal-title":"Speech Communication"},{"key":"9148_CR47","volume-title":"5th int. workshop on signal processing and its applications","author":"I. Shahin","year":"2008","unstructured":"Shahin, I. (2008). Using emotions to identify speakers. In 5th int. workshop on signal processing and its applications."},{"issue":"1","key":"9148_CR48","first-page":"41","volume":"8","author":"I. Shahin","year":"2009","unstructured":"Shahin, I. (2009). Speaker identification in emotional environments. Iranian Journal of Electrical and Computer Engineering, 8(1), 41\u201346.","journal-title":"Iranian Journal of Electrical and Computer Engineering"},{"key":"9148_CR49","first-page":"841","volume-title":"Proc. IEEE int. conf. acoust., speech, signal processing","author":"T. Toda","year":"2001","unstructured":"Toda, T., Saruwatari, H., & Shikano, K. (2001). Voice conversion algorithm based on Gaussian mixture model with dynamic frequency warping of STRAIGHT spectrum. In Proc. IEEE int. conf. acoust., speech, signal processing, May 2001 (Vol.\u00a02, pp.\u00a0841\u2013844)."},{"key":"9148_CR50","doi-asserted-by":"crossref","unstructured":"Vuppala, A. K., Rao, K. S., & Chakrabarti, S. (2011). Improved consonant-vowel recognition for low bit-rate coded speech. International Journal of Adaptive Control Signal Processing, doi: 10.1002\/acs.1286 .","DOI":"10.1002\/acs.1286"},{"key":"9148_CR51","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1007\/s10772-010-9086-9","volume":"14","author":"A. K. Vuppala","year":"2011","unstructured":"Vuppala, A. K., Yadav, J., Chakrabarti, S., & Rao, K. S. (2011). Application of prosody models for developing speech systems in Indian languages. International Journal of Speech Technology, 14, 19\u201333.","journal-title":"International Journal of Speech Technology"},{"key":"9148_CR52","doi-asserted-by":"crossref","first-page":"1894","DOI":"10.1109\/TASL.2012.2191284","volume":"20","author":"A. K. Vuppala","year":"2012","unstructured":"Vuppala, A. K., Yadav, J., Chakrabarti, S., & Rao, K. S. (2012). Vowel onset point detection for low bit rate coded speech. IEEE Transactions on Speech and Audio Processing, 20, 1894\u20131903.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9148_CR53","first-page":"2102","volume-title":"Proc. of int. conf. on spoken language processing (ICSLP-2006)","author":"W. Wu","year":"2006","unstructured":"Wu, W., Zheng, T. F., Xu, M. X., & Bao, H. J. (2006). Study on speaker verification on emotional speech. In Proc. of int. conf. on spoken language processing (ICSLP-2006) (pp.\u00a02102\u20132105)."},{"key":"9148_CR54","volume-title":"Artificial neural networks","author":"B. Yegnanarayana","year":"1999","unstructured":"Yegnanarayana, B. (1999). Artificial neural networks. New Delhi: Prentice-Hall."},{"key":"9148_CR55","doi-asserted-by":"crossref","first-page":"459","DOI":"10.1016\/S0893-6080(02)00019-9","volume":"15","author":"B. Yegnanarayana","year":"2002","unstructured":"Yegnanarayana, B., & Kishore, S. P. (2002). AANN an alternative to GMM for pattern recognition. Neural Networks, 15, 459\u2013469.","journal-title":"Neural Networks"},{"key":"9148_CR56","first-page":"409","volume-title":"Proc. IEEE int. conf. acoust., speech, signal processing","author":"B. Yegnanarayana","year":"2001","unstructured":"Yegnanarayana, B., Reddy, K. S., & Kishore, S. P. (2001). Source and system features for speaker recognition using AANN models. In Proc. IEEE int. conf. acoust., speech, signal processing, Salt Lake City, Utah, USA, May 2001 (pp.\u00a0409\u2013412)."},{"key":"9148_CR57","unstructured":"Zachariah, J. M. (2002). Text-dependent speaker verification using segmental suprasegmental and source features. Master\u2019s thesis, Department of Computer Science and Engineering, Indian Institute of Technology Madras, Chennai 600 036, India, March."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-012-9148-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-012-9148-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-012-9148-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,16]],"date-time":"2022-01-16T23:38:59Z","timestamp":1642376339000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-012-9148-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,6,8]]},"references-count":57,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2012,9]]}},"alternative-id":["9148"],"URL":"https:\/\/doi.org\/10.1007\/s10772-012-9148-2","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,6,8]]}}}