{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T14:22:26Z","timestamp":1762352546496},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,10,27]],"date-time":"2016-10-27T00:00:00Z","timestamp":1477526400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2017,3]]},"DOI":"10.1007\/s10772-016-9386-9","type":"journal-article","created":{"date-parts":[[2016,10,27]],"date-time":"2016-10-27T15:34:30Z","timestamp":1477582470000},"page":"15-25","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["Modification of energy spectra, epoch parameters and prosody for emotion conversion in speech"],"prefix":"10.1007","volume":"20","author":[{"given":"Arijul","family":"Haque","sequence":"first","affiliation":[]},{"given":"Krothapalli Sreenivasa","family":"Rao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,10,27]]},"reference":[{"key":"9386_CR1","series-title":"Information Science and Statistics","volume-title":"Pattern Recognition and Machine Learning","author":"CM Bishop","year":"2007","unstructured":"Bishop, C. M. (2007). Pattern Recognition and Machine Learning (2nd ed.)., Information Science and Statistics New York: Springer.","edition":"2"},{"issue":"6","key":"9386_CR2","doi-asserted-by":"crossref","first-page":"4547","DOI":"10.1121\/1.2909562","volume":"123","author":"M Bulut","year":"2008","unstructured":"Bulut, M., & Narayanan, S. (2008). On the robustness of overall f0-only modifications to the perception of emotions in speech. The Journal of the Acoustical Society of America, 123(6), 4547\u20134558.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9386_CR3","doi-asserted-by":"crossref","unstructured":"Govind, D., Prasanna, S. M., & Yegnanarayana, B. (2011) Neutral to target emotion conversion using source and suprasegmental information. In Interspeech (pp. 2969\u20132972).","DOI":"10.21437\/Interspeech.2011-743"},{"key":"9386_CR4","doi-asserted-by":"crossref","unstructured":"Iriondo, I., Al\u00edas, F., Melench\u00f3n, J., & Llorca, M. A. (2004) Modeling and synthesizing emotional speech for Catalan text-to-speech synthesis. In Tutorial and research workshop on affective dialogue systems (pp. 197\u2013208). New York: Springer.","DOI":"10.1007\/978-3-540-24842-2_20"},{"issue":"2","key":"9386_CR5","doi-asserted-by":"crossref","first-page":"99","DOI":"10.1007\/s10772-011-9125-1","volume":"15","author":"SG Koolagudi","year":"2012","unstructured":"Koolagudi, S. G., & Rao, K. S. (2012). Emotion recognition from speech: A review. International Journal of Speech Technology, 15(2), 99\u2013117.","journal-title":"International Journal of Speech Technology"},{"key":"9386_CR6","volume-title":"Multimodal signals: Cognitive and algorithmic issues","author":"BJ Kr\u00f6ger","year":"2009","unstructured":"Kr\u00f6ger, B. J., & Birkholz, P. (2009). Multimodal signals: Cognitive and algorithmic issues. Berlin: Springer."},{"issue":"2","key":"9386_CR7","doi-asserted-by":"crossref","first-page":"181","DOI":"10.1007\/s10772-012-9175-z","volume":"16","author":"SR Krothapalli","year":"2013","unstructured":"Krothapalli, S. R., & Koolagudi, S. G. (2013). Characterization and recognition of emotions from speech using excitation source information. International Journal of Speech Technology, 16(2), 181\u2013201.","journal-title":"International Journal of Speech Technology"},{"key":"9386_CR8","first-page":"923","volume":"98","author":"JM Montero","year":"1998","unstructured":"Montero, J. M., Gutierrez-Arriola, J. M., Palazuelos, S. E., Enriquez, E., Aguilera, S., & Pardo, J. M. (1998). Emotional speech synthesis: From speech database to TTS. ICSLP, 98, 923\u2013926.","journal-title":"ICSLP"},{"issue":"5\u20136","key":"9386_CR9","doi-asserted-by":"crossref","first-page":"453","DOI":"10.1016\/0167-6393(90)90021-Z","volume":"9","author":"E Moulines","year":"1990","unstructured":"Moulines, E., & Charpentier, F. (1990). Pitch-synchronous waveform processing techniques for text-to-speech synthesis using diphones. Speech Communication, 9(5\u20136), 453\u2013467.","journal-title":"Speech Communication"},{"key":"9386_CR10","volume-title":"Speech variability and emotion: Production and perception","author":"SJL Mozziconacci","year":"1998","unstructured":"Mozziconacci, S. J. L. (1998). Speech variability and emotion: Production and perception. Eindhoven: Technische Universiteit Eindhoven."},{"issue":"8","key":"9386_CR11","doi-asserted-by":"crossref","first-page":"1602","DOI":"10.1109\/TASL.2008.2004526","volume":"16","author":"KSR Murty","year":"2008","unstructured":"Murty, K. S. R., & Yegnanarayana, B. (2008). Epoch extraction from speech signals. IEEE Transactions on Audio, Speech, and Language Processing, 16(8), 1602\u20131613.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"4","key":"9386_CR12","doi-asserted-by":"crossref","first-page":"1099","DOI":"10.1109\/TASL.2006.876123","volume":"14","author":"JF Pitrelli","year":"2006","unstructured":"Pitrelli, J. F., Bakis, R., Eide, E. M., Fernandez, R., Hamza, W., & Picheny, M. A. (2006). The IBM expressive text-to-speech synthesis system for American English. IEEE Transactions on Audio, Speech, and Language Processing, 14(4), 1099\u20131108.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9386_CR13","doi-asserted-by":"crossref","unstructured":"Prasanna, S. M., & Govind, D. (2010) Analysis of excitation source information in emotional speech. In Interspeech (pp. 781\u2013784).","DOI":"10.21437\/Interspeech.2010-284"},{"key":"9386_CR14","doi-asserted-by":"crossref","unstructured":"P\u0159ibilov\u00e1, A., & P\u0159ibil, J. (2009) Spectrum modification for emotional speech synthesis. In Multimodal signals: Cognitive and algorithmic issues (pp. 232\u2013241). Berlin: Springer.","DOI":"10.1007\/978-3-642-00525-1_23"},{"issue":"6","key":"9386_CR17","doi-asserted-by":"crossref","first-page":"2133","DOI":"10.1007\/s00034-012-9428-8","volume":"31","author":"KS Rao","year":"2012","unstructured":"Rao, K. S. (2012). Unconstrained pitch contour modification using instants of significant excitation. Circuits, Systems, and Signal Processing, 31(6), 2133\u20132152.","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"9386_CR15","volume-title":"Emotion recognition using speech features","author":"KS Rao","year":"2012","unstructured":"Rao, K. S., & Koolagudi, S. G. (2012). Emotion recognition using speech features. New York: Springer."},{"issue":"6","key":"9386_CR18","doi-asserted-by":"crossref","first-page":"745","DOI":"10.1016\/j.specom.2013.03.002","volume":"55","author":"KS Rao","year":"2013","unstructured":"Rao, K. S., & Vuppala, A. K. (2013). Non-uniform time scale modification using instants of significant excitation and vowel onset points. Speech Communication, 55(6), 745\u2013756.","journal-title":"Speech Communication"},{"issue":"3","key":"9386_CR19","doi-asserted-by":"crossref","first-page":"972","DOI":"10.1109\/TSA.2005.858051","volume":"14","author":"KS Rao","year":"2006","unstructured":"Rao, K. S., & Yegnanarayana, B. (2006). Prosody modification using instants of significant excitation. IEEE Transactions on Audio, Speech, and Language Processing, 14(3), 972\u2013980.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"01","key":"9386_CR16","doi-asserted-by":"crossref","first-page":"50","DOI":"10.4236\/jsip.2010.11006","volume":"1","author":"KS Rao","year":"2010","unstructured":"Rao, K. S., et al. (2010). Real time prosody modification. Journal of Signal and Information Processing, 1(01), 50.","journal-title":"Journal of Signal and Information Processing"},{"key":"9386_CR20","doi-asserted-by":"crossref","unstructured":"Sarkar, P., Haque, A., Dutta, A. K., Reddy, G., Harikrishna, D., Dhara, P., Verma, R., Narendra, N., Sunil S. B., & Yadav, J., et al. (2014). Designing prosody rule-set for converting neutral TTS speech to storytelling style speech for Indian languages: Bengali, Hindi and Telugu. In Seventh international conference on contemporary computing (IC3). IEEE (pp. 473\u2013477).","DOI":"10.1109\/IC3.2014.6897219"},{"key":"9386_CR21","doi-asserted-by":"crossref","unstructured":"Schr\u00f6der, M. (2001). Emotional speech synthesis: A review. In Interspeech (pp. 561\u2013564).","DOI":"10.21437\/Eurospeech.2001-150"},{"key":"9386_CR22","doi-asserted-by":"crossref","unstructured":"Schr\u00f6der, M. (2004). Dimensional emotion representation as a basis for speech synthesis with non-extreme emotions. In Tutorial and research workshop on affective dialogue systems (pp. 209\u2013220). New York: Springer.","DOI":"10.1007\/978-3-540-24842-2_21"},{"key":"9386_CR23","unstructured":"Silva, A., Vala, M., & Paiva, A. (2001). The storyteller: Building a synthetic character that tells stories. In Proceedings of the workshop multimodal communication and context in embodied agents (pp. 53\u201358)."},{"key":"9386_CR24","doi-asserted-by":"crossref","unstructured":"Tao, J. (2003). Emotion control of Chinese speech synthesis in natural environment. In Interspeech (pp. 2349\u20132352).","DOI":"10.21437\/Eurospeech.2003-648"},{"issue":"4","key":"9386_CR25","doi-asserted-by":"crossref","first-page":"1145","DOI":"10.1109\/TASL.2006.876113","volume":"14","author":"J Tao","year":"2006","unstructured":"Tao, J., Kang, Y., & Li, A. (2006). Prosody conversion from neutral speech to emotional speech. IEEE Transactions on Audio, Speech, and Language Processing, 14(4), 1145\u20131154.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"4","key":"9386_CR26","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TASL.2006.876129","volume":"14","author":"M Theune","year":"2006","unstructured":"Theune, M., Meijs, K., Heylen, D., & Ordelman, R. (2006). Generating expressive speech for storytelling applications. IEEE Transactions on Audio, Speech, and Language Processing, 14(4), 1137\u20131144.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9386_CR27","doi-asserted-by":"crossref","unstructured":"T\u00fcrk, O., & Schr\u00f6der, M. (2008). A comparison of voice conversion methods for transforming voice quality in emotional speech synthesis. In Interspeech (pp. 2282\u20132285).","DOI":"10.21437\/Interspeech.2008-560"},{"issue":"1","key":"9386_CR28","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1007\/s00034-015-0051-3","volume":"35","author":"J Yadav","year":"2016","unstructured":"Yadav, J., & Rao, K. S. (2016). Prosodic mapping using neural networks for emotion conversion in Hindi language. Circuits, Systems, and Signal Processing, 35(1), 139\u2013162.","journal-title":"Circuits, Systems, and Signal Processing"},{"issue":"4","key":"9386_CR29","doi-asserted-by":"crossref","first-page":"614","DOI":"10.1109\/TASL.2008.2012194","volume":"17","author":"B Yegnanarayana","year":"2009","unstructured":"Yegnanarayana, B., & Murty, K. S. R. (2009). Event-based instantaneous fundamental frequency estimation from speech signals. IEEE Transactions on Audio, Speech, and Language Processing, 17(4), 614\u2013624.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9386_CR30","doi-asserted-by":"crossref","unstructured":"Zhang, J. Y., Black, A. W., & Sproat, R. (2003). Identifying speakers in children\u2019s stories for speech synthesis. In Interspeech.","DOI":"10.21437\/Eurospeech.2003-586"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-016-9386-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-016-9386-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-016-9386-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,20]],"date-time":"2024-06-20T06:42:34Z","timestamp":1718865754000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-016-9386-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10,27]]},"references-count":30,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,3]]}},"alternative-id":["9386"],"URL":"https:\/\/doi.org\/10.1007\/s10772-016-9386-9","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,10,27]]}}}