{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T21:54:42Z","timestamp":1767650082845},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T00:00:00Z","timestamp":1567296000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T00:00:00Z","timestamp":1567296000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1007\/s10772-019-09626-5","type":"journal-article","created":{"date-parts":[[2019,9,3]],"date-time":"2019-09-03T04:54:02Z","timestamp":1567486442000},"page":"533-549","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Prosodic transformation in vocal emotion conversion for multi-lingual scenarios: a pilot study"],"prefix":"10.1007","volume":"22","author":[{"given":"Susmitha","family":"Vekkot","sequence":"first","affiliation":[]},{"given":"Deepa","family":"Gupta","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,9,3]]},"reference":[{"issue":"5","key":"9626_CR1","doi-asserted-by":"publisher","first-page":"134","DOI":"10.5923\/j.ajsp.20120205.06","volume":"2","author":"R Aihara","year":"2012","unstructured":"Aihara, R., Takashima, R., Takiguchi, T., & Ariki, Y. (2012). GMM-based emotional voice conversion using spectrum and prosody features. American Journal of Signal Processing, 2(5), 134\u2013138.","journal-title":"American Journal of Signal Processing"},{"key":"9626_CR2","doi-asserted-by":"crossref","unstructured":"Aihara, R., Ueda, R., Takiguchi, T., & Ariki, Y. (2014). Exemplar-based emotional voice conversion using non-negative matrix factorization. In Signal and Information Processing Association Annual Summit and Conference (APSIPA), 2014 Asia-Pacific (pp. 1\u20137).","DOI":"10.1109\/APSIPA.2014.7041640"},{"key":"9626_CR3","doi-asserted-by":"crossref","unstructured":"Akagi, M., Han, X., Elbarougy, R., Hamada, Y., & Li, J. (2014). Toward affective speech-to-speech translation: Strategy for emotional speech recognition and synthesis in multiple languages. In Signal and Information Processing Association Annual Summit and Conference (APSIPA), 2014 Asia-Pacific (pp. 1\u201310).","DOI":"10.1109\/APSIPA.2014.7041623"},{"key":"9626_CR4","volume-title":"The constitution of India","author":"PM Bakshi","year":"1982","unstructured":"Bakshi, P. M., & Kashyap, S. C. (1982). The constitution of India. Prayagraj: Universal Law Publishing."},{"key":"9626_CR6","doi-asserted-by":"crossref","unstructured":"Burkhardt, F., Paeschke, A., Rolfes, M., Sendlmeier, W. F., & Weiss, B. (2005). A database of German emotional speech. In Ninth European Conference on Speech Communication and Technology (pp. 1517\u20131520).","DOI":"10.21437\/Interspeech.2005-446"},{"key":"9626_CR5","unstructured":"Burkhardt, F., & Sendlmeier, W. F. (2000). Verification of acoustical correlates of emotional speech using formant-synthesis. In ISCA Tutorial and Research Workshop (ITRW) on speech and emotion (pp. 151\u2013156)."},{"key":"9626_CR7","doi-asserted-by":"crossref","unstructured":"Cabral, J. P., & Oliveira, L. C. (2006). Emovoice: a system to generate emotions in speech. In Ninth International Conference on Spoken Language Processing (pp. 1798\u20131801).","DOI":"10.21437\/Interspeech.2006-497"},{"issue":"1","key":"9626_CR8","first-page":"1","volume":"8","author":"JE Cahn","year":"1990","unstructured":"Cahn, J. E. (1990). The generation of affect in synthesized speech. Journal of the American Voice I\/O Society, 8(1), 1\u201319.","journal-title":"Journal of the American Voice I\/O Society"},{"key":"9626_CR9","doi-asserted-by":"crossref","unstructured":"Cen, L., Chan, P., Dong, M., & Li, H. (2010). Generating emotional speech from neutral speech. In 7th International Symposium on Chinese Spoken Language Processing (pp. 383\u2013386).","DOI":"10.1109\/ISCSLP.2010.5684862"},{"issue":"5","key":"9626_CR10","doi-asserted-by":"publisher","first-page":"954","DOI":"10.1109\/TASL.2010.2047683","volume":"18","author":"S Desai","year":"2010","unstructured":"Desai, S., Black, A. W., Yegnanarayana, B., & Prahallad, K. (2010). Spectral mapping using artificial neural networks for voice conversion. IEEE Transactions on Audio, Speech, and Language Processing, 18(5), 954\u2013964.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"7","key":"9626_CR11","doi-asserted-by":"publisher","first-page":"2518","DOI":"10.1007\/s00034-015-0159-5","volume":"35","author":"D Govind","year":"2016","unstructured":"Govind, D., & Joy, T. T. (2016). Improving the flexibility of dynamic prosody modification using instants of significant excitation. Circuits, Systems, and Signal Processing, 35(7), 2518\u20132543.","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"9626_CR13","doi-asserted-by":"crossref","unstructured":"Govind, D., & Prasanna, S. R. M. (2012). Epoch extraction from emotional speech. In International Conference on Signal Processing and Communications (SPCOM) (pp. 1\u20135).","DOI":"10.1109\/SPCOM.2012.6289995"},{"issue":"1","key":"9626_CR12","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1007\/s10772-012-9155-3","volume":"16","author":"D Govind","year":"2013","unstructured":"Govind, D., & Prasanna, S. M. (2013). Dynamic prosody modification using zero frequency filtered signal. International Journal of Speech Technology, 16(1), 41\u201354.","journal-title":"International Journal of Speech Technology"},{"key":"9626_CR14","doi-asserted-by":"crossref","unstructured":"Govind, D., Prasanna, S. M., & Yegnanarayana, B. (2011). Neutral to target emotion conversion using source and suprasegmental information. In Twelfth Annual Conference of the International Speech Communication Association (pp. 2969\u20132972).","DOI":"10.21437\/Interspeech.2011-743"},{"key":"9626_CR15","unstructured":"Haq, S., Jackson, P. J., & Edge, J. (2009). Speaker-dependent audio-visual emotion recognition. In AVSP (pp. 53\u201358)."},{"issue":"3","key":"9626_CR16","doi-asserted-by":"publisher","first-page":"806","DOI":"10.1109\/TASL.2011.2165944","volume":"20","author":"E Helander","year":"2011","unstructured":"Helander, E., Sil\u00e9n, H., Virtanen, T., & Gabbouj, M. (2011). Voice conversion using dynamic kernel partial least squares regression. IEEE Transactions on Audio, Speech, and Language Processing, 20(3), 806\u2013817.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9626_CR17","doi-asserted-by":"crossref","unstructured":"Hunt, A. J., & Black, A. W. (1996). Unit selection in a concatenative speech synthesis system using a large speech database. In IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings (Vol. 1, pp. 373\u2013376).","DOI":"10.1109\/ICASSP.1996.541110"},{"key":"9626_CR19","doi-asserted-by":"crossref","unstructured":"Kadiri, S. R., & Yegnanarayana, B. (2015). Analysis of singing voice for epoch extraction using zero frequency filtering method. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 4260\u20134264).","DOI":"10.1109\/ICASSP.2015.7178774"},{"key":"9626_CR18","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1016\/j.specom.2016.11.005","volume":"86","author":"SR Kadiri","year":"2017","unstructured":"Kadiri, S. R., & Yegnanarayana, B. (2017). Epoch extraction from emotional speech using single frequency filtering approach. Speech Communication, 86, 52\u201363.","journal-title":"Speech Communication"},{"key":"9626_CR20","doi-asserted-by":"crossref","unstructured":"Koolagudi, S. G., Maity, S., Kumar, V. A., Chakrabarti, S., & Rao, K. S. (2009). IITKGP-SESC: Speech database for emotion analysis. In International Conference on Contemporary Computing (pp. 485\u2013492). Springer, Berlin.","DOI":"10.1007\/978-3-642-03547-0_46"},{"key":"9626_CR21","doi-asserted-by":"crossref","unstructured":"Luo, Z., Takiguchi, T., & Ariki, Y. (2016). Emotional voice conversion using deep neural networks with MCC and F0 features. In IEEE\/ACIS 15th International Conference on Computer and Information Science (ICIS) (pp. 1\u20135).","DOI":"10.1109\/ICIS.2016.7550889"},{"key":"9626_CR23","doi-asserted-by":"crossref","unstructured":"Ming, H., Huang, D., Xie, L., Wu, J., Dong, M., & Li, H. (2016a). Deep bidirectional LSTM modeling of timbre and prosody for emotional voice conversion. In Proceeding of INTERSPEECH","DOI":"10.21437\/Interspeech.2016-1053"},{"key":"9626_CR22","doi-asserted-by":"crossref","unstructured":"Ming, H., Huang, D., Xie, L., Zhang, S., Dong, M., & Li, H. (2016b). Exemplar-based sparse representation of timbre and prosody for voice conversion. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 5175\u20135179).","DOI":"10.1109\/ICASSP.2016.7472664"},{"issue":"4","key":"9626_CR24","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1016\/0167-6393(95)00005-9","volume":"16","author":"IR Murray","year":"1995","unstructured":"Murray, I. R., & Arnott, J. L. (1995). Implementation and testing of a system for producing emotion-by-rule in synthetic speech. Speech Communication, 16(4), 369\u2013390.","journal-title":"Speech Communication"},{"issue":"8","key":"9626_CR25","doi-asserted-by":"publisher","first-page":"1602","DOI":"10.1109\/TASL.2008.2004526","volume":"16","author":"KSR Murty","year":"2008","unstructured":"Murty, K. S. R., & Yegnanarayana, B. (2008). Epoch extraction from speech signals. IEEE Transactions on Audio, Speech, and Language Processing, 16(8), 1602\u20131613.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"9","key":"9626_CR26","doi-asserted-by":"publisher","first-page":"5265","DOI":"10.1007\/s11042-015-3039-x","volume":"75","author":"HQ Nguyen","year":"2016","unstructured":"Nguyen, H. Q., Lee, S. W., Tian, X., Dong, M., & Chng, E. S. (2016). High quality voice conversion using prosodic and high-resolution spectral features. Multimedia Tools and Applications, 75(9), 5265\u20135285.","journal-title":"Multimedia Tools and Applications"},{"key":"9626_CR28","doi-asserted-by":"crossref","unstructured":"Pravena, D., & Govind, D. (2016). Expressive speech analysis for epoch extraction using zero frequency filtering approach. In IEEE Students\u2019 Technology Symposium (TechSym) (pp. 240\u2013244).","DOI":"10.1109\/TechSym.2016.7872689"},{"issue":"2","key":"9626_CR27","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1007\/s10772-017-9407-3","volume":"20","author":"D Pravena","year":"2017","unstructured":"Pravena, D., & Govind, D. (2017). Development of simulated emotion speech database for excitation source analysis. International Journal of Speech Technology, 20(2), 327\u2013338.","journal-title":"International Journal of Speech Technology"},{"issue":"1","key":"9626_CR29","doi-asserted-by":"publisher","first-page":"323","DOI":"10.3758\/s13428-017-0873-y","volume":"50","author":"L Rachman","year":"2018","unstructured":"Rachman, L., Liuni, M., Arias, P., Lind, A., Johansson, P., Hall, L., et al. (2018). DAVID: An open-source platform for real-time transformation of infra-segmental emotional cues in running speech. Behavior Research Methods, 50(1), 323\u2013343.","journal-title":"Behavior Research Methods"},{"issue":"6","key":"9626_CR30","doi-asserted-by":"publisher","first-page":"745","DOI":"10.1016\/j.specom.2013.03.002","volume":"55","author":"KS Rao","year":"2013","unstructured":"Rao, K. S., & Vuppala, A. K. (2013). Non-uniform time scale modification using instants of significant excitation and vowel onset points. Speech Communication, 55(6), 745\u2013756.","journal-title":"Speech Communication"},{"key":"9626_CR31","doi-asserted-by":"crossref","unstructured":"Sarkar, P., Haque, A., Dutta, A. K., Reddy, G., Harikrishna, D. M., Dhara, P., & Rao, K. S. (2014). Designing prosody rule-set for converting neutral TTS speech to storytelling style speech for indian languages: Bengali, Hindi and Telugu. In 2014 Seventh International Conference on Contemporary Computing (IC3) (pp. 473\u2013477).","DOI":"10.1109\/IC3.2014.6897219"},{"key":"9626_CR32","series-title":"Affective information processing","first-page":"111","volume-title":"Expressive speech synthesis: past, present, and possible futures","author":"M Schr\u00f6der","year":"2009","unstructured":"Schr\u00f6der, M. (2009). Expressive speech synthesis: past, present, and possible futures (pp. 111\u2013126)., Affective information processing London: Springer."},{"issue":"4","key":"9626_CR33","doi-asserted-by":"publisher","first-page":"1145","DOI":"10.1109\/TASL.2006.876113","volume":"14","author":"J Tao","year":"2006","unstructured":"Tao, J., Kang, Y., & Li, A. (2006). Prosody conversion from neutral speech to emotional speech. IEEE Transactions on Audio, Speech, and Language Processing, 14(4), 1145\u20131154.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"4","key":"9626_CR34","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TASL.2006.876129","volume":"14","author":"M Theune","year":"2006","unstructured":"Theune, M., Meijs, K., Heylen, D., & Ordelman, R. (2006). Generating expressive speech for storytelling applications. IEEE Transactions on Audio, Speech, and Language Processing, 14(4), 1137\u20131144.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"8","key":"9626_CR35","doi-asserted-by":"publisher","first-page":"2222","DOI":"10.1109\/TASL.2007.907344","volume":"15","author":"T Toda","year":"2007","unstructured":"Toda, T., Black, A. W., & Tokuda, K. (2007). Voice conversion based on maximum-likelihood estimation of spectral parameter trajectory. IEEE Transactions on Audio, Speech, and Language Processing, 15(8), 2222\u20132235.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9626_CR36","doi-asserted-by":"publisher","first-page":"81883","DOI":"10.1109\/ACCESS.2019.2923003","volume":"7","author":"S Vekkot","year":"2019","unstructured":"Vekkot, S., Gupta, D., Zakariah, M., & Alotaibi, Y. A. (2019). Hybrid framework for speaker-independent emotion conversion using i-vector PLDA and neural network. IEEE Access, 7, 81883\u201381902.","journal-title":"IEEE Access"},{"key":"9626_CR37","doi-asserted-by":"crossref","unstructured":"Vekkot, S., & Tripathi, S. (2016a). Significance of glottal closure instants detection algorithms in vocal emotion conversion. In International Workshop Soft Computing Applications (pp. 462\u2013473). Springer, Cham.","DOI":"10.1007\/978-3-319-62521-8_40"},{"key":"9626_CR39","doi-asserted-by":"crossref","unstructured":"Vekkot, S., & Tripathi, S. (2016b). Inter-emotion conversion using dynamic time warping and prosody imposition. In International Symposium on Intelligent Systems Technologies and Applications (pp. 913\u2013924). Springer, Cham.","DOI":"10.1007\/978-3-319-47952-1_73"},{"key":"9626_CR38","doi-asserted-by":"crossref","unstructured":"Vekkot, S., & Tripathi, S. (2017). Vocal emotion conversion using WSOLA and linear prediction. In International Conference on Speech and Computer (pp. 777\u2013787). Springer, Cham.","DOI":"10.1007\/978-3-319-66429-3_78"},{"key":"9626_CR40","doi-asserted-by":"crossref","unstructured":"Verhelst, W., & Roelands, M. (1993). An overlap-add technique based on waveform similarity (WSOLA) for high quality time-scale modification of speech. In IEEE International Conference on Acoustics, Speech, and Signal Processing (Vol. 2, pp. 554\u2013557).","DOI":"10.1109\/ICASSP.1993.319366"},{"key":"9626_CR41","doi-asserted-by":"crossref","unstructured":"Verma, R., Sarkar, P., & Rao, K. S. (2015). Conversion of neutral speech to storytelling style speech. In Eighth International Conference on Advances in Pattern Recognition (ICAPR) (pp. 1\u20136).","DOI":"10.1109\/ICAPR.2015.7050705"},{"key":"9626_CR42","doi-asserted-by":"crossref","unstructured":"Vuppala, A. K., & Kadiri, S. R. (2014). Neutral to anger speech conversion using non-uniform duration modification. In 9th International Conference on Industrial and Information Systems (ICIIS) (pp. 1\u20134)","DOI":"10.1109\/ICIINFS.2014.7036614"},{"issue":"5","key":"9626_CR43","doi-asserted-by":"publisher","first-page":"1643","DOI":"10.1007\/s00034-015-0134-1","volume":"35","author":"HK Vydana","year":"2016","unstructured":"Vydana, H. K., Kadiri, S. R., & Vuppala, A. K. (2016). Vowel-based non-uniform prosody modification for emotion conversion. Circuits, Systems, and Signal Processing, 35(5), 1643\u20131663.","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"9626_CR44","doi-asserted-by":"crossref","unstructured":"Vydana, H. K., Raju, V. V., Gangashetty, S. V., & Vuppala, A. K. (2015). Significance of emotionally significant areas of speech for emotive to neutral conversion. In International Conference on Mining Intelligence and Knowledge Exploration (pp. 287\u2013296). Springer, Cham.","DOI":"10.1007\/978-3-319-26832-3_28"},{"issue":"6","key":"9626_CR45","first-page":"1394","volume":"18","author":"CH Wu","year":"2009","unstructured":"Wu, C. H., Hsia, C. C., Lee, C. H., & Lin, M. C. (2009). Hierarchical prosody conversion using regression-based clustering for emotional speech synthesis. IEEE Transactions on Audio, Speech, and Language Processing, 18(6), 1394\u20131405.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"10","key":"9626_CR46","doi-asserted-by":"publisher","first-page":"1506","DOI":"10.1109\/TASLP.2014.2333242","volume":"22","author":"Z Wu","year":"2014","unstructured":"Wu, Z., Virtanen, T., Chng, E. S., & Li, H. (2014). Exemplar-based sparse representation with residual compensation for voice conversion. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 22(10), 1506\u20131521.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"9626_CR47","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/s00034-015-0051-3","volume":"35","author":"J Yadav","year":"2016","unstructured":"Yadav, J., & Rao, K. S. (2016). Prosodic mapping using neural networks for emotion conversion in Hindi language. Circuits, Systems, and Signal Processing, 35(1), 139\u2013162.","journal-title":"Circuits, Systems, and Signal Processing"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-019-09626-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-019-09626-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-019-09626-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,27]],"date-time":"2022-09-27T06:07:11Z","timestamp":1664258831000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-019-09626-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,9]]},"references-count":47,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2019,9]]}},"alternative-id":["9626"],"URL":"https:\/\/doi.org\/10.1007\/s10772-019-09626-5","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,9]]},"assertion":[{"value":"31 July 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 August 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 September 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}