{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T18:08:35Z","timestamp":1742926115767,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":26,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811501203"},{"type":"electronic","value":"9789811501210"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-981-15-0121-0_51","type":"book-chapter","created":{"date-parts":[[2019,9,12]],"date-time":"2019-09-12T23:03:21Z","timestamp":1568329401000},"page":"645-656","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Control Emotion Intensity for LSTM-Based Expressive Speech Synthesis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8842-7329","authenticated-orcid":false,"given":"Xiaolian","family":"Zhu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2815-8494","authenticated-orcid":false,"given":"Liumeng","family":"Xue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,9,13]]},"reference":[{"key":"51_CR1","unstructured":"Sotelo, J., et al.: Char2wav: end-to-end speech synthesis (2017)"},{"key":"51_CR2","unstructured":"Wang, Y., et al.: Tacotron: a fully end-to-end text-to-speech synthesis model. arXiv preprint (2017)"},{"key":"51_CR3","unstructured":"Skerry-Ryan, R., et al.: Towards end-to-end prosody transfer for expressive speech synthesis with tacotron. arXiv preprint arXiv:1803.09047 (2018)"},{"key":"51_CR4","unstructured":"Arik, S.O., et al.: Deep voice: real-time neural text-to-speech. arXiv preprint arXiv:1702.07825 (2017)"},{"key":"51_CR5","unstructured":"Li, N., Liu, S., Liu, Y., Zhao, S., Liu, M., Zhou, M.: Close to human quality TTS with transformer. arXiv preprint arXiv:1809.08895 (2018)"},{"key":"51_CR6","unstructured":"Hunt, A.J., Black, A.W.: Unit selection in a concatenative speech synthesis system using a large speech database. In: 1996 IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings, ICASSP 1996, vol. 1, pp. 373\u2013376. IEEE (1996)"},{"key":"51_CR7","doi-asserted-by":"crossref","unstructured":"Bulut, M., Narayanan, S.S., Syrdal, A.K.: Expressive speech synthesis using a concatenative synthesizer. In: Seventh International Conference on Spoken Language Processing (2002)","DOI":"10.21437\/ICSLP.2002-389"},{"issue":"4","key":"51_CR8","doi-asserted-by":"publisher","first-page":"1099","DOI":"10.1109\/TASL.2006.876123","volume":"14","author":"JF Pitrelli","year":"2006","unstructured":"Pitrelli, J.F., Bakis, R., Eide, E.M., Fernandez, R., Hamza, W., Picheny, M.A.: The IBM expressive text-to-speech synthesis system for American English. IEEE Trans. Audio Speech Lang. Process. 14(4), 1099\u20131108 (2006)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"51_CR9","doi-asserted-by":"crossref","unstructured":"Cabral, J.P., Saam, C., Vanmassenhove, E., Bradley, S., Haider, F.: The ADAPT entry to the Blizzard challenge 2016. In: 2016 Proceedings of the Blizzard Challenge (2016)","DOI":"10.21437\/Blizzard.2016-3"},{"issue":"3","key":"51_CR10","doi-asserted-by":"publisher","first-page":"502","DOI":"10.1093\/ietisy\/e88-d.3.502","volume":"88","author":"J Yamagishi","year":"2005","unstructured":"Yamagishi, J., Onishi, K., Masuko, T., Kobayashi, T.: Acoustic modeling of speaking styles and emotional expressions in HMM-based speech synthesis. IEICE Trans. Inf. Syst. 88(3), 502\u2013509 (2005)","journal-title":"IEICE Trans. Inf. Syst."},{"issue":"2","key":"51_CR11","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1016\/j.specom.2012.09.003","volume":"55","author":"T Nose","year":"2013","unstructured":"Nose, T., Kobayashi, T.: An intuitive style control technique in HMM-based expressive speech synthesis using subjective style intensity and multiple-regression global variance model. Speech Commun. 55(2), 347\u2013357 (2013)","journal-title":"Speech Commun."},{"key":"51_CR12","unstructured":"Lorenzo-Trueba, J., Barra-Chicote, R., Watts, O., Montero, J.M.: Towards speaking style transplantation in speech synthesis. In: Eighth ISCA Workshop on Speech Synthesis (2013)"},{"issue":"1","key":"51_CR13","doi-asserted-by":"publisher","first-page":"292","DOI":"10.1016\/j.csl.2015.03.008","volume":"34","author":"J Lorenzo-Trueba","year":"2015","unstructured":"Lorenzo-Trueba, J., Barra-Chicote, R., San-Segundo, R., Ferreiros, J., Yamagishi, J., Montero, J.M.: Emotion transplantation through adaptation in HMM-based speech synthesis. Comput. Speech Lang. 34(1), 292\u2013307 (2015)","journal-title":"Comput. Speech Lang."},{"issue":"11","key":"51_CR14","doi-asserted-by":"publisher","first-page":"1039","DOI":"10.1016\/j.specom.2009.04.004","volume":"51","author":"H Zen","year":"2009","unstructured":"Zen, H., Tokuda, K., Black, A.W.: Statistical parametric speech synthesis. Speech Commun. 51(11), 1039\u20131064 (2009)","journal-title":"Speech Commun."},{"key":"51_CR15","unstructured":"Zen, H.: Acoustic modeling in statistical parametric speech synthesis-from HMM to LSTM-RNN. In: Proceedings of the MLSLP (2015)"},{"key":"51_CR16","unstructured":"Ze, H., Senior, A., Schuster, M.: Statistical parametric speech synthesis using deep neural networks. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 7962\u20137966. IEEE (2013)"},{"key":"51_CR17","doi-asserted-by":"crossref","unstructured":"Fan, Y., Qian, Y., Xie, F.-L., Soong, F.K.: TTS synthesis with bidirectional LSTM based recurrent neural networks. In: Fifteenth Annual Conference of the International Speech Communication Association (2014)","DOI":"10.21437\/Interspeech.2014-443"},{"key":"51_CR18","doi-asserted-by":"crossref","unstructured":"Uria, B., Murray, I., Renals, S., Valentini-Botinhao, C., Bridle, J.: Modelling acoustic feature dependencies with artificial neural networks: Trajectory-RNADE. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4465\u20134469. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178815"},{"key":"51_CR19","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xie, L., Chen, X., Lou, X., Zhu, X., Tan, X.: Controlling expressivity using input codes in neural network based TTS. In: 2018 First Asian Conference on Affective Computing and Intelligent Interaction (ACII Asia), pp. 1\u20136. IEEE (2018)","DOI":"10.1109\/ACIIAsia.2018.8470327"},{"key":"51_CR20","doi-asserted-by":"crossref","unstructured":"Li, B., Zen, H.: Multi-language multi-speaker acoustic modeling for LSTM-RNN based statistical parametric speech synthesis. In: INTERSPEECH 2016, pp. 2468\u20132472 (2016)","DOI":"10.21437\/Interspeech.2016-172"},{"key":"51_CR21","doi-asserted-by":"crossref","unstructured":"Huang, Z., Tang, J., Xue, S., Dai, L.: Speaker adaptation of RNN-BLSTM for speech recognition based on speaker code. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5305\u20135309. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472690"},{"key":"51_CR22","doi-asserted-by":"crossref","unstructured":"Luong, H.-T., Takaki, S., Henter, G.E., Yamagishi, J.: Adapting and controlling DNN-based speech synthesis using input codes. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4905\u20134909. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7953089"},{"key":"51_CR23","doi-asserted-by":"crossref","unstructured":"Watts, O., Wu, Z., King, S.: Sentence-level control vectors for deep neural network speech synthesis. In: Sixteenth Annual Conference of the International Speech Communication Association (2015)","DOI":"10.21437\/Interspeech.2015-128"},{"key":"51_CR24","unstructured":"Wang, Y., et al.: Style tokens: unsupervised style modeling, control and transfer in end-to-end speech synthesis. arXiv preprint arXiv:1803.09017 (2018)"},{"key":"51_CR25","volume-title":"Multidimensional Scaling","author":"TF Cox","year":"2001","unstructured":"Cox, T.F., Cox, M.A.: Multidimensional Scaling. Chapman and Hall\/CRC, New York (2001)"},{"issue":"3\u20134","key":"51_CR26","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/S0167-6393(98)00085-5","volume":"27","author":"H Kawahara","year":"1999","unstructured":"Kawahara, H., Masuda-Katsuse, I., De Cheveigne, A.: Restructuring speech representations using a pitch-adaptive time-frequency smoothing and an instantaneous-frequency-based F0 extraction: possible role of a repetitive structure in sounds. Speech Commun. 27(3\u20134), 187\u2013207 (1999)","journal-title":"Speech Commun."}],"container-title":["Communications in Computer and Information Science","Data Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-15-0121-0_51","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,23]],"date-time":"2024-07-23T10:13:13Z","timestamp":1721729593000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-15-0121-0_51"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9789811501203","9789811501210"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-981-15-0121-0_51","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"13 September 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPCSEE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference of Pioneering Computer Scientists, Engineers and Educators","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guilin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpcsee2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2019.icpcsee.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}