{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T08:10:01Z","timestamp":1777450201274,"version":"3.51.4"},"publisher-location":"Cham","reference-count":16,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030007935","type":"print"},{"value":"9783030007942","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-00794-2_48","type":"book-chapter","created":{"date-parts":[[2018,9,7]],"date-time":"2018-09-07T19:50:24Z","timestamp":1536349824000},"page":"445-452","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["WaveNet-Based Speech Synthesis Applied to Czech"],"prefix":"10.1007","author":[{"given":"Zden\u011bk","family":"Hanzl\u00ed\u010dek","sequence":"first","affiliation":[]},{"given":"Jakub","family":"V\u00edt","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Tihelka","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,8]]},"reference":[{"issue":"3","key":"48_CR1","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1109\/MSP.2014.2359987","volume":"32","author":"ZH Ling","year":"2015","unstructured":"Ling, Z.H., Kang, S.Y., Zen, H., et al.: Deep learning for acoustic modeling in parametric speech generation: a systematic review of existing techniques and future trends. IEEE Signal Process. Mag. 32(3), 35\u201352 (2015)","journal-title":"IEEE Signal Process. 
Mag."},{"issue":"11","key":"48_CR2","doi-asserted-by":"publisher","first-page":"1039","DOI":"10.1016\/j.specom.2009.04.004","volume":"51","author":"H Zen","year":"2009","unstructured":"Zen, H., Tokuda, K., Black, A.W.: Statistical parametric speech synthesis. Speech Commun. 51(11), 1039\u20131064 (2009)","journal-title":"Speech Commun."},{"key":"48_CR3","unstructured":"Zen, H.: Acoustic modeling in statistical parametric speech synthesis - from HMM to LSTM-RNN. In: Proceedings of MLSLP (2015)"},{"key":"48_CR4","unstructured":"van den Oord, A., Dieleman, S., Zen, H., Simonyan, K., et al.: WaveNet: a generative model for raw audio. CoRR abs\/1609.03499 (2016). http:\/\/arxiv.org\/abs\/1609.03499"},{"key":"48_CR5","doi-asserted-by":"crossref","unstructured":"Kobayashi, K., Hayashi, T., Tamamori, A., Toda, T.: Statistical voice conversion with WaveNet-based waveform generation. In: Proceedings of Interspeech 2017, pp. 1138\u20131142 (2017)","DOI":"10.21437\/Interspeech.2017-986"},{"key":"48_CR6","doi-asserted-by":"crossref","unstructured":"Hayashi, T., Tamamori, A., Kobayashi, K., et al.: An investigation of multi-speaker training for WaveNet vocoder. In: Proceedings of ASRU 2017, pp. 712\u2013718 (2017)","DOI":"10.1109\/ASRU.2017.8269007"},{"key":"48_CR7","doi-asserted-by":"crossref","unstructured":"Tamamori, A., Hayashi, T., Kobayashi, K., et al.: Speaker-dependent WaveNet vocoder. In: Proceedings of Interspeech 2017, pp. 1118\u20131122 (2017)","DOI":"10.21437\/Interspeech.2017-314"},{"key":"48_CR8","unstructured":"Arik, S.O., Chrzanowski, M., Coates, A., et al.: Deep voice: real-time neural text-to-speech. CoRR abs\/1702.07825 (2017). 
https:\/\/arxiv.org\/abs\/1702.07825"},{"key":"48_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1007\/11846406_55","volume-title":"Text, Speech and Dialogue","author":"J Matou\u0161ek","year":"2006","unstructured":"Matou\u0161ek, J., Tihelka, D., Romportl, J.: Current state of Czech text-to-speech system ARTIC. In: Sojka, P., Kope\u010dek, I., Pala, K. (eds.) TSD 2006. LNCS, vol. 4188, pp. 439\u2013446. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11846406_55"},{"key":"48_CR10","unstructured":"Matou\u0161ek, J., Leg\u00e1t, M., Tihelka, D.: Is unit selection aware of audible artifacts? In: Proceedings of SSW8, pp. 267\u2013271. ISCA (2013)"},{"key":"48_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1007\/978-3-642-15760-8_37","volume-title":"Text, Speech and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2010","unstructured":"Hanzl\u00ed\u010dek, Z.: Czech HMM-Based Speech Synthesis. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2010. LNCS, vol. 6231, pp. 291\u2013298. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15760-8_37"},{"key":"48_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1007\/978-3-319-64206-2_40","volume-title":"Text, Speech, and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2017","unstructured":"Hanzl\u00ed\u010dek, Z.: Optimal number of states in HMM-based speech synthesis. In: Ek\u0161tein, K., Matou\u0161ek, V. (eds.) TSD 2017. LNCS, vol. 10415, pp. 353\u2013361. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-64206-2_40"},{"key":"48_CR13","unstructured":"Matou\u0161ek, J., Tihelka, D., Romportl, J.: Building of a speech corpus optimised for unit selection TTS synthesis. 
In: Proceedings of LREC (2008)"},{"key":"48_CR14","unstructured":"Method for the subjective assessment of intermediate quality level of coding systems. ITU Recommendation ITU-R BS.1534-2 (2014)"},{"key":"48_CR15","doi-asserted-by":"crossref","unstructured":"Henter, G.E., Merritt, T., Shannon, M., et al.: Measuring the perceptual effects of modelling assumptions in speech synthesis using stimuli constructed from repeated natural speech. In: Proceedings of Interspeech 2014, pp. 1504\u20131508 (2014)","DOI":"10.21437\/Interspeech.2014-361"},{"key":"48_CR16","unstructured":"van den Oord, A., Li, Y., Babuschkin, I., et al.: Parallel WaveNet: fast high-fidelity speech synthesis. CoRR abs\/1711.10433 (2017). https:\/\/arxiv.org\/abs\/1711.10433"}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-00794-2_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T16:38:01Z","timestamp":1709829481000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-00794-2_48"}},"subtitle":["A Comparison with the Traditional Synthesis Methods"],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030007935","9783030007942"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-00794-2_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"8 September 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}