{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T08:09:49Z","timestamp":1777450189469,"version":"3.51.4"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030007935","type":"print"},{"value":"9783030007942","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-00794-2_40","type":"book-chapter","created":{"date-parts":[[2018,9,7]],"date-time":"2018-09-07T19:50:24Z","timestamp":1536349824000},"page":"369-378","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":32,"title":["Current State of Text-to-Speech System ARTIC: A\u00a0Decade of Research on the Field of Speech Technologies"],"prefix":"10.1007","author":[{"given":"Daniel","family":"Tihelka","sequence":"first","affiliation":[]},{"given":"Zden\u011bk","family":"Hanzl\u00ed\u010dek","sequence":"additional","affiliation":[]},{"given":"Mark\u00e9ta","family":"J\u016fzov\u00e1","sequence":"additional","affiliation":[]},{"given":"Jakub","family":"V\u00edt","sequence":"additional","affiliation":[]},{"given":"Jind\u0159ich","family":"Matou\u0161ek","sequence":"additional","affiliation":[]},{"given":"Martin","family":"Gr\u016fber","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,8]]},"reference":[{"key":"40_CR1","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1007\/978-3-642-15760-8_37","volume-title":"Text, Speech and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2010","unstructured":"Hanzl\u00ed\u010dek, Z.: Czech HMM-based speech synthesis. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2010. LNCS (LNAI), vol. 6231, pp. 291\u2013298. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15760-8_37"},{"key":"40_CR2","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/978-3-642-23538-2_14","volume-title":"Text, Speech and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2011","unstructured":"Hanzl\u00ed\u010dek, Z.: Czech HMM-based speech synthesis: experiments with model adaptation. In: Habernal, I., Matou\u0161ek, V. (eds.) TSD 2011. LNCS (LNAI), vol. 6836, pp. 107\u2013114. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-23538-2_14"},{"key":"40_CR3","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1007\/978-3-319-64206-2_40","volume-title":"Text, Speech, and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2017","unstructured":"Hanzl\u00ed\u010dek, Z.: Optimal Number of States in HMM-Based Speech Synthesis. In: Ek\u0161tein, K., Matou\u0161ek, V. (eds.) TSD 2017. LNCS (LNAI), vol. 10415, pp. 353\u2013361. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-64206-2_40"},{"key":"40_CR4","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/978-3-642-40585-3_32","volume-title":"Text, Speech, and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2013","unstructured":"Hanzl\u00ed\u010dek, Z., Matou\u0161ek, J., Tihelka, D.: Experiments on reducing footprint of unit selection TTS system. In: Habernal, I., Matou\u0161ek, V. (eds.) TSD 2013. LNCS (LNAI), vol. 8082, pp. 249\u2013256. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-40585-3_32"},{"key":"40_CR5","series-title":"Topics in Intelligent Engineering and Informatics","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1007\/978-3-642-34422-0_14","volume-title":"Beyond Artificial Intelligence","author":"Z Hanzl\u00ed\u010dek","year":"2012","unstructured":"Hanzl\u00ed\u010dek, Z., Romportl, J., Matou\u0161ek, J.: Voice conservation: towards creating a speech-aid system for total laryngectomees. In: Kelemen, J., Romportl, J., Zackova, E. (eds.) Beyond Artificial Intelligence. TIEI, vol. 4, pp. 203\u2013212. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-34422-0_14"},{"key":"40_CR6","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1007\/978-3-030-00794-2_48","volume-title":"Text, Speech, and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2018","unstructured":"Hanzl\u00ed\u010dek, Z., V\u00edt, J., Tihelka, D.: WaveNet-based speech synthesis applied to Czech: a comparison with the traditional synthesis methods. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2018. LNAI, vol. 11107, pp. 445\u2013452. Springer, Cham (2018)"},{"key":"40_CR7","doi-asserted-by":"crossref","unstructured":"Ircing, P., Romportl, J., Loose, Z.: Audiovisual interface for Czech spoken dialogue system. In: Proceedings of ICSP 2010, pp. 526\u2013529. IEEE, Beijing (2010)","DOI":"10.1109\/ICOSP.2010.5656088"},{"key":"40_CR8","unstructured":"ITU Recommendation BS.1534-2: Method for the subjective assessment of intermediate quality level of coding systems. Technical report, International Telecommunication Union (2014)"},{"key":"40_CR9","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"398","DOI":"10.1007\/978-3-319-10816-2_48","volume-title":"Text, Speech and Dialogue","author":"M J\u016fzov\u00e1","year":"2014","unstructured":"J\u016fzov\u00e1, M., Tihelka, D.: Minimum text corpus selection for limited domain speech synthesis. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2014. LNCS (LNAI), vol. 8655, pp. 398\u2013407. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10816-2_48"},{"key":"40_CR10","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1007\/978-3-319-10816-2_49","volume-title":"Text, Speech and Dialogue","author":"M J\u016fzov\u00e1","year":"2014","unstructured":"J\u016fzov\u00e1, M., Tihelka, D.: Tuning limited domain speech synthesis using general text-to-speech system. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2014. LNCS (LNAI), vol. 8655, pp. 408\u2013415. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10816-2_49"},{"key":"40_CR11","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1007\/978-3-319-43958-7_24","volume-title":"Speech and Computer","author":"M J\u016fzov\u00e1","year":"2016","unstructured":"J\u016fzov\u00e1, M., Tihelka, D., Matou\u0161ek, J.: Designing high-coverage multi-level text corpus for non-professional-voice conservation. In: Ronzhin, A., Potapova, R., N\u00e9meth, G. (eds.) SPECOM 2016. LNCS (LNAI), vol. 9811, pp. 207\u2013215. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-43958-7_24"},{"key":"40_CR12","unstructured":"J\u016fzov\u00e1, M., Tihelka, D., Matou\u0161ek, J., Hanzl\u00ed\u010dek, Z.: Voice conservation and TTS system for people facing total laryngectomy. In: Proceedings of Interspeech 2017, pp. 3425\u20133426. ISCA, Stockholm (2017)"},{"key":"40_CR13","doi-asserted-by":"crossref","unstructured":"Kala, J., Matou\u0161ek, J.: Very fast unit selection using Viterbi search with zero-concatenation-cost chains. In: Proceedings of ICASSP 2014, pp. 2569\u20132573. IEEE, Florence (2014)","DOI":"10.1109\/ICASSP.2014.6854064"},{"key":"40_CR14","doi-asserted-by":"crossref","unstructured":"Kr\u0148oul, Z., \u017delezn\u00fd, M.: A development of Czech talking head. In: Proceedings of Interspeech (ICSLP) 2008, Brisbane, Australia, pp. 2326\u20132329 (2008)","DOI":"10.21437\/Interspeech.2008-593"},{"key":"40_CR15","unstructured":"Leg\u00e1t, M., Matou\u0161ek, J.: Pitch contours as predictors of audible concatenation artifacts. In: Proceedings of WCECS 2011, San Francisco, USA, pp. 525\u2013529 (2011)"},{"key":"40_CR16","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"364","DOI":"10.1007\/978-3-642-23538-2_46","volume-title":"Text, Speech and Dialogue","author":"J Matou\u0161ek","year":"2011","unstructured":"Matou\u0161ek, J., Hanzl\u00ed\u010dek, Z., Campr, M., Kr\u0148oul, Z., Campr, P., Gr\u016fber, M.: Web-based system for automatic reading of technical documents for vision impaired students. In: Habernal, I., Matou\u0161ek, V. (eds.) TSD 2011. LNCS (LNAI), vol. 6836, pp. 364\u2013371. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-23538-2_46"},{"key":"40_CR17","unstructured":"Matou\u0161ek, J., Leg\u00e1t, M.: Is unit selection aware of audible artifacts? In: Proceedings of SSW8, ISCA, Barcelona, pp. 267\u2013271 (2013)"},{"key":"40_CR18","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1007\/978-3-540-74628-7_43","volume-title":"Text, Speech and Dialogue","author":"J Matou\u0161ek","year":"2007","unstructured":"Matou\u0161ek, J., Romportl, J.: Recording and annotation of speech corpus for Czech unit selection speech synthesis. In: Matou\u0161ek, V., Mautner, P. (eds.) TSD 2007. LNCS (LNAI), vol. 4629, pp. 326\u2013333. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74628-7_43"},{"key":"40_CR19","doi-asserted-by":"crossref","unstructured":"Matou\u0161ek, J., Tihelka, D.: Annotation errors detection in TTS corpora. In: Proceedings of Interspeech 2013, pp. 1511\u20131515. ISCA, Lyon (2013)","DOI":"10.21437\/Interspeech.2013-305"},{"key":"40_CR20","doi-asserted-by":"crossref","unstructured":"Matou\u0161ek, J., Tihelka, D.: Voting detector: a combination of anomaly detectors to reveal annotation errors in TTS corpora. In: Proceedings of Interspeech 2016, pp. 1560\u20131564. ISCA, San Francisco (2016)","DOI":"10.21437\/Interspeech.2016-442"},{"key":"40_CR21","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1007\/11846406_55","volume-title":"Text, Speech and Dialogue","author":"J Matou\u0161ek","year":"2006","unstructured":"Matou\u0161ek, J., Tihelka, D., Romportl, J.: Current state of czech text-to-speech system ARTIC. In: Sojka, P., Kope\u010dek, I., Pala, K. (eds.) TSD 2006. LNCS (LNAI), vol. 4188, pp. 439\u2013446. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11846406_55"},{"key":"40_CR22","unstructured":"Matou\u0161ek, J., Tihelka, D., Romportl, J.: Building of a speech corpus optimised for unit selection TTS synthesis. In: Proceedings of LREC 2008, pp. 1296\u20131299. ELRA, Marrakech (2008)"},{"key":"40_CR23","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"456","DOI":"10.1007\/978-3-642-32790-2_55","volume-title":"Text, Speech and Dialogue","author":"J Matou\u0161ek","year":"2012","unstructured":"Matou\u0161ek, J., Tihelka, D., \u0160m\u00eddl, L.: On the impact of annotation errors on unit-selection speech synthesis. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2012. LNCS (LNAI), vol. 7499, pp. 456\u2013463. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-32790-2_55"},{"key":"40_CR24","unstructured":"van den Oord, A., et al.: WaveNet: a generative model for raw audio. CoRR abs\/1609.03499 (2016)"},{"key":"40_CR25","unstructured":"van den Oord, A., et al.: Parallel WaveNet: fast high-fidelity speech synthesis. CoRR abs\/1711.10433 (2017)"},{"issue":"2","key":"40_CR26","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1109\/TASL.2012.2221460","volume":"21","author":"Y Qian","year":"2013","unstructured":"Qian, Y., Soong, F.K., Yan, Z.J.: A unified trajectory tiling approach to high quality speech rendering. IEEE Trans. Audio Speech Lang. Process. 21(2), 280\u2013290 (2013)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"40_CR27","doi-asserted-by":"crossref","unstructured":"Romportl, J.: Structural data-driven prosody model for TTS synthesis. In: Proceedings of the Speech Prosody 2006, pp. 549\u2013552. TUDpress, Dresden (2006)","DOI":"10.21437\/SpeechProsody.2006-121"},{"key":"40_CR28","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1007\/11551874_48","volume-title":"Text, Speech and Dialogue","author":"J Romportl","year":"2005","unstructured":"Romportl, J., Matou\u0161ek, J.: Formal prosodic structures and their application in NLP. In: Matou\u0161ek, V., Mautner, P., Pavelka, T. (eds.) TSD 2005. LNCS (LNAI), vol. 3658, pp. 371\u2013378. Springer, Heidelberg (2005). https:\/\/doi.org\/10.1007\/11551874_48"},{"key":"40_CR29","unstructured":"Romportl, J., Zovato, E., Santos, R., Ircing, P., Rela\u00f1o, J.G., Danieli, M.: Application of expressive TTS synthesis in an advanced ECA system. In: Proceedings of SSW7, pp. 120\u2013125. ISCA, Kyoto (2010)"},{"key":"40_CR30","unstructured":"Stanislav, P., \u0160m\u00eddl, L., \u0160vec, J.: An automatic training tool for air traffic control training. In: Proceedings of Interspeech 2016, pp. 782\u2013783. ISCA, San Francisco (2016)"},{"key":"40_CR31","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511816338","volume-title":"Text-to-Speech Synthesis","author":"P Taylor","year":"2009","unstructured":"Taylor, P.: Text-to-Speech Synthesis, 1st edn. Cambridge University Press, New York (2009)","edition":"1"},{"key":"40_CR32","doi-asserted-by":"crossref","unstructured":"Tihelka, D.: Symbolic prosody driven unit selection for highly natural synthetic speech. In: Proceedings of Interspeech 2005 - Eurospeech, pp. 2525\u20132528. ISCA, Lisboa (2005)","DOI":"10.21437\/Interspeech.2005-786"},{"key":"40_CR33","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"442","DOI":"10.1007\/978-3-642-40585-3_56","volume-title":"Text, Speech, and Dialogue","author":"D Tihelka","year":"2013","unstructured":"Tihelka, D., Gr\u016fber, M., Hanzl\u00ed\u010dek, Z.: Robust methodology for TTS enhancement evaluation. In: Habernal, I., Matou\u0161ek, V. (eds.) TSD 2013. LNCS (LNAI), vol. 8082, pp. 442\u2013449. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-40585-3_56"},{"key":"40_CR34","doi-asserted-by":"publisher","first-page":"676","DOI":"10.1007\/978-3-319-99579-3_69","volume-title":"Speech and Computer","author":"Daniel Tihelka","year":"2018","unstructured":"Tihelka, D., Hanzl\u00ed\u010dek, Z., J\u016fzov\u00e1, M., Matou\u0161ek, J.: First steps towards hybrid speech synthesis in Czech TTS system ARTIC. In: SPECOM 2018 (2018, submitted for review)"},{"key":"40_CR35","doi-asserted-by":"crossref","unstructured":"Tihelka, D., Kala, J., Matou\u0161ek, J.: Enhancements of Viterbi search for fast unit selection synthesis. In: Proceedings of Interspeech 2010, pp. 174\u2013177. ISCA, Makuhari (2010)","DOI":"10.21437\/Interspeech.2010-78"},{"key":"40_CR36","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"508","DOI":"10.1007\/978-3-540-74628-7_66","volume-title":"Text, Speech and Dialogue","author":"D Tihelka","year":"2007","unstructured":"Tihelka, D., Matou\u0161ek, J., Kala, J.: Quality deterioration factors in unit selection speech synthesis. In: Matou\u0161ek, V., Mautner, P. (eds.) TSD 2007. LNCS (LNAI), vol. 4629, pp. 508\u2013515. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74628-7_66"},{"key":"40_CR37","unstructured":"Tihelka, D., Stanislav, P.: ARTIC for assistive technologies: transformation to resource-limited hardware. In: Proceedings of WCECS 2011, pp. 581\u2013584. IANG, San Francisco (2011)"},{"key":"40_CR38","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1007\/978-3-642-40585-3_22","volume-title":"Text, Speech, and Dialogue","author":"J V\u00edt","year":"2013","unstructured":"V\u00edt, J., Matou\u0161ek, J.: Concatenation artifact detection trained from listeners evaluations. In: Habernal, I., Matou\u0161ek, V. (eds.) TSD 2013. LNCS (LNAI), vol. 8082, pp. 169\u2013176. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-40585-3_22"},{"key":"40_CR39","doi-asserted-by":"crossref","unstructured":"V\u00edt, J., Matou\u0161ek, J.: On the analysis of training data for WaveNet-based speech synthesis. In: Proceedings of ICASSP 2018, IEEE, Calgary (2018)","DOI":"10.1109\/ICASSP.2018.8461960"},{"key":"40_CR40","unstructured":"Zen, H.: Acoustic modeling in statistical parametric speech synthesis - from HMM to LSTM-RNN. In: Proceedings of MLSLP (2015, invited paper)"},{"key":"40_CR41","doi-asserted-by":"publisher","first-page":"3657","DOI":"10.1016\/j.sigpro.2006.02.039","volume":"12","author":"M \u017delezn\u00fd","year":"2006","unstructured":"\u017delezn\u00fd, M., Kr\u0148oul, Z., C\u00edsa\u0159, P., Matou\u0161ek, J.: Design, implementation and evaluation of the Czech realistic audio-visual speech synthesis. Sig. Process. 12, 3657\u20133673 (2006)","journal-title":"Sig. Process."}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-00794-2_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T16:37:53Z","timestamp":1709829473000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-00794-2_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030007935","9783030007942"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-00794-2_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"8 September 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}