{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T11:48:09Z","timestamp":1742989689232,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030602758"},{"type":"electronic","value":"9783030602765"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60276-5_56","type":"book-chapter","created":{"date-parts":[[2020,10,4]],"date-time":"2020-10-04T07:02:44Z","timestamp":1601794964000},"page":"581-591","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Uncertainty of Phone Voicing and Its Impact on Speech Synthesis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3149-2330","authenticated-orcid":false,"given":"Daniel","family":"Tihelka","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4001-9289","authenticated-orcid":false,"given":"Zden\u011bk","family":"Hanzl\u00ed\u010dek","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6194-7826","authenticated-orcid":false,"given":"Mark\u00e9ta","family":"J\u016fzov\u00e1","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,29]]},"reference":[{"key":"56_CR1","doi-asserted-by":"publisher","first-page":"3657","DOI":"10.1016\/j.sigpro.2006.02.039","volume":"12","author":"M \u017delezn\u00fd","year":"2006","unstructured":"\u017delezn\u00fd, M., Kr\u0148\u0131oul, Z., C\u00edsa\u0159, P., Matou\u0161ek, J.: Design, implementation and evaluation of the Czech realistic audio-visual speech synthesis. Sig. Process. 12, 3657\u20133673 (2006)","journal-title":"Sig. Process."},{"key":"56_CR2","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1007\/978-3-030-00794-2_48","volume-title":"Text, Speech, and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2018","unstructured":"Hanzl\u00ed\u010dek, Z., V\u00edt, J., Tihelka, D.: WaveNet-based speech synthesis applied to Czech: a comparison with the traditional synthesis methods. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2018. LNCS (LNAI), vol. 11107, pp. 445\u2013452. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-00794-2_48"},{"key":"56_CR3","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1007\/978-3-030-27947-9_31","volume-title":"Text, Speech, and Dialogue","author":"Z Hanzl\u00ed\u010dek","year":"2019","unstructured":"Hanzl\u00ed\u010dek, Z., V\u00edt, J., Tihelka, D.: LSTM-based speech segmentation for TTS synthesis. In: Ek\u0161tein, K. (ed.) TSD 2019. LNCS (LNAI), vol. 11697, pp. 361\u2013372. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-27947-9_31"},{"key":"56_CR4","doi-asserted-by":"crossref","unstructured":"Hunt, A.J., Black, A.W.: Unit selection in a concatenative speech synthesis system using a large speech database. In: ICASSP 1996, Proceedings of International Conference on Acoustics, Speech, and Signal Processing, IEEE, Atlanta, Georgia, vol. 1, pp. 373\u2013376 (1996)","DOI":"10.1109\/ICASSP.1996.541110"},{"key":"56_CR5","doi-asserted-by":"crossref","unstructured":"Kala, J., Matou\u0161ek, J.: Very fast unit selection using Viterbi search with zero-concatenation-cost chains. In: ICASSP 2014, Proceedings of International Conference on Acoustics, Speech, and Signal Processing, IEEE, Florence, Italy, pp. 2569\u20132573 (2014)","DOI":"10.1109\/ICASSP.2014.6854064"},{"key":"56_CR6","unstructured":"Kalchbrenner, N., et al.: Efficient neural audio synthesis. arXiv preprint arXiv:1802.08435 (2018)"},{"key":"56_CR7","doi-asserted-by":"crossref","unstructured":"Leg\u00e1t, M., Matou\u0161ek, J., Tihelka, D.: A robust multi-phase pitch-mark detection algorithm. In: Interspeech, vol. 2007, pp. 1641\u20131644 (2007)","DOI":"10.21437\/Interspeech.2007-457"},{"key":"56_CR8","doi-asserted-by":"crossref","unstructured":"Lorenzo-Trueba, J., et al.: Towards achieving robust universal neural vocoding, pp. 181\u2013185 (2019)","DOI":"10.21437\/Interspeech.2019-1424"},{"key":"56_CR9","volume-title":"Principles of Phonetic Segmentation","author":"P Macha\u010d","year":"2013","unstructured":"Macha\u010d, P., Skarnitzl, R.: Principles of Phonetic Segmentation. Epocha, Prague (2013)"},{"key":"56_CR10","unstructured":"Matou\u0161ek, J., Leg\u00e1t, M.: Is unit selection aware of audible artifacts? In: SSW 2013, Proceedings of the 8th Speech Synthesis Workshop, ISCA, Barcelona, Spain, pp. 267\u2013271 (2013)"},{"key":"56_CR11","doi-asserted-by":"crossref","unstructured":"Matou\u0161ek, J., Romportl, J.: Automatic pitch-synchronous phonetic segmentation. In: INTERSPEECH 2008, Proceedings of 9th Annual Conference of International Speech Communication Association, ISCA, Brisbane, Australia, pp. 1626\u20131629 (2008)","DOI":"10.21437\/Interspeech.2008-452"},{"key":"56_CR12","doi-asserted-by":"crossref","unstructured":"Matou\u0161ek, J., Tihelka, D.: Using extreme gradient boosting to detect glottal closure instants in speech signal. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, Great Britain, pp. 6515\u20136519 (2019)","DOI":"10.1109\/ICASSP.2019.8683889"},{"key":"56_CR13","unstructured":"van den Oord, A., et al.: WaveNet: a generative model for raw audio. arXiv preprint arXiv:1609.03499 (2016)"},{"key":"56_CR14","doi-asserted-by":"crossref","unstructured":"Romportl, J.: Structural data-driven prosody model for TTS synthesis. In: Proceedings of the Speech Prosody 2006 Conference, pp. 549\u2013552. TUDpress, Dresden (2006)","DOI":"10.21437\/SpeechProsody.2006-121"},{"key":"56_CR15","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1007\/11551874_48","volume-title":"Text, Speech and Dialogue","author":"J Romportl","year":"2005","unstructured":"Romportl, J., Matou\u0161ek, J.: Formal prosodic structures and their application in NLP. In: Matou\u0161ek, V., Mautner, P., Pavelka, T. (eds.) TSD 2005. LNCS (LNAI), vol. 3658, pp. 371\u2013378. Springer, Heidelberg (2005). https:\/\/doi.org\/10.1007\/11551874_48"},{"key":"56_CR16","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511816338","volume-title":"Text-to-Speech Synthesis","author":"P Taylor","year":"2009","unstructured":"Taylor, P.: Text-to-Speech Synthesis, 1st edn. Cambridge University Press, New York (2009)","edition":"1"},{"key":"56_CR17","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"442","DOI":"10.1007\/978-3-642-40585-3_56","volume-title":"Text, Speech, and Dialogue","author":"D Tihelka","year":"2013","unstructured":"Tihelka, D., Gr\u016fber, M., Hanzl\u00ed\u010dek, Z.: Robust methodology for TTS enhancement evaluation. In: Habernal, I., Matou\u0161ek, V. (eds.) TSD 2013. LNCS (LNAI), vol. 8082, pp. 442\u2013449. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-40585-3_56"},{"key":"56_CR18","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"676","DOI":"10.1007\/978-3-319-99579-3_69","volume-title":"Speech and Computer","author":"D Tihelka","year":"2018","unstructured":"Tihelka, D., Hanzl\u00ed\u010dek, Z., J\u016fzov\u00e1, M., Matou\u0161ek, J.: First steps towards hybrid speech synthesis in Czech TTS system ARTIC. In: Karpov, A., Jokisch, O., Potapova, R. (eds.) SPECOM 2018. LNCS (LNAI), vol. 11096, pp. 676\u2013686. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-99579-3_69"},{"key":"56_CR19","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1007\/978-3-030-00794-2_40","volume-title":"Text, Speech, and Dialogue","author":"D Tihelka","year":"2018","unstructured":"Tihelka, D., Hanzl\u00ed\u010dek, Z., J\u016fzov\u00e1, M., V\u00edt, J., Matou\u0161ek, J., Gr\u016fber, M.: Current state of text-to-speech system ARTIC: a\u00a0decade of research on the field of speech technologies. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2018. LNCS (LNAI), vol. 11107, pp. 369\u2013378. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-00794-2_40"},{"key":"56_CR20","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1007\/978-3-319-10816-2_55","volume-title":"Text, Speech and Dialogue","author":"D Tihelka","year":"2014","unstructured":"Tihelka, D., Matou\u0161ek, J., Hanzl\u00ed\u010dek, Z.: Modelling F0 dynamics in\u00a0unit\u00a0selection\u00a0based\u00a0speech\u00a0synthesis. In: Sojka, P., Hor\u00e1k, A., Kope\u010dek, I., Pala, K. (eds.) TSD 2014. LNCS (LNAI), vol. 8655, pp. 457\u2013464. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10816-2_55"},{"key":"56_CR21","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"508","DOI":"10.1007\/978-3-540-74628-7_66","volume-title":"Text, Speech and Dialogue","author":"D Tihelka","year":"2007","unstructured":"Tihelka, D., Matou\u0161ek, J., Kala, J.: Quality deterioration factors in unit selection speech synthesis. In: Matou\u0161ek, V., Mautner, P. (eds.) TSD 2007. LNCS (LNAI), vol. 4629, pp. 508\u2013515. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74628-7_66"},{"key":"56_CR22","doi-asserted-by":"crossref","unstructured":"Tihelka, D., Romportl, J.: Exploring automatic similarity measures for unit selection tuning. In: INTERSPEECH 2009, Proceedings of 10th Annual Conference of International Speech Communication Association, ISCA, Brighton, Great Britain, pp. 736\u2013739 (2009)","DOI":"10.21437\/Interspeech.2009-250"},{"key":"56_CR23","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1007\/978-3-030-27947-9_26","volume-title":"Text, Speech, and Dialogue","author":"J V\u00edt","year":"2019","unstructured":"V\u00edt, J., Hanzl\u00ed\u010dek, Z., Matou\u0161ek, J.: Czech speech synthesis with generative neural vocoder. In: Ek\u0161tein, K. (ed.) TSD 2019. LNCS (LNAI), vol. 11697, pp. 307\u2013315. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-27947-9_26"},{"key":"56_CR24","volume-title":"Handbook of Standards and Resources for Spoken Language Systems","author":"JC Wells","year":"1997","unstructured":"Wells, J.C.: SAMPA computer readable phonetic alphabet. In: Gibbon, D., Moore, R., Winski, R. (eds.) Handbook of Standards and Resources for Spoken Language Systems. Mouton de Gruyter, Berlin and New York (1997)"},{"key":"56_CR25","unstructured":"Wu, Z., Watts, O., King, S.: Merlin: an open source neural network speech synthesis system. In: 9th ISCA Speech Synthesis Workshop (2016), pp. 218\u2013223, September 2016"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60276-5_56","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,15]],"date-time":"2024-08-15T12:51:37Z","timestamp":1723726297000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-60276-5_56"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030602758","9783030602765"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60276-5_56","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"St. Petersburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Russia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"160","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"65","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic SPECOM 2020 was held as a virtual event","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}