{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:58:11Z","timestamp":1740099491725,"version":"3.37.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030260606"},{"type":"electronic","value":"9783030260613"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-26061-3_21","type":"book-chapter","created":{"date-parts":[[2019,8,8]],"date-time":"2019-08-08T19:03:54Z","timestamp":1565291034000},"page":"201-208","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["LSTM-Based Kazakh Speech Synthesis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8399-8379","authenticated-orcid":false,"given":"Arman","family":"Kaliyev","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,7,24]]},"reference":[{"key":"21_CR1","doi-asserted-by":"publisher","unstructured":"An, S., Ling, Z., Dai, L.: Emotional statistical parametric speech synthesis using LSTM-RNNS. In: 2017 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), pp. 1613\u20131616, December 2017. \n                    https:\/\/doi.org\/10.1109\/APSIPA.2017.8282282","DOI":"10.1109\/APSIPA.2017.8282282"},{"key":"21_CR2","unstructured":"Berment, V.: Methods to computerize \u201clittle equipped\u201d languages and groups of languages. Theses, Universit\u00e9 Joseph-Fourier - Grenoble I, May 2004. \n                    https:\/\/tel.archives-ouvertes.fr\/tel-00006313"},{"issue":"4","key":"21_CR3","first-page":"467","volume":"18","author":"PF Brown","year":"1992","unstructured":"Brown, P.F., Pietra, V.J.D., de Souza, P.V., Lai, J.C., Mercer, R.L.: Class-based n-gram models of natural language. Comput. Linguist. 18(4), 467\u2013479 (1992)","journal-title":"Comput. Linguist."},{"key":"21_CR4","unstructured":"Fan, Y., Qian, Y., Xie, F., Soong, F.K.: TTS synthesis with bidirectional LSTM based recurrent neural networks. In: INTERSPEECH 2014, 15th Annual Conference of the International Speech Communication Association, Singapore, 14\u201318 September 2014, pp. 1964\u20131968 (2014). \n                    http:\/\/www.isca-speech.org\/archive\/interspeech_2014\/i14_1964.html"},{"key":"21_CR5","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"741","DOI":"10.1007\/978-3-319-66429-3_74","volume-title":"Speech and Computer","author":"A Kaliyev","year":"2017","unstructured":"Kaliyev, A., Rybin, S.V., Matveev, Y.: The pausing method based on brown clustering and word embedding. In: Karpov, A., Potapova, R., Mporas, I. (eds.) SPECOM 2017. LNCS (LNAI), vol. 10458, pp. 741\u2013747. Springer, Cham (2017). \n                    https:\/\/doi.org\/10.1007\/978-3-319-66429-3_74"},{"key":"21_CR6","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1007\/978-3-319-99579-3_29","volume-title":"Speech and Computer","author":"A Kaliyev","year":"2018","unstructured":"Kaliyev, A., Rybin, S.V., Matveev, Y.N.: Phoneme duration prediction for Kazakh language. In: Karpov, A., Jokisch, O., Potapova, R. (eds.) SPECOM 2018. LNCS (LNAI), vol. 11096, pp. 274\u2013280. Springer, Cham (2018). \n                    https:\/\/doi.org\/10.1007\/978-3-319-99579-3_29"},{"key":"21_CR7","doi-asserted-by":"publisher","unstructured":"Kaliyev, A., Rybin, S.V., Matveev, Y.N., Kaziyeva, N., Burambayeva, N.: Modeling pause for the synthesis of kazakh speech. In: Proceedings of the Fourth International Conference on Engineering & MIS 2018, ICEMIS 2018, pp. 1:1\u20131:4. ACM, New York (2018). \n                    https:\/\/doi.org\/10.1145\/3234698.3234699","DOI":"10.1145\/3234698.3234699"},{"key":"21_CR8","unstructured":"Karpov, A., Verkhodanova, V.: Speech technologies for under-resourced languages of the world. Voprosy Jazykoznanija 2015, pp. 117\u2013135, January 2015"},{"key":"21_CR9","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1007\/978-3-319-23132-7_3","volume-title":"Speech and Computer","author":"O Khomitsevich","year":"2015","unstructured":"Khomitsevich, O., Mendelev, V., Tomashenko, N., Rybin, S., Medennikov, I., Kudubayeva, S.: A bilingual Kazakh-Russian system for automatic speech recognition and synthesis. In: Ronzhin, A., Potapova, R., Fakotakis, N. (eds.) SPECOM 2015. LNCS (LNAI), vol. 9319, pp. 25\u201333. Springer, Cham (2015). \n                    https:\/\/doi.org\/10.1007\/978-3-319-23132-7_3"},{"key":"21_CR10","unstructured":"Krauwer, S.: The basic language resource kit (BLARK) as the first milestone for the language resources roadmap. In: Proceedings of SPECOM 2003, pp. 8\u201315 (2003)"},{"key":"21_CR11","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/j.specom.2016.09.001","volume":"84","author":"M Morise","year":"2016","unstructured":"Morise, M.: D4C, a band-aperiodicity estimator for high-quality speech synthesis. Speech Commun. 84, 57\u201365 (2016). \n                    https:\/\/doi.org\/10.1016\/j.specom.2016.09.001\n                    \n                  . \n                    http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0167639316300413","journal-title":"Speech Commun."},{"issue":"7","key":"21_CR12","doi-asserted-by":"publisher","first-page":"1877","DOI":"10.1587\/transinf.2015EDP7457","volume":"99","author":"M Morise","year":"2016","unstructured":"Morise, M., Yokomori, F., Ozawa, K.: World: a vocoder-based high-quality speech synthesis system for real-time applications. IEICE Trans. Inf. Syst. 99(7), 1877\u20131884 (2016). \n                    https:\/\/doi.org\/10.1587\/transinf.2015EDP7457","journal-title":"IEICE Trans. Inf. Syst."},{"key":"21_CR13","unstructured":"Parlikar, A., Black, A.W.: A grammar based approach to style specific phrase prediction. In: Interspeech 2011, 12th Annual Conference of the International Speech Communication Association, Florence, Italy, 27\u201331 August 2011, pp. 2149\u20132152 (2011). \n                    http:\/\/www.isca-speech.org\/archive\/interspeech_2011\/i11_2149.html"},{"key":"21_CR14","unstructured":"Salmenova, A.: Prosodic design of syntagmas and phonetic correlates of excretion. Ph.D. thesis, Saint Petersburg State University, Saint Petersburg, Russia, December 1984. (in Russian)"},{"key":"21_CR15","doi-asserted-by":"publisher","unstructured":"Sarkar, P., Rao, K.S.: Data-driven pause prediction for speech synthesis in storytelling style speech. In: Twenty First National Conference on Communications, NCC 2015, Mumbai, India, 27 February 1 March 2015, pp. 1\u20135 (2015). \n                    https:\/\/doi.org\/10.1109\/NCC.2015.7084924","DOI":"10.1109\/NCC.2015.7084924"},{"key":"21_CR16","unstructured":"Skerry-Ryan, R.J., et al.: Towards end-to-end prosody transfer for expressive speech synthesis with tacotron. CoRR abs\/1803.09047 (2018). \n                    http:\/\/arxiv.org\/abs\/1803.09047"},{"key":"21_CR17","unstructured":"Sotelo, J., et al.: Char2Wav: end-to-end speech synthesis. In: International Conference on Learning Representations (Workshop Track), April 2017"},{"key":"21_CR18","unstructured":"Stratos, K., Kim, D., Collins, M., Hsu, D.: A spectral algorithm for learning class-based n-gram models of natural language. In: Proceedings of the Thirtieth Conference on Uncertainty in Artificial Intelligence, UAI 2014, pp. 762\u2013771. AUAI Press, Arlington (2014). \n                    http:\/\/dl.acm.org\/citation.cfm?id=3020751.3020830"},{"key":"21_CR19","unstructured":"Taigman, Y., Wolf, L., Polyak, A., Nachmani, E.: Voice synthesis for in-the-wild speakers via a phonological loop. CoRR abs\/1707.06588 (2017). \n                    http:\/\/arxiv.org\/abs\/1707.06588"},{"key":"21_CR20","doi-asserted-by":"publisher","unstructured":"Zen, H., Sak, H.: Unidirectional long short-term memory recurrent neural network with recurrent output layer for low-latency speech synthesis. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2015, South Brisbane, Queensland, Australia, 19\u201324 April 2015, pp. 4470\u20134474 (2015). \n                    https:\/\/doi.org\/10.1109\/ICASSP.2015.7178816","DOI":"10.1109\/ICASSP.2015.7178816"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-26061-3_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,8]],"date-time":"2019-08-08T19:06:36Z","timestamp":1565291196000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-26061-3_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030260606","9783030260613"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-26061-3_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"24 July 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Istanbul","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turkey","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 August 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 August 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/specom.nw.ru\/2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"86","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"57","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"66% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}