{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T22:40:29Z","timestamp":1764715229526,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031779602"},{"type":"electronic","value":"9783031779619"}],"license":[{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-77961-9_16","type":"book-chapter","created":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T14:02:33Z","timestamp":1732197753000},"page":"219-229","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["End-to-End Speech Synthesis for the Serbian Language Based on Tacotron"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3707-0286","authenticated-orcid":false,"given":"Tijana","family":"Nosek","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0511-6729","authenticated-orcid":false,"given":"Sini\u0161a","family":"Suzi\u0107","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3426-3277","authenticated-orcid":false,"given":"Milan","family":"Se\u010dujski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2517-3728","authenticated-orcid":false,"given":"Vuk","family":"Stanojev","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3026-8086","authenticated-orcid":false,"given":"Darko","family":"Pekar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4558-9918","authenticated-orcid":false,"given":"Vlado","family":"Deli\u0107","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,22]]},"reference":[{"key":"16_CR1","unstructured":"Tan, X., Qin, T., Soong, F., Liu, T.Y.: A survey on neural speech synthesis. arXiv preprint arXiv:2106.15561 (2021)"},{"issue":"1","key":"16_CR2","first-page":"4368036","volume":"2019","author":"V Deli\u0107","year":"2019","unstructured":"Deli\u0107, V., et al.: Speech technology progress based on new machine learning paradigm. Comput. Intell. Neurosci. 2019(1), 4368036 (2019)","journal-title":"Comput. Intell. Neurosci."},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Tan, X., et al.: Naturalspeech: end-to-end text-to-speech synthesis with human-level quality. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3356232"},{"key":"16_CR4","unstructured":"Van Den Oord, A., et al.: Wavenet: A generative model for raw audio. arXiv preprint arXiv:1609.03499 12 (2016)"},{"key":"16_CR5","unstructured":"Ren, Y., et al.: Fastspeech 2: Fast and high-quality end-to-end text to speech. arXiv preprint arXiv:2006.04558 (2020)"},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: Tacotron: Towards end-to-end speech synthesis. arXiv preprint arXiv:1703.10135 (2017)","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Shen, J., et al.: Natural TTS synthesis by conditioning wavenet on MEL spectrogram predictions. In: 2018 IEEE International Conference On Acoustics, Speech And Signal Processing (ICASSP), pp. 4779\u20134783. IEEE (2018)","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"16_CR8","unstructured":"Ping, W., et al.: Deep voice 3: Scaling text-to-speech with convolutional sequence learning. arXiv preprint arXiv:1710.07654 (2017)"},{"key":"16_CR9","unstructured":"Ren, Y., et al.: Fastspeech: fast, robust and controllable text to speech. Adv. Neural Inf. Proc. Syst. 32 (2019)"},{"key":"16_CR10","unstructured":"Mu, Z., Yang, X., Dong, Y.: Review of end-to-end speech synthesis technology based on deep learning. arXiv preprint arXiv:2104.09995 (2021)"},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"Mishev, K., Karovska Ristovska, A., Trajanov, D., Eftimov, T., Simjanoska, M.: MAKEDONKA: applied deep learning model for text-to-speech synthesis in Macedonian language. Appl. Sci. 10(19), 6882 (2020)","DOI":"10.3390\/app10196882"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Sofronievski, B., et al.: Macedonian speech synthesis for assistive technology applications. In: 2022 30th European Signal Processing Conference (EUSIPCO), pp. 1183\u20131187. IEEE (2022)","DOI":"10.23919\/EUSIPCO55093.2022.9909778"},{"key":"16_CR13","doi-asserted-by":"crossref","unstructured":"Secujski, M.S.: Obtaining prosodic information from text in Serbian language. In: EUROCON 2005-The International Conference on Computer as a Tool, vol. 2, pp. 1654\u20131657. IEEE (2005)","DOI":"10.1109\/EURCON.2005.1630288"},{"key":"16_CR14","first-page":"17022","volume":"33","author":"J Kong","year":"2020","unstructured":"Kong, J., Kim, J., Bae, J.: Hifi-gan: generative adversarial networks for efficient and high fidelity speech synthesis. Adv. Neural. Inf. Process. Syst. 33, 17022\u201317033 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR15","unstructured":"NVIDIA. Tacotron 2. GitHub repository, https:\/\/github.com\/NVIDIA\/tacotron2. Accessed 23 May 2024"},{"issue":"1","key":"16_CR16","doi-asserted-by":"publisher","first-page":"32","DOI":"10.5937\/telfor1701032D","volume":"9","author":"T Deli\u0107","year":"2017","unstructured":"Deli\u0107, T., Se\u010dujski, M., Suzi\u0107, S.: A review of Serbian parametric speech synthesis based on deep neural networks. Telfor J. 9(1), 32\u201337 (2017)","journal-title":"Telfor J."},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Suzi\u0107, S., Pekar, D., Se\u010dujski, M., Nosek, T., Deli\u0107, V.: HiFi-GAN based Text-to-Speech Synthesis in Serbian. In: 2022 30th European Signal Processing Conference (EUSIPCO), pp. 2231\u20132235. IEEE (2022)","DOI":"10.23919\/EUSIPCO55093.2022.9909548"},{"issue":"4","key":"16_CR18","doi-asserted-by":"publisher","first-page":"434","DOI":"10.3897\/jucs.2020.023","volume":"26","author":"M Secujski","year":"2020","unstructured":"Secujski, M., Pekar, D., Suzic, S., Smirnov, A., Nosek, T.V.: Speaker\/style-dependent neural network speech synthesis based on speaker\/style embedding. J. Univers. Comput. Sci. 26(4), 434\u2013453 (2020)","journal-title":"J. Univers. Comput. Sci."},{"issue":"11","key":"16_CR19","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"16_CR20","unstructured":"Keith Ito. The LJ Speech Dataset. https:\/\/keithito.com\/LJ-Speech-Dataset. Accessed 23 July 2024"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-77961-9_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,9]],"date-time":"2025-01-09T16:05:40Z","timestamp":1736438740000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-77961-9_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"ISBN":["9783031779602","9783031779619"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-77961-9_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,22]]},"assertion":[{"value":"22 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Belgrade","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Serbia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/specom2024.ftn.uns.ac.rs\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}