{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T12:00:30Z","timestamp":1769774430629,"version":"3.49.0"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032159830","type":"print"},{"value":"9783032159847","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-15984-7_37","type":"book-chapter","created":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T20:33:56Z","timestamp":1769718836000},"page":"547-561","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["The Impact of\u00a0Prosodic Segmentation on\u00a0Speech Synthesis of\u00a0Spontaneous Speech"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6378-4648","authenticated-orcid":false,"given":"Julio","family":"Galdino","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8817-2063","authenticated-orcid":false,"given":"Sidney","family":"Leal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7191-9296","authenticated-orcid":false,"given":"Leticia","family":"de Souza","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4344-1109","authenticated-orcid":false,"given":"Rodrigo","family":"Lima","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9867-3101","authenticated-orcid":false,"given":"Antonio","family":"Moreira","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5647-0891","authenticated-orcid":false,"suffix":"Jr.","given":"Arnaldo","family":"Candido","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0866-0535","authenticated-orcid":false,"suffix":"Jr.","given":"Miguel","family":"Oliveira","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0160-7173","authenticated-orcid":false,"given":"Edresson","family":"Casanova","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5108-2630","authenticated-orcid":false,"given":"Sandra","family":"Alu\u00edsio","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,30]]},"reference":[{"key":"37_CR1","first-page":"4489","volume":"2023","author":"M Bain","year":"2023","unstructured":"Bain, M., Huh, J., Han, T., Zisserman, A.: WhisperX: time-accurate speech transcription of long-form audio. INTERSPEECH 2023, 4489\u20134493 (2023)","journal-title":"INTERSPEECH"},{"key":"37_CR2","unstructured":"Barbosa, P.A.: Pros\u00f3dia. Par\u00e1bola (2019)"},{"key":"37_CR3","doi-asserted-by":"publisher","unstructured":"Biron, T., et al.: Automatic detection of prosodic boundaries in spontaneous speech. PLoS ONE 16(5), 1\u201321 (2021). https:\/\/doi.org\/10.1371\/journal.pone.0250969","DOI":"10.1371\/journal.pone.0250969"},{"key":"37_CR4","unstructured":"Boersma, P., Weenink, D.: Praat: doing phonetics by computer [Computer program]. Version 6.3.10 (2023). http:\/\/www.praat.org\/"},{"key":"37_CR5","doi-asserted-by":"crossref","unstructured":"Casanova, E., et\u00a0al.: Xtts: a massively multilingual zero-shot text-to-speech model. In: Proceedings of Interspeech 2024, pp. 4978\u20134982 (2024)","DOI":"10.21437\/Interspeech.2024-2016"},{"key":"37_CR6","doi-asserted-by":"publisher","unstructured":"Chan, C., Kuang, J.: Exploring the accuracy of prosodic encodings in state-of-the-art text-to-speech models. In: Speech Prosody 2024, pp. 27\u201331 (2024). https:\/\/doi.org\/10.21437\/SpeechProsody.2024-6","DOI":"10.21437\/SpeechProsody.2024-6"},{"issue":"3","key":"37_CR7","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1109\/89.668817","volume":"6","author":"SH Chen","year":"1998","unstructured":"Chen, S.H., Hwang, S.H., Wang, Y.R.: An RNN-based prosodic information synthesizer for mandarin text-to-speech. IEEE Trans. Speech Audio Process. 6(3), 226\u2013239 (1998). https:\/\/doi.org\/10.1109\/89.668817","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"37_CR8","doi-asserted-by":"crossref","unstructured":"He, H., et al.: Emilia: an extensive, multilingual, and diverse speech dataset for large-scale speech generation, pp. 885\u2013890. IEEE (2024)","DOI":"10.1109\/SLT61566.2024.10832365"},{"key":"37_CR9","unstructured":"Hirst, D.: Analyse tier praat script (2012)"},{"key":"37_CR10","doi-asserted-by":"crossref","unstructured":"Hussain, S., et al.: Koel-tts: enhancing llm based speech generation with preference alignment and classifier free guidance. arXiv:2502.05236 (2025)","DOI":"10.18653\/v1\/2025.emnlp-main.1076"},{"key":"37_CR11","doi-asserted-by":"publisher","unstructured":"Jokisch, O., Mixdorff, H., Kruschke, H., Kordon, U.: Learning the parameters of quantitative prosody models. In: 6th International Conference on Spoken Language Processing (ICSLP 2000), pp. 645\u2013648 (2000). https:\/\/doi.org\/10.21437\/ICSLP.2000-160","DOI":"10.21437\/ICSLP.2000-160"},{"key":"37_CR12","unstructured":"Kim, H., Kim, S., Yoon, S.: Guided-tts: a diffusion model for text-to-speech via classifier guidance. In: International Conference on Machine Learning, pp. 11119\u201311133. PMLR (2022)"},{"key":"37_CR13","unstructured":"Leal, S.E., et al.: MuPe life stories dataset: spontaneous speech in Brazilian Portuguese with a case study evaluation on ASR bias against speakers groups and topic modeling. In: Rambow, O., Wanner, L., Apidianaki, M., Al-Khalifa, H., Eugenio, B.D., Schockaert, S. (eds.) Proceedings of the 31st International Conference on Computational Linguistics, pp. 6076\u20136087. ACL, Abu Dhabi, UAE, January 2025. https:\/\/aclanthology.org\/2025.coling-main.407\/"},{"key":"37_CR14","doi-asserted-by":"publisher","unstructured":"Li, W., et al.: Spontaneous style text-to-speech synthesis with controllable spontaneous behaviors based on language models. In: Interspeech 2024, pp. 1785\u20131789 (2024). https:\/\/doi.org\/10.21437\/Interspeech.2024-1989","DOI":"10.21437\/Interspeech.2024-1989"},{"key":"37_CR15","doi-asserted-by":"publisher","unstructured":"Liu, S., et al.: How pause duration influences impressions of english speech: Comparison between native and non-native speakers. Front. Psychol. 13 (2022). https:\/\/doi.org\/10.3389\/fpsyg.2022.778018, https:\/\/www.frontiersin.org\/articles\/10.3389\/fpsyg.2022.778018","DOI":"10.3389\/fpsyg.2022.778018"},{"key":"37_CR16","unstructured":"Liu, W., et al.: Voxpopulitts: a large-scale multilingual tts corpus for zero-shot speech generation. In: Proceedings of the 31st International Conference on Computational Linguistics, pp. 10293\u201310297 (2025)"},{"key":"37_CR17","unstructured":"Matos, A., Ara\u00fajo, G., Junior, A.C., Ponti, M.: Accent classification is challenging but pre-training helps: a case study with novel Brazilian Portuguese datasets. In: Gamallo, P., Claro, D., Teixeira, A., Real, L., Garcia, M., Oliveira, H.G., Amaro, R. (eds.) Proceedings of the 16th International Conference on Computational Processing of Portuguese, vol. 1, pp. 364\u2013373. ACL, Santiago de Compostela, Galicia\/Spain, March 2024. https:\/\/aclanthology.org\/2024.propor-1.37\/"},{"key":"37_CR18","doi-asserted-by":"publisher","unstructured":"de\u00a0Moraes, J.A.: The pitch accents in brazilian portuguese: analysis by synthesis. In: Speech Prosody 2008, pp. 389\u2013397 (2008). https:\/\/doi.org\/10.21437\/SpeechProsody.2008-4","DOI":"10.21437\/SpeechProsody.2008-4"},{"key":"37_CR19","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1007\/978-3-031-40498-6_17","volume-title":"Text, Speech, and Dialogue","author":"FS Oliveira","year":"2023","unstructured":"Oliveira, F.S., Casanova, E., Junior, A.C., Soares, A.S., Galv\u00e3o Filho, A.R.: Cml-tts: a multilingual dataset for speech synthesis in low-resource languages. In: Ek\u0161tein, K., P\u00e1rtl, F., Konop\u00edk, M. (eds.) Text, Speech, and Dialogue, pp. 188\u2013199. Springer Nature Switzerland, Cham (2023)"},{"key":"37_CR20","unstructured":"Radford, A., Kim, J.W., Xu, T., Brockman, G., Mcleavey, C., Sutskever, I.: Robust speech recognition via large-scale weak supervision. In: Krause, A., Brunskill, E., Cho, K., Engelhardt, B., Sabato, S., Scarlett, J. (eds.) Proceedings of the 40th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0202, pp. 28492\u201328518. PMLR, 23\u201329 July 2023"},{"key":"37_CR21","unstructured":"Raso, T., Mello, H.: O Corpus c-oral-brasil. Editora UFMG, Belo Horizonte (2012)"},{"key":"37_CR22","unstructured":"Ren, Y., et al.: Fastspeech 2: fast and high-quality end-to-end text to speech (2022). https:\/\/arxiv.org\/abs\/2006.04558"},{"key":"37_CR23","doi-asserted-by":"crossref","unstructured":"Sagisaka, Y., Campbell, N., Higuchi, N.: Computing Prosody: Computational Models for Processing Spontaneous Speech. Springer Science & Business Media (1997)","DOI":"10.1007\/978-1-4612-2258-3"},{"key":"37_CR24","doi-asserted-by":"publisher","unstructured":"Santos, V.G., et al.: CORAA NURC-SP minimal corpus: a manually annotated corpus of Brazilian Portuguese spontaneous speech. In: Proceedings of IberSPEECH 2022, pp. 161\u2013165 (2022). https:\/\/doi.org\/10.21437\/IberSPEECH.2022-33","DOI":"10.21437\/IberSPEECH.2022-33"},{"issue":"1","key":"37_CR25","doi-asserted-by":"publisher","first-page":"514","DOI":"10.1121\/1.418114","volume":"101","author":"M Swerts","year":"1997","unstructured":"Swerts, M.: Prosodic features at discourse boundaries of different strength. J. Acoust. Soc. Am. 101(1), 514\u2013521 (1997)","journal-title":"J. Acoust. Soc. Am."},{"key":"37_CR26","doi-asserted-by":"publisher","unstructured":"Viola, I.C., Madureira, S.: The roles of pause in speech expression. In: Speech Prosody 2008, pp. 721\u2013724 (2008). https:\/\/doi.org\/10.21437\/SpeechProsody.2008-160","DOI":"10.21437\/SpeechProsody.2008-160"},{"key":"37_CR27","doi-asserted-by":"crossref","unstructured":"Xie, T., Rong, Y., Zhang, P., Wang, W., Liu, L.: Towards controllable speech synthesis in the era of large language models: a survey (2025). https:\/\/arxiv.org\/abs\/2412.06602","DOI":"10.18653\/v1\/2025.emnlp-main.40"}],"container-title":["Lecture Notes in Computer Science","Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-15984-7_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T20:34:02Z","timestamp":1769718842000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-15984-7_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032159830","9783032159847"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-15984-7_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"30 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"BRACIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazilian Conference on Intelligent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Fortaleza-CE","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazil","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"bracis2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/bracis.sbc.org.br\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}