{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T06:24:05Z","timestamp":1774419845828,"version":"3.50.1"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10890772","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T17:15:19Z","timestamp":1741799719000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Precisely Controllable Neural Speech Synthesis"],"prefix":"10.1109","author":[{"given":"Paul Konstantin","family":"Krug","sequence":"first","affiliation":[{"name":"Altavo GmbH,Dresden,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christoph","family":"Wagner","sequence":"additional","affiliation":[{"name":"Altavo GmbH,Dresden,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Birkholz","sequence":"additional","affiliation":[{"name":"Technische Universit&#x00E4;t Dresden,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Timo","family":"Stich","sequence":"additional","affiliation":[{"name":"Altavo GmbH,Dresden,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"A survey on neural speech synthesis","author":"Tan","year":"2021"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref3","first-page":"8067","article-title":"Glow-TTS: A generative flow for text-to-speech via monotonic alignment search","volume":"33","author":"Kim","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref4","article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","volume-title":"Proc. ICLR","author":"Ren"},{"key":"ref5","first-page":"5530","article-title":"Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech","volume-title":"Proc. ICML","author":"Kim"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-534"},{"key":"ref7","article-title":"Natural language guidance of high-fidelity text-to-speech with synthetic annotations","author":"Lyth","year":"2024"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3402088"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-1622"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2351"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1121\/1.386780"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(82)90017-6"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0060603"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2021-18"},{"key":"ref15","article-title":"Towards an articulatory-driven neural vocoder for speech synthesis","volume-title":"Proc. ISSP","author":"Georges"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10892"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095404"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096796"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2316"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832354"},{"key":"ref21","article-title":"Articulatory encodec: Vocal tract kinematics as a codec for speech","author":"Cho","year":"2024"},{"key":"ref22","first-page":"37","article-title":"Enhanced area functions for noise source modeling in the vocal tract","volume-title":"Proc. ISSP","author":"Birkholz"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2545"},{"key":"ref24","first-page":"51","article-title":"Efficient exploration of articulatory dimensions","author":"Krug","year":"2022","journal-title":"Studien-texte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2022"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2023.01.003"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3264454"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2173"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1972.tb02651.x"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2011-685"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2410"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1121\/10.0005876"},{"key":"ref32","article-title":"Neural discrete representation learning","volume":"30","author":"Van Den Oord","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-3015"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(00)00063-7"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1121\/1.3037222"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953152"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1584"},{"key":"ref40","first-page":"17 022","article-title":"Hifi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume":"33","author":"Kong","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746806"},{"key":"ref42","article-title":"From scenario to segment. the controlled elicitation, transcription, segmentation and labelling of spontaneous speech","volume":"29","author":"Kohler","year":"1995","journal-title":"Arbeitsberichte des Instituts f\u00fcr Phonetik und digitale Sprachverarbeitung der Christian-Albrechts-Universit\u00e4t Kiel (AIPUK)"},{"key":"ref43","first-page":"1","article-title":"From the acoustic data collection to a labelled speech data bank of spoken standard German","volume":"32","author":"Kohler","year":"1997","journal-title":"Arbeitsberichte des Instituts f\u00fcr Phonetik und digitale Sprachverarbeitung der Universit\u00e4t Kiel (AIPUK)"},{"issue":"9","key":"ref44","first-page":"341","article-title":"Praat, a system for doing phonetics by computer","volume":"5","author":"Boersma","year":"2001","journal-title":"Glot. Int."},{"key":"ref45","first-page":"28 492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"Proc. ICML","author":"Radford"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2826"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10890772.pdf?arnumber=10890772","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:25:00Z","timestamp":1774416300000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10890772\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10890772","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}