{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T11:36:17Z","timestamp":1770464177218,"version":"3.49.0"},"reference-count":23,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Lett."],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/lsp.2023.3313513","type":"journal-article","created":{"date-parts":[[2023,9,11]],"date-time":"2023-09-11T19:22:18Z","timestamp":1694460138000},"page":"1262-1266","source":"Crossref","is-referenced-by-count":8,"title":["Direct Text to Speech Translation System Using Acoustic Units"],"prefix":"10.1109","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3505-0249","authenticated-orcid":false,"given":"Victoria","family":"Mingote","sequence":"first","affiliation":[{"name":"ViVoLab - Arag&#x00F3;n Institute for Engineering Research (I3A), University of Zaragoza, Zaragoza, Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3142-0708","authenticated-orcid":false,"given":"Pablo","family":"Gimeno","sequence":"additional","affiliation":[{"name":"ViVoLab - Arag&#x00F3;n Institute for Engineering Research (I3A), University of Zaragoza, Zaragoza, Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4391-5203","authenticated-orcid":false,"given":"Luis","family":"Vicente","sequence":"additional","affiliation":[{"name":"ViVoLab - Arag&#x00F3;n Institute for Engineering Research (I3A), University of Zaragoza, Zaragoza, Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3182-1085","authenticated-orcid":false,"given":"Sameer","family":"Khurana","sequence":"additional","affiliation":[{"name":"MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2653-1008","authenticated-orcid":false,"given":"Antoine","family":"Laurent","sequence":"additional","affiliation":[{"name":"LIUM, Le Mans University, Le Mans, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5755-2653","authenticated-orcid":false,"given":"Jarod","family":"Duret","sequence":"additional","affiliation":[{"name":"LIA, Avignon University, Avignon, France"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","first-page":"6691","article-title":"CVSS corpus and massively multilingual speech-to-speech translation","author":"jia","year":"0","journal-title":"Proc 13th Lang Resour Eval Conf"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.391"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-475"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414460"},{"key":"ref11","first-page":"15748","article-title":"Multimodal and multilingual embeddings for large-scale speech mining","author":"duquenne","year":"0","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414641"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.63"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-4009"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00343"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-143"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.80"},{"key":"ref16","first-page":"17022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","author":"kong","year":"0","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096149"},{"key":"ref18","first-page":"1336","article-title":"On generative spoken language modeling from raw audio","volume":"9","author":"lakhotia","year":"2021","journal-title":"Trans Assoc Comput Linguistics"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1951"},{"key":"ref7","first-page":"10120","article-title":"Translatotron 2: High-quality direct speech-to-speech translation with voice preservation","author":"jia","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.235"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.41"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.68"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3192714"},{"key":"ref5","article-title":"mSLAM: Massively multilingual joint pre-training for speech and text","author":"bapna","year":"2022"}],"container-title":["IEEE Signal Processing Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/97\/10036333\/10246390.pdf?arnumber=10246390","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T18:17:34Z","timestamp":1696270654000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10246390\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/lsp.2023.3313513","relation":{},"ISSN":["1070-9908","1558-2361"],"issn-type":[{"value":"1070-9908","type":"print"},{"value":"1558-2361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]}}}