{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:47:43Z","timestamp":1776887263016,"version":"3.51.2"},"reference-count":36,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Lett."],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/lsp.2024.3440956","type":"journal-article","created":{"date-parts":[[2024,8,9]],"date-time":"2024-08-09T17:42:34Z","timestamp":1723225354000},"page":"2115-2119","source":"Crossref","is-referenced-by-count":6,"title":["Very Low Complexity Speech Synthesis Using Framewise Autoregressive GAN (FARGAN) With Pitch Prediction"],"prefix":"10.1109","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9883-6927","authenticated-orcid":false,"given":"Jean-Marc","family":"Valin","sequence":"first","affiliation":[{"name":"Xiph.Org Foundation, Jaffrey, NH, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4820-8446","authenticated-orcid":false,"given":"Ahmed","family":"Mustafa","sequence":"additional","affiliation":[{"name":"Amazon Web Services, Palo Alto, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9684-1567","authenticated-orcid":false,"given":"Jan","family":"B\u00fcthe","sequence":"additional","affiliation":[{"name":"Amazon Web Services, Palo Alto, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"WaveNet: A. generative model for raw audio","author":"van den Oord","year":"2016"},{"key":"ref2","article-title":"SampleRNN: An unconditional end-to-end neural audio generation model","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mehri","year":"2017"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462529"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2022-11017"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2019.8937165"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2939"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2022.3164361"},{"key":"ref9","first-page":"2410","article-title":"Efficient neural audio synthesis","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","volume":"80","author":"Kalchbrenner","year":"2018"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682804"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.2.270"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-5616"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969125"},{"key":"ref14","article-title":"MelGAN: Generative adversarial networks for conditional waveform synthesis","volume":"32","author":"Kumar","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref15","first-page":"12","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","author":"Kong","year":"2020"},{"key":"ref16","article-title":"BigVGAN: A universal neural vocoder with large-scale training","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lee","year":"2022"},{"key":"ref17","first-page":"2021","article-title":"Chunked autoregressive GAN for conditional waveform synthesis","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Morrison"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096007"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1163\/9789004658820"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1016"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413605"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448332"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.304"},{"key":"ref24","first-page":"6532","article-title":"Open-source multi-speaker corpora of the English accents in the British isles","volume-title":"Proc. Lang. Resour. Eval. Conf.","author":"Demirsahin","year":"2020"},{"key":"ref25","first-page":"21","article-title":"Open-source high quality speech datasets for Basque, Catalan and Galician","volume-title":"Proc. 1st Joint Workshop Spoken Lang. Technol. Under-resourced Lang. & Collaboration Comput. Under-Resourced Lang.","author":"Kjartansson","year":"2020"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/sltu.2018-14"},{"key":"ref27","first-page":"6504","article-title":"Crowdsourcing latin American Spanish for low-resource text-to-speech","volume-title":"Proc. 12th Lang. Resour. Eval. Conf.","author":"Guevara-Rukoz","year":"2020"},{"key":"ref28","article-title":"Open-source multi-speaker speech corpora for building Gujarati, Kannada, Malayalam, Marathi, Tamil and Telugu speech synthesis systems","volume-title":"Proc. 12th Lang. Resour. Eval. Conf.","author":"He","year":"2020"},{"key":"ref29","first-page":"6328","article-title":"Burmese speech corpus, finite-state text normalization and pronunciation grammars with an application to text-to-speech","volume-title":"Proc. 12th Lang. Resour. Eval. Conf.","author":"Oo","year":"2020"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2017-1139"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-1096"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1599"},{"key":"ref33","first-page":"1509","article-title":"A pitch tracking corpus with evaluation on multipitch tracking scenario","volume-title":"Proc. Annu. Conf. Int. Speech Commun. Assoc.","author":"Pirker","year":"2011"},{"key":"ref34","article-title":"Recommendation P.862.2: Wideband extension to recommendation p.862 for the assessment of wideband telephone networks and speech codecs","year":"2005"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414901"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-2665"}],"container-title":["IEEE Signal Processing Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/97\/10380231\/10632624.pdf?arnumber=10632624","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T10:26:58Z","timestamp":1725013618000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10632624\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/lsp.2024.3440956","relation":{},"ISSN":["1070-9908","1558-2361"],"issn-type":[{"value":"1070-9908","type":"print"},{"value":"1558-2361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}