{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:29:13Z","timestamp":1775230153690,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,4]]},"DOI":"10.1109\/icassp49357.2023.10096509","type":"proceedings-article","created":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T17:28:30Z","timestamp":1683307710000},"page":"1-5","source":"Crossref","is-referenced-by-count":61,"title":["Audiodec: An Open-Source Streaming High-Fidelity Neural Audio Codec"],"prefix":"10.1109","author":[{"given":"Yi-Chiao","family":"Wu","sequence":"first","affiliation":[{"name":"Meta Reality Labs Research,USA"}]},{"given":"Israel D.","family":"Gebru","sequence":"additional","affiliation":[{"name":"Meta Reality Labs Research,USA"}]},{"given":"Dejan","family":"Markovi\u0107","sequence":"additional","affiliation":[{"name":"Meta Reality Labs Research,USA"}]},{"given":"Alexander","family":"Richard","sequence":"additional","affiliation":[{"name":"Meta Reality Labs Research,USA"}]}],"member":"263","reference":[{"key":"ref35","article-title":"Noisy speech database for training speech enhancement algorithms and TTS models","author":"valentini-botinhao","year":"2017","journal-title":"University of Edinburg"},{"key":"ref13","first-page":"482","article-title":"Fully vector-quantized neural network-based code-excited nonlinear predictive speech coding","volume":"2","author":"wu","year":"1994","journal-title":"IEEE TSAP"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/49.62823"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746108"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2604566"},{"key":"ref36","article-title":"CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit","author":"veaux","year":"2017","journal-title":"University of Edinburg"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2010-487"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00805"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1985.13340"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-41"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7179063"},{"key":"ref32","first-page":"14910","article-title":"MelGAN: generative adversarial networks for conditional waveform synthesis","author":"kumar","year":"2019","journal-title":"NIPS"},{"key":"ref10","article-title":"High-quality, low-delay music coding in the opus codec","author":"valin","year":"2013","journal-title":"AESC 135"},{"key":"ref2","first-page":"780","article-title":"ISO\/MPEG-1 audio: A generic standard for coding of high-quality digital audio","volume":"42","author":"brandenburg","year":"1994","journal-title":"AES"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1976.1170021"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682435"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.2015EDP7457"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462529"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA52581.2021.9632750"},{"key":"ref18","first-page":"3406","article-title":"A real-time wideband neural vocoder at 1.6 kb\/s using lpcnet","author":"valin","year":"2019","journal-title":"Proc INTERSPEECH"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1816"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683277"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA52581.2021.9632723"},{"key":"ref25","first-page":"495","article-title":"SoundStream: An end-to-end neural audio codec","volume":"30","author":"zeghidour","year":"2021","journal-title":"IEEE\/ACM TASLP"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.1990.117117"},{"key":"ref22","article-title":"Neural discrete representation learning","volume":"30","author":"van den oord","year":"2017","journal-title":"NIPS"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461487"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10603"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10084"},{"key":"ref29","first-page":"17022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume":"33","author":"kong","year":"2020","journal-title":"NIPS"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/45.1890"},{"key":"ref7","first-page":"937","article-title":"Code-excited linear prediction (CELP): High-quality speech at very low bit rates","volume":"10","author":"schroeder","year":"1985","journal-title":"Proc ICASSP"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2002.804299"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/25.312763"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1986.1164946"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1970.tb04297.x"},{"key":"ref5","author":"rao","year":"1996","journal-title":"Techniques and Standards for Image Video and Audio Coding"}],"event":{"name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Rhodes Island, Greece","start":{"date-parts":[[2023,6,4]]},"end":{"date-parts":[[2023,6,10]]}},"container-title":["ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10094559\/10094560\/10096509.pdf?arnumber=10096509","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,23]],"date-time":"2023-10-23T23:05:54Z","timestamp":1698102354000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10096509\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,4]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icassp49357.2023.10096509","relation":{},"subject":[],"published":{"date-parts":[[2023,6,4]]}}}