{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,18]],"date-time":"2026-04-18T14:42:42Z","timestamp":1776523362883,"version":"3.51.2"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10890252","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T17:15:02Z","timestamp":1741799702000},"page":"1-5","source":"Crossref","is-referenced-by-count":3,"title":["High-Fidelity Music Vocoder using Neural Audio Codecs"],"prefix":"10.1109","author":[{"given":"Luca A.","family":"Lanzend\u00f6rfer","sequence":"first","affiliation":[{"name":"ETH Zurich"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Florian","family":"Gr\u00f6tschla","sequence":"additional","affiliation":[{"name":"ETH Zurich"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Ungersb\u00f6ck","sequence":"additional","affiliation":[{"name":"ETH Zurich"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roger","family":"Wattenhofer","sequence":"additional","affiliation":[{"name":"ETH Zurich"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","volume":"12","author":"Van Den Oord","year":"2016","journal-title":"WaveNet: A generative model for raw audio"},{"key":"ref2","first-page":"3918","article-title":"Parallel WaveNet: Fast high-fidelity speech synthesis","volume-title":"International conference on machine learning","author":"Oord"},{"key":"ref3","article-title":"MelGAN: Generative adversarial networks for conditional waveform synthesis","volume-title":"Advances in neural information processing systems","volume":"32","author":"Kumar","year":"2019"},{"key":"ref4","first-page":"17 022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Advances in neural information processing systems","volume":"33","author":"Kong","year":"2020"},{"key":"ref5","article-title":"Riffusion - Stable diffusion for real-time music generation","author":"Forsgren","year":"2022"},{"key":"ref6","first-page":"21 450","article-title":"AudioLDM: Text-to-audio generation with latent diffusion models","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Liu"},{"key":"ref7","author":"Di Giorgi","year":"2022","journal-title":"Mel spectrogram inversion with stable pitch"},{"key":"ref8","article-title":"High-fidelity audio compression with improved RVQGAN","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Kumar","year":"2024"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"ref11","volume-title":"BigVGAN: A universal neural vocoder with large-scale training","author":"Lee","year":"2022"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3129994"},{"key":"ref13","volume-title":"High fidelity neural audio compression","author":"D\u00e9fossez","year":"2022"},{"key":"ref14","first-page":"1583","article-title":"Neural networks fail to learn periodic functions and how to fix it","volume":"33","author":"Ziyin","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1016"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/iccv.2017.304"},{"key":"ref17","first-page":"1558","article-title":"Autoencoding beyond pixels using a learned similarity metric","volume-title":"International conference on machine learning","author":"Larsen"},{"key":"ref18","article-title":"The MTG-Jamendo dataset for automatic music tagging","volume-title":"Machine Learning for Music Discovery Workshop, International Conference on Machine Learning (ICML 2019)","author":"Bogdanov"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2019-2441"},{"key":"ref20","volume-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/icassp40776.2020.9053795"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/qomex48832.2020.9123150"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413711"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/icassp.2001.941023"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.5334\/jors.187"},{"key":"ref26","first-page":"1543","article-title":"Method for the subjective assessment of intermediate sound quality (MUSHRA)","volume-title":"ITU, BS","year":"2001"},{"key":"ref27","article-title":"MUSDB18-HQ - an uncompressed version of MUSDB18","author":"Rafii","year":"2019"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/icassp.2017.7952261"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10890252.pdf?arnumber=10890252","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:22:01Z","timestamp":1774416121000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10890252\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10890252","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}