{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T06:58:22Z","timestamp":1775199502401,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11433840","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["Improving Resource-Efficient Speech Enhancement via Neural Differentiable DSP Vocoder Refinement"],"prefix":"10.1109","author":[{"given":"Heitor R.","family":"Guimar\u00e3es","sequence":"first","affiliation":[{"name":"INRS - EMT,Montreal,CA"}]},{"given":"Ke","family":"Tan","sequence":"additional","affiliation":[{"name":"Meta Reality Labs,Redmond,US"}]},{"given":"Juan","family":"Azcarreta","sequence":"additional","affiliation":[{"name":"Meta Reality Labs,Cambridge,UK"}]},{"given":"Jesus M.","family":"Alvarez","sequence":"additional","affiliation":[{"name":"Meta Reality Labs,ES"}]},{"given":"Prabhav","family":"Agrawal","sequence":"additional","affiliation":[{"name":"Meta AI,New York,US"}]},{"given":"Ashutosh","family":"Pandey","sequence":"additional","affiliation":[{"name":"Meta Reality Labs,Redmond,US"}]},{"given":"Buye","family":"Xu","sequence":"additional","affiliation":[{"name":"Meta Reality Labs,Redmond,US"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1201\/9781420015836"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2418"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446087"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890512"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-958"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49660.2025.10889061"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447774"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-590"},{"key":"ref9","author":"Guimaraes","year":"2025","journal-title":"Ditse: High-fidelity generative speech enhancement via latent diffusion transformers"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-138"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10888274"},{"key":"ref12","first-page":"17022","article-title":"Hifi-gan: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Advances in neural information processing systems","volume":"33","author":"Kong","year":"2020"},{"key":"ref13","article-title":"Vocos: Closing the gap between time-domain and fourier-based neural vocoders for high-quality audio synthesis","volume-title":"The Twelfth International Conference on Learning Representations","author":"Siuzdak"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO63174.2024.10715278"},{"key":"ref15","article-title":"Ddsp: Differentiable digital signal processing","volume-title":"International Conference on Learning Representations","author":"Engel"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/9780470546475"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/THMS.2023.3339663"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447948"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3188"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10411"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747177"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3271151"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1515\/9783110873429"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.52202\/075280-1214"},{"key":"ref25","first-page":"1583","article-title":"Neural networks fail to learn periodic functions and how to fix it","volume":"33","author":"Ziyin","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref26","article-title":"BigVGAN: A universal neural vocoder with large-scale training","volume-title":"The Eleventh International Conference on Learning Representations","author":"Lee"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413901"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053795"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-2409"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3038"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2876171"},{"key":"ref32","author":"Kingma","year":"2014","journal-title":"Adam: A method for stochastic optimization"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746698"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.941023"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2114881"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414878"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383567"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11433840.pdf?arnumber=11433840","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:54:18Z","timestamp":1775192058000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11433840\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11433840","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}