{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T18:38:39Z","timestamp":1776883119729,"version":"3.51.2"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,9,17]],"date-time":"2023-09-17T00:00:00Z","timestamp":1694908800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,9,17]],"date-time":"2023-09-17T00:00:00Z","timestamp":1694908800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,9,17]]},"DOI":"10.1109\/mlsp55844.2023.10285965","type":"proceedings-article","created":{"date-parts":[[2023,10,23]],"date-time":"2023-10-23T13:55:41Z","timestamp":1698069341000},"page":"1-6","source":"Crossref","is-referenced-by-count":4,"title":["Low-Complexity Streaming Speech Super-Resolution"],"prefix":"10.1109","author":[{"given":"Erfan","family":"Soltanmohammadi","sequence":"first","affiliation":[{"name":"Amazon Web Services, Inc."}]},{"given":"Paris","family":"Smaragdis","sequence":"additional","affiliation":[{"name":"Amazon Web Services, Inc."}]},{"given":"Michael M.","family":"Goodwin","sequence":"additional","affiliation":[{"name":"Amazon Web Services, Inc."}]}],"member":"263","reference":[{"key":"ref13","first-page":"17022","article-title":"Hifi-gan: Generative adversarial networks for efficient and high fidelity speech synthesis","volume":"33","author":"kong","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-45"},{"key":"ref23","year":"2018","journal-title":"Recommendation P 808 Subjective evaluation of speech quality with a crowdsourcing approach"},{"key":"ref15","first-page":"2019","article-title":"G. 711: Pulse code modulation (PCM) of voice frequencie","year":"0","journal-title":"The ITU Telecommunication Standardization Sector (ITU-T) Archived from the original on 2019-06-17"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11017"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref11","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"ho","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref22","year":"2001","journal-title":"ITU-T P 862 Perceptual evaluation of speech quality (PESQ) an objective method for end-to-end speech quality assessment of narrow-band telephone networks and speech codecs"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-36"},{"key":"ref21","author":"rabiner","year":"1993","journal-title":"Fundamentals of speech recognition"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2439281"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/QoMEX.2012.6263840"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3206148"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref19","author":"yamagishi","year":"2019","journal-title":"CSTR VCTK Corpus English multi-speaker corpus for CSTR voice cloning toolkit (version 0 92) [sound] University of Edinburgh The Centre for Speech Technology Research (CSTR)"},{"key":"ref18","author":"liu","year":"2021","journal-title":"VoiceFixer Toward general speech restoration with neural vocoder"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2992934"},{"key":"ref7","first-page":"14881","article-title":"Melgan: Generative adversarial networks for conditional waveform synthesis","volume":"32","author":"kumar","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref9","first-page":"1649","article-title":"WSR-Glow: A Glow-Based Waveform Generative Model for Audio Super-Resolution","author":"zhang","year":"2021","journal-title":"Annual Conference of the International Speech Communication Association Interspeech 2012"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682215"},{"key":"ref3","article-title":"Audio super-resolution using neural nets","author":"kuleshov","year":"2017","journal-title":"ICLR 2017 - Workshop of International Conference on Learning Representations"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2605"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2019.2909077"}],"event":{"name":"2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing (MLSP)","location":"Rome, Italy","start":{"date-parts":[[2023,9,17]]},"end":{"date-parts":[[2023,9,20]]}},"container-title":["2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing (MLSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10285838\/10285858\/10285965.pdf?arnumber=10285965","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,13]],"date-time":"2023-11-13T14:03:45Z","timestamp":1699884225000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10285965\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,17]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/mlsp55844.2023.10285965","relation":{},"subject":[],"published":{"date-parts":[[2023,9,17]]}}}