{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T06:27:01Z","timestamp":1774420021699,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10890511","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T17:15:19Z","timestamp":1741799719000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Wave-U-Mamba: An End-To-End Framework For High-Quality And Efficient Speech Super Resolution"],"prefix":"10.1109","author":[{"given":"Yongjoon","family":"Lee","sequence":"first","affiliation":[{"name":"Korea University,Department of Statistics,Seoul,South Korea"}]},{"given":"Chanwoo","family":"Kim","sequence":"additional","affiliation":[{"name":"Korea University,Department of Artificial Intelligence,Seoul,South Korea"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-45"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-36"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447246"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11017"},{"key":"ref5","article-title":"Nu-gan: High resolution neural upsampling with 
gan","author":"Kumar","year":"2020"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-892"},{"key":"ref7","article-title":"Audio super resolution using neural networks","volume-title":"ICLR (Workshop Track)","author":"Kuleshov"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3699757"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054551"},{"key":"ref10","article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","volume-title":"Proc. ICLR","author":"Ren"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"ref13","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","author":"Gu","year":"2024"},{"key":"ref14","article-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","volume-title":"Proc. ICML","author":"Zhu"},{"key":"ref15","article-title":"Jamba: A hybrid transformer-mamba language model","author":"Lieber","year":"2024"},{"key":"ref16","article-title":"U-mamba: Enhancing long-range dependency for biomedical image segmentation","author":"Ma","year":"2024"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref18","article-title":"Vm-unet: Vision mamba unet for medical image segmentation","author":"Ruan","year":"2024"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/MIPR62202.2024.00059"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462116"},{"key":"ref21","article-title":"Wave-u-net: A multi-scale neural network for end-to-end audio source separation","volume-title":"Proc. ISMIR","author":"Stoller"},{"key":"ref22","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"Proc. 
NeurIPS","author":"Baevski"},{"key":"ref23","article-title":"Vatt: Transformers for multimodal self-supervised learning from raw video, audio and text","volume-title":"Proc. NeurIPS","author":"Akbari"},{"key":"ref24","article-title":"Wavenet: A generative model for raw audio","author":"den Oord","year":"2016"},{"key":"ref25","article-title":"It\u2019s raw! audio generation with state-space models","volume-title":"Proc. ICML","author":"Goel"},{"key":"ref26","article-title":"Hifi-gan: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Proc. NeurIPS","author":"Kong"},{"key":"ref27","article-title":"Layer normalization","volume-title":"Proc. NeurIPS","author":"Ba"},{"key":"ref28","article-title":"Clip-mamba: Clip pretrained mamba models with ood and hessian evaluation","author":"Huang","year":"2024"},{"key":"ref29","article-title":"Early convolutions help transformers see better","volume-title":"Proc. NeurIPS","author":"Xiao"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.23915\/distill.00003"},{"key":"ref31","article-title":"Weight normalization: A simple reparameterization to accelerate training of deep neural networks","volume-title":"Proc. NeurIPS","author":"Salimans"},{"key":"ref32","article-title":"Melgan: Generative adversarial networks for conditional waveform synthesis","volume-title":"Proc. NeurIPS","author":"Kumar"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053795"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413575"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-599"},{"key":"ref36","article-title":"CSTR VCTK Corpus: English Multi-speaker Corpus for CSTR Voice Cloning Toolkit (version 0.92)","author":"Yamagishi"},{"key":"ref37","article-title":"Decoupled weight decay regularization","volume-title":"Proc. 
ICLR","author":"Loshchilov"},{"key":"ref38","article-title":"Upsampling artifacts in neural audio synthesis","volume-title":"Proc. ICASSP","author":"Pons"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10890511.pdf?arnumber=10890511","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:25:59Z","timestamp":1774416359000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10890511\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10890511","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}