{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T23:25:33Z","timestamp":1766013933382,"version":"3.48.0"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"17","license":[{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62271344"],"award-info":[{"award-number":["62271344"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11760-025-04958-z","type":"journal-article","created":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T02:12:13Z","timestamp":1765332733000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Audio super-resolution based on depthwise convolutional gated recurrent network"],"prefix":"10.1007","volume":"19","author":[{"given":"Ikram","family":"Azaz","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yasir","family":"Iqbal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanzhang","family":"Geng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,10]]},"reference":[{"key":"4958_CR1","doi-asserted-by":"crossref","unstructured":"Kontio, J., Laaksonen, L., Alku, P.: Neural network-based artificial bandwidth expansion of speech. IEEE transactions on audio, speech, and language processing. 15(3), 873\u2013881 (2007)","DOI":"10.1109\/TASL.2006.885934"},{"issue":"7","key":"4958_CR2","doi-asserted-by":"publisher","first-page":"2170","DOI":"10.1109\/TASL.2011.2118206","volume":"19","author":"H Pulakka","year":"2011","unstructured":"Pulakka, H., Alku, P.: Bandwidth extension of telephone speech using a neural network and a filter bank implementation for highband mel spectrum. IEEE Trans. Audio Speech Lang. Process. 19(7), 2170\u20132183 (2011)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"12","key":"4958_CR3","doi-asserted-by":"publisher","first-page":"2398","DOI":"10.1109\/TASLP.2015.2470560","volume":"23","author":"M Z\u00f6hrer","year":"2015","unstructured":"Z\u00f6hrer, M., Peharz, R., Pernkopf, F.: Representation learning for single-channel source separation and bandwidth extension. IEEE\/ACM Trans. Audio Speech Lang. Process. 23(12), 2398\u20132409 (2015)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"issue":"3","key":"4958_CR4","doi-asserted-by":"publisher","first-page":"594","DOI":"10.1109\/TASLP.2016.2519146","volume":"24","author":"X Liu","year":"2016","unstructured":"Liu, X., Bao, C.: Audio bandwidth extension based on ensemble echo state networks with Temporal evolution. IEEE\/ACM Trans. Audio Speech Lang. Process. 24(3), 594\u2013607 (2016)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"4958_CR5","doi-asserted-by":"crossref","unstructured":"Lee, J., Han, S.: Nu-wave: A diffusion probabilistic model for neural audio upsampling, arXiv preprint arXiv:2104.02321 (2021)","DOI":"10.21437\/Interspeech.2021-36"},{"key":"4958_CR6","doi-asserted-by":"publisher","unstructured":"Iqbal, Y., et al.: Speech enhancement using complex depthwise convolutional recurrent network with self-attention mechanism. Signal, Image and Video Processing. 19(12), 990 (2025). https:\/\/doi.org\/10.1007\/s11760-025-04534-5","DOI":"10.1007\/s11760-025-04534-5"},{"key":"4958_CR7","doi-asserted-by":"crossref","unstructured":"Li, K., Lee, C.-H.: A deep neural network approach to speech bandwidth expansion, in IEEE international conference on acoustics, speech and signal processing (ICASSP), 2015: IEEE, pp. 4395\u20134399. (2015)","DOI":"10.1109\/ICASSP.2015.7178801"},{"key":"4958_CR8","unstructured":"Kuleshov, V., Enam, S.Z., Ermon, S.: Audio super resolution using neural networks. arXiv preprint arXiv:1708.00853 (2017)"},{"key":"4958_CR9","doi-asserted-by":"crossref","unstructured":"Rakotonirina, N.C.: Self-attention for audio super-resolution, in 2021 IEEE 31st International Workshop on Machine Learning for Signal Processing (MLSP), : IEEE, pp. 1\u20136. (2021)","DOI":"10.1109\/MLSP52302.2021.9596082"},{"key":"4958_CR10","doi-asserted-by":"crossref","unstructured":"Yoneyama, R., Yamamoto, R., Tachibana, K.: Nonparallel high-quality audio super resolution with domain adaptation and resampling cyclegans, in ICASSP 2023\u20132023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), : IEEE, pp. 1\u20135. (2023)","DOI":"10.1109\/ICASSP49357.2023.10097002"},{"key":"4958_CR11","doi-asserted-by":"crossref","unstructured":"Han, S., Lee, J.: NU-Wave 2: A general neural audio upsampling model for various sampling rates. arXiv preprint arXiv:2206.08545 (2022)","DOI":"10.21437\/Interspeech.2022-45"},{"key":"4958_CR12","doi-asserted-by":"crossref","unstructured":"Shuai, C., Shi, C., Gan, L., Liu, H.: mdctGAN: Taming transformer-based GAN for speech super-resolution with modified DCT spectra. arXiv preprint arXiv:2305.11104 (2023)","DOI":"10.21437\/Interspeech.2023-113"},{"issue":"5","key":"4958_CR13","doi-asserted-by":"publisher","first-page":"883","DOI":"10.1109\/TASLP.2018.2798811","volume":"26","author":"Z-H Ling","year":"2018","unstructured":"Ling, Z.-H., Ai, Y., Gu, Y., Dai, L.-R.: Waveform modeling and generation using hierarchical recurrent neural networks for speech bandwidth extension. IEEE\/ACM Trans. Audio Speech Lang. Process. 26(5), 883\u2013894 (2018).","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"4958_CR14","doi-asserted-by":"publisher","unstructured":"Azaz, I., Zhang, T., Iqbal, Y., Zhao, X., Geng, Y.: A multi-task generalized speech restoration model using depthwise separable convolution Layers. Signal, Image and Video Processing. 19(11), 902 (2025). https:\/\/doi.org\/10.1007\/s11760-025-04427-7","DOI":"10.1007\/s11760-025-04427-7"},{"key":"4958_CR15","doi-asserted-by":"crossref","unstructured":"Lim, T.Y., Yeh, R.A., Xu, Y., Do, M.N., Hasegawa-Johnson, M.: Time-frequency networks for audio super-resolution, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018: IEEE, pp. 646\u2013650. (2018)","DOI":"10.1109\/ICASSP.2018.8462049"},{"key":"4958_CR16","unstructured":"Birnbaum, S., Kuleshov, V., Enam, Z., Koh, P.W.W., Ermon, S.: Temporal film: Capturing long-range sequence dependencies with feature-wise modulations. Adv. Neural. Inf. Process. Syst., 32, (2019)."},{"key":"4958_CR17","unstructured":"Van Den Oord, A., et al.: Wavenet: A generative model for raw audio. arXiv preprint arXiv:1609.03499, vol. 12, (2016)"},{"key":"4958_CR18","doi-asserted-by":"crossref","unstructured":"Zhang, K., Ren, Y., Xu, C., Zhao, Z.: WSRGlow: A glow-based waveform generative model for audio super-resolution. arXiv preprint arXiv:2106.08507 (2021)","DOI":"10.21437\/Interspeech.2021-892"},{"key":"4958_CR19","doi-asserted-by":"crossref","unstructured":"Su, J., Wang, Y., Finkelstein, A., Jin, Z.: Bandwidth extension is all you need, in ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), : IEEE, pp. 696\u2013700. (2021)","DOI":"10.1109\/ICASSP39728.2021.9413575"},{"key":"4958_CR20","doi-asserted-by":"crossref","unstructured":"Liu, H., et al.: Voicefixer: A unified framework for high-fidelity speech restoration. arXiv preprint arXiv:2204.05841 (2022)","DOI":"10.21437\/Interspeech.2022-11026"},{"key":"4958_CR21","unstructured":"Tian, Q., et al.: TFGAN: Time and frequency domain based generative adversarial network for high-fidelity speech synthesis. arXiv preprint arXiv:.12206, 2020. (2011)"},{"key":"4958_CR22","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation, in Medical image computing and computer-assisted intervention\u2013MICCAI 2015: 18th international conference, Munich, Germany, October 5\u20139, proceedings, part III 18, 2015: Springer, pp. 234\u2013241. (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"4958_CR23","doi-asserted-by":"crossref","unstructured":"Li, S., Villette, S., Ramadas, P., Sinder, D.J.: Speech bandwidth extension using generative adversarial networks, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2018: IEEE, pp. 5029\u20135033. (2018)","DOI":"10.1109\/ICASSP.2018.8462588"},{"key":"4958_CR24","unstructured":"Kumar, R., Kumar, K., Anand, V., Bengio, Y., Courville, A.: NU-GAN: High resolution neural upsampling with GAN. arXiv preprint arXiv:.11362, 2020. (2010)"},{"key":"4958_CR25","doi-asserted-by":"crossref","unstructured":"Mandel, M., Tal, O., Adi, Y.: Aero: Audio super resolution in the spectral domain, in ICASSP 2023\u20132023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), : IEEE, pp. 1\u20135. (2023)","DOI":"10.1109\/ICASSP49357.2023.10095382"},{"key":"4958_CR26","doi-asserted-by":"crossref","unstructured":"Yu, C.-Y., Yeh, S.-L., Fazekas, G., Tang, H.: Conditioning and sampling in variational diffusion models for speech super-resolution, in ICASSP 2023\u20132023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), : IEEE, pp. 1\u20135. (2023)","DOI":"10.1109\/ICASSP49357.2023.10095103"},{"key":"4958_CR27","doi-asserted-by":"crossref","unstructured":"Liu, H., Chen, K., Tian, Q., Wang, W., Plumbley, M.D.: AudioSR: Versatile audio super-resolution at scale, in ICASSP 2024\u20132024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), : IEEE, pp. 1076\u20131080. (2024)","DOI":"10.1109\/ICASSP48485.2024.10447246"},{"key":"4958_CR28","doi-asserted-by":"crossref","unstructured":"Shuai, C., Shi, C., Gan, L., Liu, H.: mdctGAN: Taming transformer-based GAN for speech super-resolution with Modified DCT spectra, in Proc. Interspeech 2023, pp. 5112\u20135116. (2023)","DOI":"10.21437\/Interspeech.2023-113"},{"key":"4958_CR29","doi-asserted-by":"crossref","unstructured":"Zen, H., et al.: Libritts: A corpus derived from librispeech for text-to-speech, arXiv preprint arXiv:.02882, 2019. (1904)","DOI":"10.21437\/Interspeech.2019-2441"},{"key":"4958_CR30","unstructured":"Yamagishi, J., Veaux, C., MacDonald, K., Corpus, C.S.T.R.V.C.T.K.: English multi-speaker corpus for CSTR voice cloning toolkit (version 0.92), University of Edinburgh. The Centre for Speech Technology Research (CSTR), pp. 271\u2013350, (2019)"},{"key":"4958_CR31","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"4958_CR32","doi-asserted-by":"publisher","first-page":"2523","DOI":"10.1109\/TASLP.2023.3288409","volume":"31","author":"Z Borsos","year":"2023","unstructured":"Borsos, Z., et al.: Audiolm: A Language modeling approach to audio generation. IEEE\/ACM Trans. Audio Speech Lang. Process. 31, 2523\u20132533 (2023)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"4958_CR33","first-page":"17022","volume":"33","author":"J Kong","year":"2020","unstructured":"Kong, J., Kim, J., Bae, J.: Hifi-gan: Generative adversarial networks for efficient and high fidelity speech synthesis. Adv. Neural. Inf. Process. Syst. 33, 17022\u201317033 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4958_CR34","doi-asserted-by":"publisher","first-page":"1012","DOI":"10.1109\/TASLP.2023.3349053","volume":"32","author":"S-B Kim","year":"2024","unstructured":"Kim, S.-B., Lee, S.-H., Choi, H.-Y., Lee, S.-W.: Audio super-resolution with robust speech representation learning of masked autoencoder. IEEE\/ACM Trans. Audio Speech Lang. Process. 32, 1012\u20131022 (2024)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"4958_CR35","doi-asserted-by":"crossref","unstructured":"Liu, H., Choi, W., Liu, X., Kong, Q., Tian, Q., Wang, D.: Neural vocoder is all you need for speech super-resolution, in Proc. Interspeech 2022, pp. 4227\u20134231. (2022)","DOI":"10.21437\/Interspeech.2022-11017"},{"key":"4958_CR36","doi-asserted-by":"crossref","unstructured":"Lu, Y.-X., Ai, Y., Du, H.-P., Ling, Z.-H.: Towards high-quality and efficient speech bandwidth extension with parallel amplitude and phase prediction. IEEE\/ACM Trans. Audio Speech Lang. Process Vol.33. (2024)","DOI":"10.1109\/TASLP.2024.3519881"},{"key":"4958_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, T.-W., Ruan, S.-J.: VM-ASR: A lightweight dual-stream U-Net model for efficient audio super-resolution. IEEE Trans. Audio Speech Lang Vol.33. Process. (2025)","DOI":"10.1109\/TASLPRO.2025.3533365"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04958-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-04958-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04958-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T23:20:41Z","timestamp":1766013641000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-04958-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":37,"journal-issue":{"issue":"17","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4958"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-04958-z","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"type":"print","value":"1863-1703"},{"type":"electronic","value":"1863-1711"}],"subject":[],"published":{"date-parts":[[2025,12]]},"assertion":[{"value":"27 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 October 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 November 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 December 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"The authors declare no competing interests.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"1431"}}