{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,23]],"date-time":"2026-05-23T02:04:32Z","timestamp":1779501872853,"version":"3.53.1"},"reference-count":41,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition Letters"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.patrec.2026.04.013","type":"journal-article","created":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:30:28Z","timestamp":1776108628000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Diffusion for regression: A model-agnostic generative approach to controllable speech enhancement"],"prefix":"10.1016","volume":"205","author":[{"given":"Fang","family":"Liu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Siqi","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gang","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenxin","family":"Tai","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9892-7018","authenticated-orcid":false,"given":"Yue","family":"Lei","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ting","family":"Zhong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fan","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.patrec.2026.04.013_bib0001","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0002","article-title":"Diffusion-lm improves controllable text generation","volume":"35","author":"Li","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0003","series-title":"International Conference on Learning Representations","first-page":"1","article-title":"Diffwave: a versatile diffusion model for audio synthesis","author":"Kong","year":"2021"},{"key":"10.1016\/j.patrec.2026.04.013_bib0004","article-title":"Card: classification and regression diffusion models","volume":"35","author":"Han","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0005","series-title":"International Conference on Learning Representations","first-page":"1","article-title":"A conditional point diffusion-Refinement paradigm for 3D point cloud completion","author":"Lyu","year":"2022"},{"key":"10.1016\/j.patrec.2026.04.013_bib0006","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"18938","article-title":"Diffusion model as representation learner","author":"Yang","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0007","unstructured":"Y. Yang, M. Jin, H. Wen, C. Zhang, Y. Liang, L. Ma, Y. Wang, C. Liu, B. Yang, Z. Xu, et al., A survey on diffusion models for time series and spatio-temporal data, (2024). arXiv: 2404.18886."},{"key":"10.1016\/j.patrec.2026.04.013_bib0008","article-title":"Dose: diffusion dropout with adaptive prior for speech enhancement","volume":"36","author":"Tai","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0009","series-title":"International Conference on Machine Learning","first-page":"26245","article-title":"Input perturbation reduces exposure bias in diffusion models","author":"Ning","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0010","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"3836","article-title":"Adding conditional control to text-to-image diffusion models","author":"Zhang","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0011","article-title":"Video diffusion models","volume":"35","author":"Ho","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0012","series-title":"Conference of the International Speech Communication Association","first-page":"4294","article-title":"Undiff: unsupervised voice restoration with unconditional diffusion model","author":"Iashchenko","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0013","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0014","series-title":"International Conference on Learning Representations","first-page":"1","article-title":"Score-Based generative modeling through stochastic differential equations","author":"Song","year":"2021"},{"key":"10.1016\/j.patrec.2026.04.013_bib0015","first-page":"26565","article-title":"Elucidating the design space of diffusion-based generative models","volume":"35","author":"Karras","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"3","key":"10.1016\/j.patrec.2026.04.013_bib0016","doi-asserted-by":"crossref","first-page":"313","DOI":"10.1016\/0304-4149(82)90051-5","article-title":"Reverse-time diffusion equation models","volume":"12","author":"Anderson","year":"1982","journal-title":"Stoch Process Their Appl."},{"key":"10.1016\/j.patrec.2026.04.013_bib0017","unstructured":"D.P. Kingma, M. Welling, Auto-encoding variational bayes, (2013). arXiv: 1312.6114."},{"key":"10.1016\/j.patrec.2026.04.013_bib0018","article-title":"Generative adversarial nets","volume":"27","author":"Goodfellow","year":"2014","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0019","series-title":"International Conference on Machine Learning","first-page":"1530","article-title":"Variational inference with normalizing flows","author":"Rezende","year":"2015"},{"key":"10.1016\/j.patrec.2026.04.013_bib0020","article-title":"Don\u2019T blame the elbo! a linear vae perspective on posterior collapse","volume":"32","author":"Lucas","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0021","article-title":"Improved training of wasserstein gans","volume":"30","author":"Gulrajani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0022","article-title":"Neural ordinary differential equations","volume":"31","author":"Chen","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patrec.2026.04.013_bib0023","series-title":"International Conference on Learning Representations","first-page":"1","article-title":"(Certified!!) adversarial robustness for free!","author":"Carlini","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0024","unstructured":"X. Chen, Z. Liu, S. Xie, K. He, Deconstructing denoising diffusion models for self-supervised learning, (2024). arXiv: 2401.14404."},{"key":"10.1016\/j.patrec.2026.04.013_bib0025","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"13627","article-title":"Revisiting denoising diffusion probabilistic models for speech enhancement: condition collapse, efficiency and refinement","author":"Tai","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0026","series-title":"International Conference on Learning Representations","first-page":"1","article-title":"Diffusion posterior sampling for general noisy inverse problems","author":"Chung","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0027","series-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"1","article-title":"Solving audio inverse problems with a diffusion model","author":"Moliner","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0028","doi-asserted-by":"crossref","unstructured":"J.-M. Lemercier, J. Richter, S. Welker, E. Moliner, V. V\u00e4lim\u00e4ki, T. Gerkmann, Diffusion Models for Audio Restoration, (2024). arXiv: 2402.09821.","DOI":"10.1109\/MSP.2024.3445871"},{"key":"10.1016\/j.patrec.2026.04.013_bib0029","series-title":"International Conference on Learning Representations","first-page":"1","article-title":"Progressive distillation for fast sampling of diffusion models","author":"Salimans","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0030","series-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"1","article-title":"Cold diffusion for speech enhancement","author":"Yen","year":"2023"},{"key":"10.1016\/j.patrec.2026.04.013_bib0031","unstructured":"Q. Yao, L. Gao, Q. Mao, M. Dong, Regularized Schr\\\u201d odinger Bridge: Alleviating Distortion and Exposure Bias in Solving Inverse Problems, (2025). arXiv: 2511.11686."},{"key":"10.1016\/j.patrec.2026.04.013_bib0032","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"4667","article-title":"Multi-step denoising scheduled sampling: towards alleviating exposure bias for diffusion models","volume":"38","author":"Ren","year":"2024"},{"key":"10.1016\/j.patrec.2026.04.013_bib0033","unstructured":"W. Tai, J. Li, Y. Wang, T. Lan, Q. Liu, Foster strengths and circumvent weaknesses: a speech enhancement framework with two-branch collaborative learning, (2021). arXiv: 2110.05713."},{"key":"10.1016\/j.patrec.2026.04.013_bib0034","series-title":"SSW","first-page":"146","article-title":"Investigating RNN-based speech enhancement methods for noise-robust text-to-Speech","author":"Valentini-Botinhao","year":"2016"},{"issue":"3","key":"10.1016\/j.patrec.2026.04.013_bib0035","doi-asserted-by":"crossref","first-page":"247","DOI":"10.1016\/0167-6393(93)90095-3","article-title":"Assessment for automatic speech recognition: II. NOISEX-92: a database and an experiment to study the effect of additive noise on speech recognition systems","volume":"12","author":"Varga","year":"1993","journal-title":"Speech Commun."},{"key":"10.1016\/j.patrec.2026.04.013_bib0036","doi-asserted-by":"crossref","first-page":"535","DOI":"10.1016\/j.csl.2016.11.005","article-title":"An analysis of environment, microphone and data simulation mismatches in robust speech recognition","volume":"46","author":"Vincent","year":"2017","journal-title":"Comput. Speech Language"},{"key":"10.1016\/j.patrec.2026.04.013_bib0037","unstructured":"G. Yang, Y. Lei, W. Tai, J. Wu, J. Chen, T. Zhong, F. Zhou, Compose Yourself: Average-Velocity Flow Matching for One-Step Speech Enhancement, (2025). arXiv: 2509.15952."},{"key":"10.1016\/j.patrec.2026.04.013_bib0038","series-title":"IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)","first-page":"749","article-title":"Perceptual evaluation of speech quality (PESQ)-a new method for speech quality assessment of telephone networks and codecs","author":"Rix","year":"2001"},{"key":"10.1016\/j.patrec.2026.04.013_bib0039","series-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"4214","article-title":"A short-time objective intelligibility measure for time-frequency weighted noisy speech","author":"Taal","year":"2010"},{"key":"10.1016\/j.patrec.2026.04.013_bib0040","series-title":"IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)","first-page":"626","article-title":"SDR\u2013Half-baked or well done?","author":"Le Roux","year":"2019"},{"issue":"1","key":"10.1016\/j.patrec.2026.04.013_bib0041","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1109\/TASL.2007.911054","article-title":"Evaluation of objective quality measures for speech enhancement","volume":"16","author":"Hu","year":"2007","journal-title":"IEEE Trans. Audio Speech Lang. Process."}],"container-title":["Pattern Recognition Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167865526001340?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167865526001340?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,23]],"date-time":"2026-05-23T01:06:21Z","timestamp":1779498381000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167865526001340"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":41,"alternative-id":["S0167865526001340"],"URL":"https:\/\/doi.org\/10.1016\/j.patrec.2026.04.013","relation":{},"ISSN":["0167-8655"],"issn-type":[{"value":"0167-8655","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Diffusion for regression: A model-agnostic generative approach to controllable speech enhancement","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition Letters","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patrec.2026.04.013","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}]}}