{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T14:12:33Z","timestamp":1774879953467,"version":"3.50.1"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:00:00Z","timestamp":1765497600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T00:00:00Z","timestamp":1765497600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10772-025-10239-4","type":"journal-article","created":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:43:50Z","timestamp":1765547030000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Transform-based nonlinear speech enhancement for monaural scenarios"],"prefix":"10.1007","volume":"29","author":[{"given":"Navneet","family":"Upadhyay","sequence":"first","affiliation":[]},{"given":"Munir","family":"Georges","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,12]]},"reference":[{"key":"10239_CR1","doi-asserted-by":"crossref","unstructured":"Amehraye, A., Pastor, D., & Tamtaoui, A. (2008) Perceptual improvement of Wiener filtering. In International conference on acoustics, speech, and signal processing (pp. 2081\u20132084).","DOI":"10.1109\/ICASSP.2008.4518051"},{"key":"10239_CR2","doi-asserted-by":"crossref","unstructured":"Berouti, M., Schwartz, R., & Makhoul, J. (1979). Enhancement of speech corrupted by acoustic noise. In Proceedings international conference on acoustics, speech, and signal processing (pp. 208\u2013211).","DOI":"10.1109\/ICASSP.1979.1170788"},{"issue":"2","key":"10239_CR3","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"S. F. Boll","year":"1979","unstructured":"Boll, S. F. (1979). Suppression of acoustic noise in speech using spectral subtraction. IEEE Transaction on Acoustic, Speech, Signal Processing, 27(2), 113\u2013120.","journal-title":"IEEE Transaction on Acoustic, Speech, Signal Processing"},{"key":"10239_CR4","doi-asserted-by":"publisher","first-page":"466","DOI":"10.1109\/TSA.2003.811544","volume":"11","author":"I. Cohen","year":"2003","unstructured":"Cohen, I. (2003). Noise spectrum estimation in adverse environments: Improved minima controlled recursive averaging. IEEE Transactions on Speech and Audio Processing, 11, 466\u2013475.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"10239_CR5","first-page":"1513","volume":"2","author":"G. Doblinger","year":"1995","unstructured":"Doblinger, G. (1995). Computationally efficient speech enhancement by spectral minima tracking in subbands. Proceedings of Euro Speech, 2, 1513\u20131516.","journal-title":"Proceedings of Euro Speech"},{"issue":"10","key":"10239_CR6","doi-asserted-by":"publisher","first-page":"1526","DOI":"10.1109\/5.168664","volume":"80","author":"Y. Ephraim","year":"1992","unstructured":"Ephraim, Y. (1992). Statistical-model-based speech enhancement systems. Proceedings IEEE, 80(10), 1526\u20131555.","journal-title":"Proceedings IEEE"},{"key":"10239_CR7","volume-title":"The electrical engineering handbook","author":"Y. Ephraim","year":"2006","unstructured":"Ephraim, Y., Ari, H. L., & Roberts, W. (2006). A brief survey of speech enhancement. In The electrical engineering handbook (3rd ed.). CRC Press.","edition":"3rd"},{"key":"10239_CR8","first-page":"12","volume-title":"The electrical engineering handbook","author":"Y. Ephraim","year":"2006","unstructured":"Ephraim, Y., & Cohen, I. (2006). Recent advancements in speech enhancement. In The electrical engineering handbook (ch. 5, pp. 12\u201326). CRC Press."},{"issue":"9","key":"10239_CR9","doi-asserted-by":"publisher","first-page":"1570","DOI":"10.1109\/TASLP.2018.2821903","volume":"26","author":"S.-W. Fu","year":"2018","unstructured":"Fu, S.-W., Wang, T.-W., Tsao, Y., Lu, X., & Kawai, H. (2018). End to-end waveform utterance enhancement for direct evaluation metrics optimization by fully convolutional neural networks. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 26(9), 1570\u20131584.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10239_CR20","unstructured":"ITU-T. (2024). Perceptual evaluation of speech quality (PESQ), and objective method for end-to-end speech quality assessment of narrowband telephone networks and speech codecs. (2000). ITU-T Recommendations, 862."},{"key":"10239_CR10","doi-asserted-by":"crossref","unstructured":"Kamath, S., & Loizou, P. (2002). A multiband spectral subtraction method for enhancing speech corrupted by colored noise. In International conference on acoustics, speech, and signal processing.","DOI":"10.1109\/ICASSP.2002.5745591"},{"key":"10239_CR11","doi-asserted-by":"publisher","first-page":"754","DOI":"10.1049\/el:20030480","volume":"39","author":"L. Lin","year":"2003","unstructured":"Lin, L., Holmes, W. H., & Ambikairajah, E. (2003, May). Adaptive noise estimation algorithm for speech enhancement. Electronics Letters, 39, 754\u2013755.","journal-title":"Electronics Letters"},{"key":"10239_CR12","doi-asserted-by":"publisher","DOI":"10.1201\/9781420015836","volume-title":"Speech enhancement: Theory and practice","author":"P. Loizou","year":"2007","unstructured":"Loizou, P. (2007). Speech enhancement: Theory and practice. CRC."},{"key":"10239_CR13","unstructured":"Loizou, P. (2009). NOIZEUS: A noisy speech corpus for evaluation of speech enhancement algorithms. http:\/\/www.utdallas.edu\/%7Eloizou\/speech\/noizeus\/"},{"issue":"8","key":"10239_CR14","doi-asserted-by":"publisher","first-page":"1256","DOI":"10.1109\/TASLP.2019.2915167","volume":"27","author":"Y. Luo","year":"2019","unstructured":"Luo, Y., & Mesgarani, N. (2019). Conv-tasnet: Surpassing ideal time frequency magnitude masking for speech separation. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 27(8), 1256\u20131266.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10239_CR15","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/34.192463","volume":"11","author":"S. Mallat","year":"1989","unstructured":"Mallat, S. (1989). A theory for multi-resolution signal decomposition: The wavelet representation. IEEE Transactions on Pattern Analysis & Machine Intelligence, 11, 674\u2013693.","journal-title":"IEEE Transactions on Pattern Analysis & Machine Intelligence"},{"key":"10239_CR16","volume-title":"A wavelet tour of signal processing: The sparse way","author":"S. Mallat","year":"2009","unstructured":"Mallat, S. (2009). A wavelet tour of signal processing: The sparse way (3rd ed.). Academic Press\/United Press.","edition":"3rd"},{"key":"10239_CR17","doi-asserted-by":"publisher","first-page":"845","DOI":"10.1109\/TSA.2005.851927","volume":"11","author":"R. Martin","year":"2005","unstructured":"Martin, R. (2005). Speech enhancement based on minimum mean-square error estimation and super-Gaussian priors. IEEE Trans Speech Audio Process, 11, 845\u2013856.","journal-title":"IEEE Trans Speech Audio Process"},{"key":"10239_CR19","doi-asserted-by":"crossref","unstructured":"Olhede, S., & Walden, A. T. (2005). A generalized demodulation approach to time frequency projections for multi-component signals. Proceedings of the Royal Society A. Mathematical, Physical and Engineering Sciences, 461, 2159\u20132179.","DOI":"10.1098\/rspa.2005.1455"},{"key":"10239_CR18","volume-title":"Speech communications: Human and machine","author":"D. O\u2019Shaughnessy","year":"2007","unstructured":"O\u2019Shaughnessy, D. (2007). Speech communications: Human and machine (2nd ed.). University Press (India) Pvt. Ltd.","edition":"2nd"},{"key":"10239_CR21","doi-asserted-by":"crossref","unstructured":"Rethage, D., Pons, J., & Serra, X. (2018). A wavenet for speech denoising. In IEEE International conference on acoustics, speech and signal processing (pp. 5069\u20135073).","DOI":"10.1109\/ICASSP.2018.8462417"},{"issue":"2","key":"10239_CR22","first-page":"1","volume":"26","author":"N. Upadhyay","year":"2023","unstructured":"Upadhyay, N. (2023). Psychoacoustic scale-driven modified spectral subtraction for monaural speech enhancement. International Journal of Speech Technology, 26(2), 1\u201317.","journal-title":"International Journal of Speech Technology"},{"key":"10239_CR23","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1016\/j.procs.2016.04.061","volume":"84","author":"N. Upadhyay","year":"2016","unstructured":"Upadhyay, N., & Jaiswal, R. (2016). Single channel speech enhancement: Using Wiener filtering with recursive noise estimation. Procedia Computer Science, 84, 23\u201324.","journal-title":"Procedia Computer Science"},{"key":"10239_CR24","doi-asserted-by":"crossref","unstructured":"Upadhyay, N., & Karmakar, A. (2012a). An auditory perception based improved multiband spectral subtraction algorithm for enhancement of speech degraded by non-stationary noises. In IEEE international conference on intelligent human computer interaction (IHCI 2012) (pp. 392\u2013398). IIT Kharagpur, 27\u201329 December.","DOI":"10.1109\/IHCI.2012.6481854"},{"key":"10239_CR25","doi-asserted-by":"crossref","unstructured":"Upadhyay, N., & Karmakar, A. (2012b). A perceptually motivated stationary wavelet filter-bank utilizing improved spectral over-subtraction algorithm for enhancing speech in non-stationary environments. In IEEE international conference on intelligent human computer interaction (IHCI 2012) (pp. 472\u2013478). IIT Kharagpur, 27\u201329 December.","DOI":"10.1109\/IHCI.2012.6481840"},{"key":"10239_CR26","doi-asserted-by":"publisher","first-page":"13","DOI":"10.5815\/ijigsp.2013.11.02","volume":"5","author":"N. Upadhyay","year":"2013","unstructured":"Upadhyay, N., & Karmakar, A. (2013). Spectral subtractive-type algorithms for enhancement of noisy speech: An integrative review. International Journal of Image, Graphic, and Signal Processing, 5, 13\u201322.","journal-title":"International Journal of Image, Graphic, and Signal Processing"},{"key":"10239_CR27","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1007\/s10772-013-9213-5","volume":"17","author":"N. Upadhyay","year":"2014","unstructured":"Upadhyay, N., & Karmakar, A. (2014). A perceptually motivated stationary wavelet packet filterbank using improved spectral over-subtraction for enhancement of speech in various noise environments. International Journal of Speech Technology, 17, 117\u2013132.","journal-title":"International Journal of Speech Technology"},{"key":"10239_CR28","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1109\/89.748118","volume":"7","author":"N. Virag","year":"1999","unstructured":"Virag, N. (1999). Single channel speech enhancement based on masking properties of the human auditory system. IEEE Transactions on Speech and Audio Processing, 7, 126\u2013137.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"10239_CR29","doi-asserted-by":"publisher","first-page":"1354","DOI":"10.3390\/electronics14071354","volume":"14","author":"Z.-T. Wu","year":"2025","unstructured":"Wu, Z.-T., & Hung, J.-W. (2025, March). Improving the speech enhancement model with discrete wavelet transform sub-band features in adaptive FullSubNet. Electronics, 14, 1354 (1-18).","journal-title":"Electronics"},{"key":"10239_CR30","doi-asserted-by":"publisher","first-page":"1523","DOI":"10.1121\/1.385079","volume":"68","author":"E. Zwicker","year":"1980","unstructured":"Zwicker, E., & Terhardt, E. (1980). Analytical expressions for critical band rate and critical bandwidth as a function of frequency. Journal of the Acoustical Society of America, 68, 1523\u20131525.","journal-title":"Journal of the Acoustical Society of America"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10239-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-025-10239-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10239-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T13:23:09Z","timestamp":1774876989000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-025-10239-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,12]]},"references-count":30,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["10239"],"URL":"https:\/\/doi.org\/10.1007\/s10772-025-10239-4","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,12]]},"assertion":[{"value":"6 August 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"10"}}