{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:28:02Z","timestamp":1740122882128,"version":"3.37.3"},"reference-count":27,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2019,8,29]],"date-time":"2019-08-29T00:00:00Z","timestamp":1567036800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,8,29]],"date-time":"2019-08-29T00:00:00Z","timestamp":1567036800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1007\/s10772-019-09627-4","type":"journal-article","created":{"date-parts":[[2019,8,29]],"date-time":"2019-08-29T08:58:20Z","timestamp":1567069100000},"page":"841-850","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Deep neural network based speech enhancement using mono channel mask"],"prefix":"10.1007","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1010-2022","authenticated-orcid":false,"given":"Pallavi P.","family":"Ingale","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sanjay L.","family":"Nalbalwar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,8,29]]},"reference":[{"key":"9627_CR1","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1016\/j.csl.2017.02.005","volume":"46","author":"H Barfuss","year":"2017","unstructured":"Barfuss, H., Huemmer, C., Schwarz, A., & Kellermann, W. (2017). Robust coherence-based spectral enhancement for speech recognition in adverse real-world environments. Computer Speech & Language, 46, 388\u2013400.","journal-title":"Computer Speech & Language"},{"key":"9627_CR2","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/978-3-642-35289-8_26","volume-title":"Neural networks: Tricks of the trade","author":"Y Bengio","year":"2012","unstructured":"Bengio, Y. (2012). Practical recommendations for gradient-based training of deep architectures. Neural networks: Tricks of the trade (pp. 437\u2013478). Berlin: Springer."},{"key":"9627_CR3","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1016\/j.csl.2015.09.001","volume":"36","author":"S Chehrehsa","year":"2016","unstructured":"Chehrehsa, S., & Moir, T. J. (2016). Speech enhancement using maximum a-posteriori and gaussian mixture models for speech and noise periodogram estimation. Computer Speech & Language, 36, 58\u201371.","journal-title":"Computer Speech & Language"},{"issue":"5","key":"9627_CR4","doi-asserted-by":"publisher","first-page":"1085","DOI":"10.1109\/TASLP.2017.2687829","volume":"25","author":"M Delfarah","year":"2017","unstructured":"Delfarah, M., & Wang, D. (2017). Features for masking-based monaural speech separation in reverberant conditions. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 25(5), 1085\u20131094.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"6","key":"9627_CR5","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.1109\/TASSP.1984.1164453","volume":"32","author":"Y Ephraim","year":"1984","unstructured":"Ephraim, Y., & Malah, D. (1984). Speech enhancement using a minimum-mean square error short-time spectral amplitude estimator. IEEE Transactions on Acoustics, Speech, and Signal Processing, 32(6), 1109\u20131121.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9627_CR6","unstructured":"F\u00e9votte, C., Gribonval, R., & Vincent, E. (2005). Bss_eval toolbox user guide-revision 2.0."},{"key":"9627_CR7","unstructured":"Garofolo, J.\u00a0S., Lamel, L.\u00a0F., Fisher, W.\u00a0M., Fiscus, J.\u00a0G., & Pallett, D.\u00a0S. (1993). Darpa timit acoustic-phonetic continous speech corpus cd-rom. nist speech disc 1-1.1. NASA STI\/Recon technical report n,\u00a093."},{"issue":"5","key":"9627_CR8","doi-asserted-by":"publisher","first-page":"3475","DOI":"10.1121\/1.4754541","volume":"132","author":"K Han","year":"2012","unstructured":"Han, K., & Wang, D. (2012). A classification based approach to speech segregation. The Journal of the Acoustical Society of America, 132(5), 3475\u20133483.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"4","key":"9627_CR9","doi-asserted-by":"publisher","first-page":"450","DOI":"10.1109\/LSP.2004.824017","volume":"11","author":"MK Hasan","year":"2004","unstructured":"Hasan, M. K., Salahuddin, S., & Khan, M. R. (2004). A modified a priori snr for speech enhancement using spectral subtraction rules. IEEE Signal Processing Letters, 11(4), 450\u2013453.","journal-title":"IEEE Signal Processing Letters"},{"key":"9627_CR10","first-page":"485","volume-title":"Topics in acoustic echo and noise control","author":"G Hu","year":"2006","unstructured":"Hu, G., & Wang, D. (2006). An auditory scene analysis approach to monaural speech segregation. Topics in acoustic echo and noise control (pp. 485\u2013515). Berlin: Springer."},{"issue":"2","key":"9627_CR11","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1109\/TASL.2006.881700","volume":"15","author":"G Hu","year":"2007","unstructured":"Hu, G., & Wang, D. (2007). Auditory segmentation based on onset and offset analysis. IEEE Transactions on Audio, Speech, and Language Processing, 15(2), 396\u2013405.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"2","key":"9627_CR12","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/s10772-018-9509-6","volume":"21","author":"PP Ingale","year":"2018","unstructured":"Ingale, P. P., & Nalbalwar, S. L. (2018). Singing voice separation using mono-channel mask. International Journal of Speech Technology, 21(2), 309\u2013318.","journal-title":"International Journal of Speech Technology"},{"issue":"11","key":"9627_CR13","doi-asserted-by":"publisher","first-page":"1800","DOI":"10.1109\/TASLP.2015.2443983","volume":"23","author":"MT Islam","year":"2015","unstructured":"Islam, M. T., Shahnaz, C., Zhu, W.-P., & Ahmad, M. O. (2015). Speech enhancement based on student t modeling of teager energy operated perceptual wavelet packet coefficients and a custom thresholding function. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP), 23(11), 1800\u20131811.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP)"},{"key":"9627_CR14","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1016\/j.dsp.2017.12.002","volume":"74","author":"TG Kang","year":"2018","unstructured":"Kang, T. G., Shin, J. W., & Kim, N. S. (2018). Dnn-based monaural speech enhancement with temporal and spectral variations equalization. Digital Signal Processing, 74, 102\u2013110.","journal-title":"Digital Signal Processing"},{"issue":"3","key":"9627_CR15","doi-asserted-by":"publisher","first-page":"1486","DOI":"10.1121\/1.3184603","volume":"126","author":"G Kim","year":"2009","unstructured":"Kim, G., Lu, Y., Hu, Y., & Loizou, P. C. (2009). An algorithm that improves speech intelligibility in noise for normal-hearing listeners. The Journal of the Acoustical Society of America, 126(3), 1486\u20131494.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"6","key":"9627_CR16","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1016\/j.specom.2008.01.003","volume":"50","author":"Y Lu","year":"2008","unstructured":"Lu, Y., & Loizou, P. C. (2008). A geometric approach to spectral subtraction. Speech Communication, 50(6), 453\u2013466.","journal-title":"Speech Communication"},{"key":"9627_CR17","unstructured":"Mohammadiha, N., Taghia, J., & Leijon, A. (2012). Single channel speech enhancement using bayesian nmf with recursive temporal updates of prior distributions. In: IEEE international conference on acoustics, speech and signal processing (ICASSP), 2012 (pp. 4561\u20134564). IEEE."},{"key":"9627_CR18","unstructured":"Polikar, R. (1996). The wavelet tutorial."},{"key":"9627_CR19","unstructured":"Recommendation, I.-T. (2001). Perceptual evaluation of speech quality (pesq): An objective method for end-to-end speech quality assessment of narrow-band telephone networks and speech codecs. Rec. ITU-T P. 862."},{"key":"9627_CR20","unstructured":"Tseng, H.-W., Hong, M., & Luo, Z.-Q. (2015). Combining sparse nmf with deep neural network: A new classification-based approach for speech enhancement. In: IEEE international conference on acoustics, speech and signal processing (ICASSP), 2015 (pp. 2145\u20132149). IEEE."},{"key":"9627_CR21","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1007\/0-387-22794-6_12","volume-title":"Speech separation by humans and machines","author":"D Wang","year":"2005","unstructured":"Wang, D. (2005). On ideal binary mask as the computational goal of auditory scene analysis. Speech separation by humans and machines (pp. 181\u2013197). New York: Springer."},{"issue":"12","key":"9627_CR22","doi-asserted-by":"publisher","first-page":"1849","DOI":"10.1109\/TASLP.2014.2352935","volume":"22","author":"Y Wang","year":"2014","unstructured":"Wang, Y., Narayanan, A., & Wang, D. (2014). On training targets for supervised speech separation. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP), 22(12), 1849\u20131858.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP)"},{"key":"9627_CR23","unstructured":"Wang, Y., & Wang, D. (2014). A structure-preserving training target for supervised speech separation. In: IEEE international conference on acoustics, speech and signal processing (ICASSP), 2014 (pp. 6107\u20136111). IEEE."},{"key":"9627_CR24","unstructured":"Wang, Z., Sha, F. (2014). Discriminative non-negative matrix factorization for single-channel speech separation. In: IEEE international conference on acoustics, speech and signal processing (ICASSP), 2014 (pp. 3749\u20133753). IEEE."},{"key":"9627_CR25","unstructured":"Wilson, K. W., Raj, B., Smaragdis, P., & Divakaran, A. (2008). Speech denoising using nonnegative matrix factorization with priors. In: IEEE international conference on acoustics, speech and signal processing, 2008. ICASSP (2008) (pp. 4029\u20134032). IEEE."},{"issue":"1","key":"9627_CR26","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1186\/1687-4722-2013-2","volume":"2013","author":"W Yu","year":"2013","unstructured":"Yu, W., Jiajun, L., Ning, C., & Wenhao, Y. (2013). Improved monaural speech segregation based on computational auditory scene analysis. EURASIP Journal on Audio, Speech, and Music Processing, 2013(1), 2.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"issue":"4","key":"9627_CR27","doi-asserted-by":"publisher","first-page":"836","DOI":"10.1109\/TASLP.2014.2308398","volume":"22","author":"X Zhao","year":"2014","unstructured":"Zhao, X., Wang, Y., & Wang, D. (2014). Robust speaker identification in noisy and reverberant conditions. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP), 22(4), 836\u2013845.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP)"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-019-09627-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-019-09627-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-019-09627-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,27]],"date-time":"2020-08-27T23:15:20Z","timestamp":1598570120000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-019-09627-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,8,29]]},"references-count":27,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2019,9]]}},"alternative-id":["9627"],"URL":"https:\/\/doi.org\/10.1007\/s10772-019-09627-4","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2019,8,29]]},"assertion":[{"value":"25 January 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 August 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 August 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}