{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T04:08:30Z","timestamp":1771474110739,"version":"3.50.1"},"reference-count":81,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100004663","name":"Ministry of Science and Technology, Taiwan","doi-asserted-by":"publisher","award":["MOST 109-2221-E-001-022-"],"award-info":[{"award-number":["MOST 109-2221-E-001-022-"]}],"id":[{"id":"10.13039\/501100004663","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004663","name":"Ministry of Science and Technology, Taiwan","doi-asserted-by":"publisher","award":["108-2628-E-001-002-MY3"],"award-info":[{"award-number":["108-2628-E-001-002-MY3"]}],"id":[{"id":"10.13039\/501100004663","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004663","name":"Ministry of Science and Technology, Taiwan","doi-asserted-by":"publisher","award":["107-2221-E-001-012-MY2"],"award-info":[{"award-number":["107-2221-E-001-012-MY2"]}],"id":[{"id":"10.13039\/501100004663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/taslp.2020.3025638","type":"journal-article","created":{"date-parts":[[2020,10,5]],"date-time":"2020-10-05T21:29:42Z","timestamp":1601933382000},"page":"2756-2769","source":"Crossref","is-referenced-by-count":31,"title":["Speech Enhancement Based on Denoising Autoencoder With Multi-Branched Encoders"],"prefix":"10.1109","volume":"28","author":[{"given":"Cheng","family":"Yu","sequence":"first","affiliation":[]},{"given":"Ryandhimas E.","family":"Zezario","sequence":"additional","affiliation":[]},{"given":"Syu-Siang","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jonathan","family":"Sherman","sequence":"additional","affiliation":[]},{"given":"Yi-Yen","family":"Hsieh","sequence":"additional","affiliation":[]},{"given":"Xugang","family":"Lu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3599-5071","authenticated-orcid":false,"given":"Hsin-Min","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6956-0418","authenticated-orcid":false,"given":"Yu","family":"Tsao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","volume":"87","author":"vetterli","year":"1995","journal-title":"Wavelets and Subband Coding"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2012.6252809"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2250961"},{"key":"ref70","article-title":"Perception optimized deep denoising autoencoders for speech enhancement","author":"xia","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref76","article-title":"100nonspeechenvironmentalsounds2004[online]","author":"hu","year":"2004"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2108"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2913512"},{"key":"ref74","author":"hayter","year":"2012","journal-title":"Probability Statis Eng Scientists"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2821903"},{"key":"ref75","article-title":"Statistics: The art and science of learning from data (mystatlab series)","author":"agresti","year":"2008"},{"key":"ref78","article-title":"Speech recognition (version 3.6) [software]","author":"zhang","year":"0","journal-title":"Proc ICCC"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.911054"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2013.2291240"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178061"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2352935"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854963"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-211"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1284"},{"key":"ref35","article-title":"Speech enhancement based on deep denoising autoencoder","author":"lu","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref34","article-title":"Experiments on deep learning for speech denoising","author":"liu","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2817798"},{"key":"ref62","article-title":"Learning with ensembles: How overfitting can be useful","author":"sollich","year":"0","journal-title":"Proc NIPS"},{"key":"ref61","first-page":"1","article-title":"Ensemble learning for speech enhancement","author":"watanabe","year":"0","journal-title":"Proc WASPAA"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2628641"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2002.5743777"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2016.2571727"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/1970392.1970395"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.3115\/1075527.1075614"},{"key":"ref66","article-title":"Development of taiwan mandarin hearing in noise test","author":"huang","year":"2005","journal-title":"Dept Speech Lang Pathol Audiol Nat Taipei Univ Nursing Health Sci"},{"key":"ref29","first-page":"774","article-title":"Complex ratio masking for monaural speech separation","volume":"14","author":"hu","year":"2006","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"ref67","first-page":"708","article-title":"Perceptual evaluation of speech quality (PESQ), an objective method for end-to-end speech quality assessment of narrowband telephone networks and speech codecs","author":"rix","year":"2001","journal-title":"International Telecommunication Union ITU-T Recommendation"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2114881"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2014.02.001"},{"key":"ref2","article-title":"Robust automatic speech recognition: A bridge to practical applications","author":"li","year":"2015","journal-title":"Elsevier Orlando FL USA Academic"},{"key":"ref1","article-title":"Intelligibility metric based on a simple model of speech communication","author":"kuyk","year":"0","journal-title":"Proc IWAENC"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6287816"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4518538"},{"key":"ref21","article-title":"Algorithms for non-negative matrix factorization","author":"lee","year":"0","journal-title":"Proc NIPS"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2598306"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2270369"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2017.2678458"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2004.1381036"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952121"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2017.8170055"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.845819"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45014-9_1"},{"key":"ref57","author":"chollet","year":"2015","journal-title":"Keras"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2011.2168604"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462155"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2955276"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2536478"},{"key":"ref52","article-title":"Multi-objective learning and mask-based post-processing for deep neural network based speech enhancement","author":"xu","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISPA.2009.5297793"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/51.765187"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/HSCMA.2017.7895577"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1159\/000094648"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2017.8282144"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1979.1163209"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.543199"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164453"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.12.088"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1985.1164550"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2002.5743782"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/89.902276"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1201\/b14529"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462581"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2528171"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1620"},{"key":"ref5","first-page":"2632","article-title":"Multichannel end-to-end speech recognition","author":"ochiai","year":"0","journal-title":"Proc ICML"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MSPEC.2017.7864754"},{"key":"ref49","article-title":"Ensemble modeling of denoising autoencoder for speech spectrum restoration","author":"lu","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1496"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1121\/1.5055562"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2474388"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2016231"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462662"},{"key":"ref47","doi-asserted-by":"crossref","first-page":"316","DOI":"10.1109\/TCBB.2010.96","article-title":"Ensemble learning with active example selection for imbalanced biomedical data classification","volume":"8","author":"oh","year":"2011","journal-title":"IEEE\/ACM Trans Comput Biol Bioinf"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref41","article-title":"Speech enhancement and recognition using multi-task learning of long short-term memory recurrent neural networks","author":"chen","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref44","article-title":"Active bias: Training more accurate neural networks by emphasizing high variance samples","author":"chang","year":"0","journal-title":"Proc NIPS"},{"key":"ref43","article-title":"Self-paced learning for latent variable models","author":"kumar","year":"0","journal-title":"Proc NIPS"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/8938144\/09212562.pdf?arnumber=9212562","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T01:07:34Z","timestamp":1641949654000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9212562\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":81,"URL":"https:\/\/doi.org\/10.1109\/taslp.2020.3025638","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}