{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:19:47Z","timestamp":1775229587602,"version":"3.50.1"},"reference-count":80,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Science Fund of China","award":["61571435"],"award-info":[{"award-number":["61571435"]}]},{"name":"IACAS Yong Elite Researcher Project","award":["QNYC201813"],"award-info":[{"award-number":["QNYC201813"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/taslp.2021.3079813","type":"journal-article","created":{"date-parts":[[2021,5,14]],"date-time":"2021-05-14T19:51:55Z","timestamp":1621021915000},"page":"1829-1843","source":"Crossref","is-referenced-by-count":176,"title":["Two Heads are Better Than One: A Two-Stage Complex Spectral Mapping Approach for Monaural Speech Enhancement"],"prefix":"10.1109","volume":"29","author":[{"given":"Andong","family":"Li","sequence":"first","affiliation":[]},{"given":"Wenzhe","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5656-994X","authenticated-orcid":false,"given":"Chengshi","family":"Zheng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6318-8803","authenticated-orcid":false,"given":"Cunhang","family":"Fan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4170-0076","authenticated-orcid":false,"given":"Xiaodong","family":"Li","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref73","first-page":"2031","article-title":"Generative adversarial networks based black-box metric scores optimization for speech enhancement","author":"fu","year":"0","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462068"},{"key":"ref71","first-page":"2487","article-title":"PoCoNet: Better speech enhancement with frequency-positional embeddings, semi-supervised conversational data, and biased loss","author":"isik","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref70","article-title":"Exploring the Best Loss Function for Dnn-Based Low-Latency Speech Enhancement With Temporal Convolutional Networks","author":"koyama","year":"2020"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1169"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/528"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462417"},{"key":"ref39","article-title":"Two heads are better than one: a two-stage approach for monaural noise reduction in the complex domain","author":"li","year":"2020"},{"key":"ref75","article-title":"Multi-Domain Processing Via Hybrid Denoising Networks for Speech Enhancement","author":"kim","year":"2018"},{"key":"ref38","article-title":"Multi-Scale Context Aggregation by Dilated Convolutions","author":"yu","year":"2015"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2016-24"},{"key":"ref79","article-title":"ITU-T Recommendation P.862.2: Wideband Extension to Recommendation P. 862 for the Assessment of Wideband Telephone Networks and Speech Codecs","year":"2007","journal-title":"International Telecommunication Union"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.2982029"},{"key":"ref32","first-page":"769","article-title":"A time-domain monaural speech enhancement with feedback learning","author":"li","year":"0","journal-title":"Proc Asia-Pacific Signal Inf Process Assoc (APSIPA)"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1513"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107347"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682783"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.2998279"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053188"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.941023"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74494-8_69"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2585878"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5495701"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref64","author":"benesty","year":"2007","journal-title":"Springer Handbook of Speech Processing"},{"key":"ref27","first-page":"9458","article-title":"Phasen: A phase-and-harmonics-aware speech enhancement network","author":"yin","year":"0","journal-title":"Proc Assoc Advancement Artificial Intelligence (AAAI)"},{"key":"ref65","article-title":"Phase-Aware Speech Enhancement With Deep Complex U-Net","author":"choi","year":"2019"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-224"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3064421"},{"key":"ref68","first-page":"2492","article-title":"The interspeech 2020 deep noise suppression challenge: Datasets, subjective speech quality and testing framework","author":"reddy","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2631"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-8644-3"},{"key":"ref1","author":"yu","year":"2012","journal-title":"Automatic Speech Recognition"},{"key":"ref20","first-page":"9633","article-title":"Listening to sounds of silence for speech denoising","author":"xu","year":"0","journal-title":"Proc NeurIPS"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"ref21","first-page":"334","article-title":"Wave-U-Net: A multi-scale neural network for end-to-end audio source separation","author":"stoller","year":"0","journal-title":"Proc ISMIR"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2409"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2915167"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2019.8937253"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP49062.2020.9231900"},{"key":"ref50","first-page":"5998","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"0","journal-title":"Proc NeurIPS"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.3115\/1075527.1075614"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683855"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2913512"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2782"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2537"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1121\/1.4948445"},{"key":"ref54","article-title":"Automatic Differentiation in Pytorch","author":"paszke","year":"0","journal-title":"Proc NeurIPS Autodiff Workshop"},{"key":"ref53","article-title":"Adam: A Method for Stochastic Optimization","author":"kingma","year":"2014"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(93)90095-3"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2352935"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2364452"},{"key":"ref40","first-page":"6628","article-title":"ICASSP 2021 deep noise suppression challenge: Decoupling magnitude and phase optimization with a two-stage deep network","author":"li","year":"0","journal-title":"Proc ICASSP"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1121\/1.1610463"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-55016-4_12"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2876171"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2628641"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1982.1163920"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2010.12.003"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2512042"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2955276"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.911054"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1201\/b14529"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1044\/jshr.3602.228"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2013.06.001"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1979.1163209"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1985.1164550"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164453"},{"key":"ref49","article-title":"Instance Normalization: The Missing Ingredient for Fast Stylization","author":"ulyanov","year":"2016"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.2987429"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1405"},{"key":"ref47","first-page":"315","article-title":"Deep sparse rectifier neural networks","author":"glorot","year":"0","journal-title":"Proc 14th Int Conf Artif Intell Statist"},{"key":"ref42","first-page":"933","article-title":"Language modeling with gated convolutional networks","author":"dauphin","year":"0","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054172"},{"key":"ref43","first-page":"1747","article-title":"Pixel recurrent neural networks","author":"oord","year":"0","journal-title":"Proc Int Conf Mach Learn (ICML)"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9289074\/09431717.pdf?arnumber=9431717","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:54:01Z","timestamp":1652194441000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9431717\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":80,"URL":"https:\/\/doi.org\/10.1109\/taslp.2021.3079813","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}