{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:12:57Z","timestamp":1750219977503,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T00:00:00Z","timestamp":1663891200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,9,23]]},"DOI":"10.1145\/3573942.3573975","type":"proceedings-article","created":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T23:45:42Z","timestamp":1684280742000},"page":"956-962","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Solving Size and Performance Dilemma by Reversible and Invertible Recurrent Network for Speech Enhancement"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8459-0412","authenticated-orcid":false,"given":"Dengfeng","family":"Ke","sequence":"first","affiliation":[{"name":"Beijing Language and Culture University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6765-4808","authenticated-orcid":false,"given":"Yanlu","family":"Xie","sequence":"additional","affiliation":[{"name":"Beijing Language and Culture University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1603-3136","authenticated-orcid":false,"given":"Jinsong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing Language and Culture University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0770-1582","authenticated-orcid":false,"given":"Liangjie","family":"Huang","sequence":"additional","affiliation":[{"name":"Beijing Language and Culture University, China"}]}],"member":"320","published-online":{"date-parts":[[2023,5,16]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746171"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Defossez A. Synnaeve G. and Adi Y. 2020. Real time speech enhancement in the waveform domain. arXiv preprint arXiv:2006.12847 (2020).","DOI":"10.21437\/Interspeech.2020-2409"},{"key":"e_1_3_2_1_3_1","first-page":"2461","volume-title":"INTERSPEECH","author":"Deng F.","year":"2020","unstructured":"Deng, F., Jiang, T., Wang, X., Zhang, C., and Li, Y. 2020. Naagn: Noise-aware attention-gated network for speech enhancement. In INTERSPEECH (2020), pp. 2457\u20132461."},{"key":"e_1_3_2_1_4_1","unstructured":"Dinh L. Sohl-Dickstein J. and Bengio S. 2016. Density estimation using real nvp. arXiv preprint arXiv:1605.08803 (2016)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.911054"},{"key":"e_1_3_2_1_6_1","unstructured":"Ke D. Zhang J. Xie Y. Xu Y. and Lin B. 2021. Speech enhancement using separable polling attention and global layer normalization followed with prelu. arXiv preprint arXiv:2105.02509 (2021)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053591"},{"key":"e_1_3_2_1_8_1","volume-title":"Glow: Generative flow with invertible 1x1 convolutions. Advances in neural information processing systems 31","author":"Kingma D. P.","year":"2018","unstructured":"Kingma, D. P., and Dhariwal, P. 2018. Glow: Generative flow with invertible 1x1 convolutions. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2021.108499"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404793"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054266"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747029"},{"key":"e_1_3_2_1_13_1","first-page":"862","volume-title":"Wideband extension to recommendation","year":"2005","unstructured":"P.862.2, I.-T. R. 2005. Wideband extension to recommendation p.862 for the assessment of wideband telephone networks and speech codecs. International Telecommunication Union, CH-Geneva 1 (2005)."},{"key":"e_1_3_2_1_14_1","volume-title":"Segan: Speech enhancement generative adversarial network. arXiv preprint arXiv:1703.09452","author":"Pascual S.","year":"2017","unstructured":"Pascual, S., Bonafonte, A., and Serra, J. 2017. Segan: Speech enhancement generative adversarial network. arXiv preprint arXiv:1703.09452 (2017)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747055"},{"key":"e_1_3_2_1_16_1","volume-title":"Deepfilternet2: Towards real-time speech enhancement on embedded devices for full-band audio. arXiv preprint arXiv:2205.05474","author":"Schr\u00f6ter H.","year":"2022","unstructured":"Schr\u00f6ter, H., Rosenkranz, T., Maier, A., Deepfilternet2: Towards real-time speech enhancement on embedded devices for full-band audio. arXiv preprint arXiv:2205.05474 (2022)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5495701"},{"key":"e_1_3_2_1_18_1","first-page":"035081","volume-title":"Proceedings of Meetings on Acoustics ICA2013","volume":"19","author":"Thiemann J.","year":"2013","unstructured":"Thiemann, J., Ito, N., and Vincent, E. 2013. The diverse environments multi-channel acoustic noise database (demand): A database of multichannel environmental noise recordings. In Proceedings of Meetings on Acoustics ICA2013 (2013), vol. 19, Acoustical Society of America, p. 035081."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2016-24"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414140"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSDA.2013.6709856"},{"key":"e_1_3_2_1_22_1","first-page":"7102","volume-title":"ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Wang K.","year":"2021","unstructured":"Wang, K., He, B., and Zhu, W.-P. 2021. Tstnn: Two-stage transformer based neural network for speech enhancement in the time domain. In ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2021), IEEE, pp. 7098\u20137102."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Wang Y. Narayanan A. and Wang D. 2014. On training targets for supervised speech separation. IEEE\/ACM transactions on audio speech and language processing 22 12 (2014) 1849\u20131858.","DOI":"10.1109\/TASLP.2014.2352935"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Williamson D. S. Wang Y. and Wang D. 2015. Complex ratio masking for monaural speech separation. IEEE\/ACM transactions on audio speech and language processing 24 3 (2015) 483\u2013492.","DOI":"10.1109\/TASLP.2015.2512042"},{"key":"e_1_3_2_1_25_1","first-page":"9465","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"34","author":"Yin D.","year":"2020","unstructured":"Yin, D., Luo, C., Xiong, Z., and Zeng, W. 2020. Phasen: A phase-and-harmonics-aware speech enhancement network. In Proceedings of the AAAI Conference on Artificial Intelligence (2020), vol. 34, pp. 9458\u20139465."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Graves Alex. 2012. Long short-term memory. Supervised sequence labelling with recurrent neural networks (2012): 37-45.","DOI":"10.1007\/978-3-642-24797-2_4"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Cho K. Van Merri\u00ebnboer B. Gulcehre C. Bahdanau D. Bougares F. Schwenk H. & Bengio Y. 2014. Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078.","DOI":"10.3115\/v1\/D14-1179"}],"event":{"name":"AIPR 2022: 2022 5th International Conference on Artificial Intelligence and Pattern Recognition","acronym":"AIPR 2022","location":"Xiamen China"},"container-title":["Proceedings of the 2022 5th International Conference on Artificial Intelligence and Pattern Recognition"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3573975","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3573942.3573975","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:22Z","timestamp":1750182562000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3573975"}},"subtitle":["Solving Size and Performance Dilemma by Reversible and Invertible Recurrent Network for Speech Enhancement"],"short-title":[],"issued":{"date-parts":[[2022,9,23]]},"references-count":27,"alternative-id":["10.1145\/3573942.3573975","10.1145\/3573942"],"URL":"https:\/\/doi.org\/10.1145\/3573942.3573975","relation":{},"subject":[],"published":{"date-parts":[[2022,9,23]]},"assertion":[{"value":"2023-05-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}