{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T23:23:37Z","timestamp":1780356217139,"version":"3.54.1"},"reference-count":23,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"5","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2020,5,1]]},"DOI":"10.1587\/transinf.2019edl8183","type":"journal-article","created":{"date-parts":[[2020,4,30]],"date-time":"2020-04-30T22:13:50Z","timestamp":1588284830000},"page":"1195-1198","source":"Crossref","is-referenced-by-count":2,"title":["Orthogonal Gradient Penalty for Fast Training of Wasserstein GAN Based Multi-Task Autoencoder toward Robust Speech Recognition"],"prefix":"10.1587","volume":"E103.D","author":[{"given":"Chao-Yuan","family":"KAO","sequence":"first","affiliation":[{"name":"School of Electrical Engineering, Korea University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sangwook","family":"PARK","sequence":"additional","affiliation":[{"name":"School of Electrical and Computer Engineering, Johns Hopkins University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alzahra","family":"BADI","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering, Korea University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"David K.","family":"HAN","sequence":"additional","affiliation":[{"name":"US Army Research Laboratory (ARL)"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hanseok","family":"KO","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering, Korea University"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"publisher","unstructured":"[1] Y. Xu, J. Du, L.-R. Dai, and C.-H. Lee, \u201cA regression approach to speech enhancement based on deep neural networks,\u201d IEEE\/ACM Trans. Audio, Speech, Language Process., vol.23, no.1, pp.7-19, Jan. 2015. 10.1109\/taslp.2014.2364452","DOI":"10.1109\/TASLP.2014.2364452"},{"key":"2","doi-asserted-by":"crossref","unstructured":"[2] X. Feng, Y. Zhang, and J. Glass, \u201cSpeech feature denoising and dereverberation via deep autoencoders for noisy reverberant speech recognition,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.1759-1763, May 2014. 10.1109\/icassp.2014.6853900","DOI":"10.1109\/ICASSP.2014.6853900"},{"key":"3","doi-asserted-by":"crossref","unstructured":"[3] S.R. Park and J.W. Lee, \u201cA fully convolutional neural network for speech enhancement,\u201d Proc. Annual Conference of the International Speech Communication Association, INTERSPEECH, pp.1993-1997, 2017. 10.21437\/interspeech.2017-1465","DOI":"10.21437\/Interspeech.2017-1465"},{"key":"4","doi-asserted-by":"publisher","unstructured":"[4] B. Li and K.C. Sim, \u201cA spectral masking approach to noise-robust speech recognition using deep neural networks,\u201d IEEE Trans. Audio, Speech, Language Process., vol.22, no.8, pp.1296-1305, Aug. 2014. 10.1109\/taslp.2014.2329237","DOI":"10.1109\/TASLP.2014.2329237"},{"key":"5","doi-asserted-by":"publisher","unstructured":"[5] D. Wang and J. Chen, \u201cSupervised speech separation based on deep learning: An overview,\u201d IEEE\/ACM Trans. Audio, Speech, Language Process., vol.26, no.10, pp.1702-1726, Oct. 2018. 10.1109\/taslp.2018.2842159","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"6","doi-asserted-by":"crossref","unstructured":"[6] A.L. Maas, Q.V. Le, T.M. O&apos;Neil, O. Vinyals, P. Nguyen, and A.Y. Ng, \u201cRecurrent neural networks for noise reduction in robust ASR,\u201d 13th Annual Conference of the International Speech Communication Association 2012, INTERSPEECH 2012, pp.22-25, 2012.","DOI":"10.21437\/Interspeech.2012-6"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] S. Pascual, A. Bonafonte, and J. Serr\u00e0, \u201cSEGAN: Speech enhancement generative adversarial network,\u201d Proc. Annual Conference of the International Speech Communication Association, INTERSPEECH, vol.2017-August, no.D, pp.3642-3646, 2017. 10.21437\/interspeech.2017-1428","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"8","doi-asserted-by":"crossref","unstructured":"[8] A. Pandey and D. Wang, \u201cOn adversarial training and loss functions for speech enhancement,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.5414-5418, Sept. 2018. 10.1109\/icassp.2018.8462614","DOI":"10.1109\/ICASSP.2018.8462614"},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] C. Donahue, B. Li, and R. Prabhavalkar, \u201cExploring speech enhancement with generative adversarial networks for robust speech recognition,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.5024-5028, Sept, 2018. 10.1109\/icassp.2018.8462581","DOI":"10.1109\/ICASSP.2018.8462581"},{"key":"10","doi-asserted-by":"crossref","unstructured":"[10] D. Michelsanti and Z.-H. Tan, \u201cConditional generative adversarial networks for speech enhancement and noise-robust speaker verification,\u201d Proc. Annual Conference of the International Speech Communication Association, INTERSPEECH, pp.2008-2012, 2017. 10.21437\/interspeech.2017-1620","DOI":"10.21437\/Interspeech.2017-1620"},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] M. Mimura, S. Sakai, and T. Kawahara, \u201cCross-domain speech recognition using nonparallel corpora with cycle-consistent adversarial networks,\u201d 2017 IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2017, pp.134-140, 2018. 10.1109\/asru.2017.8268927","DOI":"10.1109\/ASRU.2017.8268927"},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] T. Higuchi, K. Kinoshita, M. Delcroix, and T. Nakatani, \u201cAdversarial training for data-driven speech enhancement without parallel corpus,\u201d 2017 IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2017, pp.40-47, Dec. 2018. 10.1109\/asru.2017.8268914","DOI":"10.1109\/ASRU.2017.8268914"},{"key":"13","doi-asserted-by":"crossref","unstructured":"[13] H. Zhang, C. Liu, N. Inoue, and K. Shinoda, \u201cMulti-task autoencoder for noise-robust speech recognition,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.5599-5603, April 2018. 10.1109\/icassp.2018.8461446","DOI":"10.1109\/ICASSP.2018.8461446"},{"key":"14","unstructured":"[14] I. Gulrajani, F. Ahmed, M. Arjovsky, V. Dumoulin, and A. Courville, \u201cImproved training of Wasserstein GANs,\u201d Advances in Neural Information Processing Systems, pp.5768-5778, 2017."},{"key":"15","doi-asserted-by":"crossref","unstructured":"[15] C. Donahue, B. Li, and R. Prabhavalkar, \u201cExploring speech enhancement with generative adversarial networks for robust speech recognition,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.5024-5028, 2018. 10.1109\/icassp.2018.8462581","DOI":"10.1109\/ICASSP.2018.8462581"},{"key":"16","unstructured":"[16] X. Wei, B. Gong, Z. Liu, W. Lu, and L. Wang, \u201cImproving the improved training of Wasserstein GANs: A consistency term and its dual effect,\u201d International Conference on Learning Representations (ICLR), 2018."},{"key":"17","unstructured":"[17] H. Petzka, A. Fischer, and D. Lukovnicov, \u201cOn the regularization of Wasserstein GANs,\u201d International Conference on Learning Representations (ICLR), 2018."},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] L. Bottou, \u201cLarge-scale machine learning with stochastic gradient descent,\u201d Proc. COMPSTAT&apos;2010, eds. Y. Lechevallier and G. Saporta, ch. Large-Scal, pp.177-186, Physica-Verlag HD, Heidelberg, 2010. 10.1007\/978-3-7908-2604-3_16","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"19","unstructured":"[19] M. Arjovsky, S. Chintala, and L. Bottou, \u201cWasserstein GAN,\u201d International Conference on Machine Learning (ICML), Jan. 2017."},{"key":"20","doi-asserted-by":"crossref","unstructured":"[20] S.-X. Wen, J. Du, and C.-H. Lee, \u201cOn generating mixing noise signals with basis functions for simulating noisy speech and learning DNN-based speech enhancement models,\u201d IEEE International Workshop on Machine Learning for Signal Processing (MLSP), pp.1-6, Dec. 2017. 10.1109\/mlsp.2017.8168192","DOI":"10.1109\/MLSP.2017.8168192"},{"key":"21","unstructured":"[21] ITU, \u201cP.56 recommendation: Objective measurement of active speech level,\u201d 2011."},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] X. Lu, Y. Tsao, S. Matsuda, and C. Hori, \u201cSpeech enhancement based on deep denoising autoencoder,\u201d Proc. Annual Conference of the International Speech Communication Association, INTERSPEECH, pp.436-440, 2013.","DOI":"10.21437\/Interspeech.2013-130"},{"key":"23","unstructured":"[23] R. Pascanu, T. Mikolov, and Y. Bengio, \u201cOn the difficulty of training recurrent neural networks,\u201d 30th International Conference on Machine Learning, ICML 2013, pp.2347-2355, 2013."}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E103.D\/5\/E103.D_2019EDL8183\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T18:30:35Z","timestamp":1666463435000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E103.D\/5\/E103.D_2019EDL8183\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5,1]]},"references-count":23,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2020]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2019edl8183","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"value":"0916-8532","type":"print"},{"value":"1745-1361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,5,1]]}}}