{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T10:54:57Z","timestamp":1730199297344,"version":"3.28.0"},"reference-count":48,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/asru46091.2019.9003973","type":"proceedings-article","created":{"date-parts":[[2020,2,21]],"date-time":"2020-02-21T07:01:33Z","timestamp":1582268493000},"page":"988-995","source":"Crossref","is-referenced-by-count":6,"title":["Power-Law Nonlinearity with Maximally Uniform Distribution Criterion for Improved Neural Network Training in Automatic Speech Recognition"],"prefix":"10.1109","author":[{"given":"Chanwoo","family":"Kim","sequence":"first","affiliation":[]},{"given":"Mehul","family":"Kumar","sequence":"additional","affiliation":[]},{"given":"Kwangyoun","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Dhananjaya","family":"Gowda","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953177"},{"key":"ref38","first-page":"5206","article-title":"Lib-rispeech: An asr corpus based on public domain audio books","author":"panayotov","year":"0","journal-title":"IEEE Int Conf Acoust Speech and Signal Processing"},{"key":"ref33","first-page":"188","article-title":"Power function-based power distribution normalization algorithm for robust speech recognition","year":"2009","journal-title":"IEEE Automatic Speech Recognition and Understanding Workshop"},{"key":"ref32","first-page":"2598","article-title":"Robust signal-to-noise ratio estimation based on waveform amplitude distribution analysis","year":"2008","journal-title":"INTERSPEECH-2008"},{"key":"ref31","first-page":"28","article-title":"Feature extraction for robust speech recognition using a power-law nonlinearity and power-bias subtraction","year":"2009","journal-title":"INTERSPEECH-2009"},{"key":"ref30","first-page":"4574","article-title":"Feature extraction for robust speech recognition based on maximizing the sharpness of the power distribution and on power flooring","year":"2010","journal-title":"IEEE Int Conf on Acoustics Speech and Signal Processing"},{"key":"ref37","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"International Conference on Learning Representations ICLR 2015"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9004027"},{"key":"ref35","article-title":"Monotonic chunkwise attention","author":"chiu","year":"2018","journal-title":"International Conference on Learning Representations"},{"journal-title":"Probability random variables and stochastic processes","year":"2002","author":"papoulis","key":"ref34"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"399","DOI":"10.21437\/Interspeech.2017-234","article-title":"Acoustic modeling for Google Home","author":"li","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1616"},{"journal-title":"Samsung bixby","year":"0","key":"ref11"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"3707","DOI":"10.21437\/Interspeech.2017-1566","article-title":"Neural speech recognizer: Acoustic-to-word lstm model for large vocabulary speech recognition","author":"soltau","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639610"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"2378","DOI":"10.21437\/Interspeech.2016-1386","article-title":"Two-stage data augmentation for low-resourced speech recognition","author":"hartmann","year":"2016","journal-title":"Proc Interspeech 2016"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"1469","DOI":"10.1109\/TASLP.2015.2438544","article-title":"Data augmentation for deep neural network acoustic modeling","volume":"23","author":"cui","year":"2015","journal-title":"IEEE\/ACM Transactions on Audio Speech and Language Processing"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462223"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3227"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2009.5373230"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2545928"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"journal-title":"Signal Processing for Robust Speech Recognition Motivated by Auditory Processing","year":"2010","author":"kim","key":"ref27"},{"key":"ref3","article-title":"Improving the speed of neural networks on CPUs","author":"vanhoucke","year":"2011","journal-title":"Proc NIPS Workshop on Deep Learning and Unsupervised Feature Learning"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1938"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6288820"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1423"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"939","DOI":"10.21437\/Interspeech.2017-233","article-title":"A comparison of sequence-to-sequence models for speech recognition","author":"prabhavalkar","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"ref2","article-title":"Feature learning in deep neural networks - studies on speech recognition tasks","author":"yu","year":"2013","journal-title":"Proceedings of the International Conference on Learning Representations"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"379","DOI":"10.21437\/Interspeech.2017-1510","article-title":"Generation of large-scale simulated utterances in virtual rooms to train deep-neural networks for far-field speech recognition in google home","author":"kim","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639100"},{"key":"ref46","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","author":"abadi","year":"2016","journal-title":"12th USENIX Symp Operating Systems Design and Implementation (OSDI 16)"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2566"},{"key":"ref45","first-page":"iii-1310","article-title":"On the difficulty of training recurrent neural networks","author":"pascanu","year":"2013","journal-title":"Proceedings of the 30th International Conference on International Conference on Machine Learning - Volume 28 ser ICML'13 JMLR org"},{"key":"ref48","first-page":"18","article-title":"librosa: Audio and music signal analysis in python","author":"mcfee","year":"0","journal-title":"Proceedings of the 14th Python in Science Conference"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1121\/1.380738"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003976"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2672401"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3216"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462269"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1780"},{"key":"ref23","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in english and mandarin","volume":"48","author":"amodei","year":"2016","journal-title":"Proceedings of The 33rd International Conference on Machine Learning ser Proceedings of Machine Learning Research"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-1"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003936"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"2734","DOI":"10.21437\/Interspeech.2014-157","article-title":"Robust speech recognition using temporal masking and thresholding algorithma","author":"kim","year":"2014","journal-title":"Proc INTERSPEECH 2014"}],"event":{"name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","start":{"date-parts":[[2019,12,14]]},"location":"SG, Singapore","end":{"date-parts":[[2019,12,18]]}},"container-title":["2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8985378\/9003727\/09003973.pdf?arnumber=9003973","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T14:44:17Z","timestamp":1658155457000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9003973\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/asru46091.2019.9003973","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}