{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T10:48:08Z","timestamp":1725706088251},"reference-count":35,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,19]]},"DOI":"10.1109\/slt48900.2021.9383626","type":"proceedings-article","created":{"date-parts":[[2021,3,25]],"date-time":"2021-03-25T20:46:54Z","timestamp":1616705214000},"page":"209-214","source":"Crossref","is-referenced-by-count":2,"title":["Frame-Level Specaugment for Deep Convolutional Neural Networks in Hybrid ASR Systems"],"prefix":"10.1109","author":[{"given":"Xinwei","family":"Li","sequence":"first","affiliation":[]},{"given":"Yuanyuan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xiaodan","family":"Zhuang","sequence":"additional","affiliation":[]},{"given":"Daben","family":"Liu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1986.1169179"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2840"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053889"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1819"},{"journal-title":"Ph D thesis","year":"2004","author":"povey","key":"ref35"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.940763"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953152"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1510","article-title":"Generation of large-scale simulated utterances in virtual rooms to train deep-neural networks for far-field speech recognition in Google Home","author":"kim","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/SSD.2019.8893184"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682816"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053205"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053573"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003972"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472765"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"article-title":"Sequence Transduction with Recurrent Neural Net works","year":"2012","author":"graves","key":"ref4"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-233"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053973"},{"key":"ref6","article-title":"Vocal Tract Length Perturbation (VTLP) improves speech recognition","author":"jaitly","year":"2013","journal-title":"ICML Workshop on Deep Learning for Audio Speech and Language Processing"},{"year":"0","key":"ref29","article-title":"Quicknet"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"article-title":"Deep speech: Scaling up end-to-end speech recognition","year":"2014","author":"hannun","key":"ref8"},{"key":"ref7","first-page":"3586","article-title":"Audio augmentation for speech recognition","author":"ko","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-251"},{"article-title":"Multilingual Graphemic Hybrid ASR with Massive Data Augmentation","year":"2020","author":"liu","key":"ref9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-619"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4960445"},{"key":"ref21","first-page":"971","article-title":"Self-normalizing neural networks","author":"klambauer","year":"2017","journal-title":"Proc NeurIPS"},{"key":"ref24","article-title":"Sequence-discriminative training of deep neural networks","author":"vesely","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-3","article-title":"Scalable minimum Bayes risk training of deep neural network acoustic models using distributed Hessian-free optimization","author":"kingsbury","year":"2012","journal-title":"Proc INTERSPEECH"},{"key":"ref26","first-page":"4835","article-title":"Joint CTC-attention based end-to-end speech recognition using multi-task learning","author":"kim","year":"2017","journal-title":"Proc ICASSP"},{"key":"ref25","first-page":"5880","article-title":"Scalable training of deep learning ma chines by incremental block training with intra-block parallel optimization and blockwise model-update filtering","author":"chen","year":"2016","journal-title":"Proc ICASSP"}],"event":{"name":"2021 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2021,1,19]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,1,22]]}},"container-title":["2021 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9383468\/9383452\/09383626.pdf?arnumber=9383626","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,22]],"date-time":"2022-12-22T13:16:45Z","timestamp":1671715005000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9383626\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,19]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/slt48900.2021.9383626","relation":{},"subject":[],"published":{"date-parts":[[2021,1,19]]}}}