{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T12:16:45Z","timestamp":1769516205472,"version":"3.49.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,6,6]]},"DOI":"10.1109\/icassp39728.2021.9414505","type":"proceedings-article","created":{"date-parts":[[2021,5,13]],"date-time":"2021-05-13T19:53:45Z","timestamp":1620935625000},"page":"6014-6018","source":"Crossref","is-referenced-by-count":20,"title":["Dynamic Sparsity Neural Networks for Automatic Speech Recognition"],"prefix":"10.1109","author":[{"given":"Zhaofeng","family":"Wu","sequence":"first","affiliation":[{"name":"University of Washington,Paul G. Allen School of Computer Science &#x0026; Engineering"}]},{"given":"Ding","family":"Zhao","sequence":"additional","affiliation":[{"name":"Google"}]},{"given":"Qiao","family":"Liang","sequence":"additional","affiliation":[{"name":"Google"}]},{"given":"Jiahui","family":"Yu","sequence":"additional","affiliation":[{"name":"Google"}]},{"given":"Anmol","family":"Gulati","sequence":"additional","affiliation":[{"name":"Google"}]},{"given":"Ruoming","family":"Pang","sequence":"additional","affiliation":[{"name":"Google"}]}],"member":"263","reference":[{"key":"ref33","article-title":"Dynamic deep neural networks: Optimizing accuracy-efficiency trade-offs by selective execution","author":"liu","year":"2018","journal-title":"Proc Of AAAI"},{"key":"ref32","article-title":"Stabilizing the lottery ticket hypothesis","author":"frankle","year":"2019"},{"key":"ref31","article-title":"The lottery ticket hypothesis: Finding sparse, trainable neural networks","author":"frankle","year":"2019","journal-title":"Proc of ICLR"},{"key":"ref30","article-title":"Second order derivatives for network pruning: Optimal brain surgeon","author":"hassibi","year":"1993","journal-title":"Proc of NIPS"},{"key":"ref36","article-title":"Network slimming by slimmable networks: Towards one-shot architecture search for channel numbers","author":"yu","year":"2019"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K16-1029"},{"key":"ref34","article-title":"Multi-scale dense networks for resource efficient image classification","author":"huang","year":"2018","journal-title":"Proc of ICLR"},{"key":"ref10","article-title":"Rethinking the value of network pruning","author":"liu","year":"2019","journal-title":"Proc of ICLR"},{"key":"ref11","article-title":"SNIP: Single-shot network pruning based on connection sensitivity","author":"lee","year":"2019","journal-title":"Proc of ICLR"},{"key":"ref12","article-title":"Dynamic network surgery for efficient DNNs","author":"guo","year":"2016","journal-title":"Proc of NIPS"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/309"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_41"},{"key":"ref16","article-title":"Regularization of neural networks using dropconnect","author":"wan","year":"2013","journal-title":"Proc of ICML"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.541"},{"key":"ref18","article-title":"Learning-compression&#x201D; algorithms for neural net pruning","author":"carreira-perpin\u00e1n","year":"2018","journal-title":"Proc of CVPR"},{"key":"ref19","article-title":"Efficient knowledge distillation for rnn-transducer models","author":"panchapagesan","year":"2020"},{"key":"ref28","article-title":"A closer look at memorization in deep networks","author":"arpit","year":"2017","journal-title":"Proc of ICML"},{"key":"ref4","article-title":"AI benchmark: Running deep neural networks on android smartphones","author":"ignatov","year":"2018","journal-title":"Proc of ECCV"},{"key":"ref27","article-title":"Understanding deep learning requires rethinking generalization","author":"zhang","year":"2017"},{"key":"ref3","article-title":"Optimizing speech recognition for the edge","author":"shangguan","year":"2019"},{"key":"ref6","article-title":"What&#x2019;s hidden in a randomly weighted neural network?","author":"ramanujan","year":"2020","journal-title":"Proc of CVPR"},{"key":"ref29","article-title":"Optimal brain damage","author":"lecun","year":"1990","journal-title":"Proc of NIPS"},{"key":"ref5","article-title":"Deconstructing lottery tickets: Zeros, signs, and the supermask","author":"zhou","year":"2019","journal-title":"Proc of NeurIPS"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00189"},{"key":"ref7","article-title":"Slimmable neural networks","author":"yu","year":"2019","journal-title":"Proc of ICLR"},{"key":"ref2","article-title":"To prune, or not to prune: Exploring the efficacy of pruning for model compression","author":"zhu","year":"2018"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"ref1","article-title":"Learning both weights and connections for efficient neural network","author":"han","year":"2015","journal-title":"Proc of NIPS"},{"key":"ref20","article-title":"Shallow-fusion end-to-end contextual biasing","author":"zhao","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref22","article-title":"Sequence transduction with recurrent neural networks","author":"graves","year":"2012","journal-title":"Proc of ICML"},{"key":"ref21","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1510","article-title":"Generated of large-scale simulated utterances in virtual rooms to train deep-neural networks for far-field speech recognition in Google Home","author":"kim","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref24","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc of ICLR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268917"},{"key":"ref26","article-title":"Block-sparse recurrent neural networks","author":"narang","year":"2017"},{"key":"ref25","article-title":"Tensorflow: Large-scale machine learning on heterogeneous distributed systems","author":"abadi","year":"2015"}],"event":{"name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Toronto, ON, Canada","start":{"date-parts":[[2021,6,6]]},"end":{"date-parts":[[2021,6,11]]}},"container-title":["ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9413349\/9413350\/09414505.pdf?arnumber=9414505","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T00:18:38Z","timestamp":1659485918000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9414505\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,6]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/icassp39728.2021.9414505","relation":{},"subject":[],"published":{"date-parts":[[2021,6,6]]}}}