{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T10:19:49Z","timestamp":1773656389490,"version":"3.50.1"},"reference-count":22,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,3]]},"DOI":"10.1109\/icassp.2016.7472809","type":"proceedings-article","created":{"date-parts":[[2016,6,24]],"date-time":"2016-06-24T01:58:30Z","timestamp":1466733510000},"page":"5900-5904","source":"Crossref","is-referenced-by-count":60,"title":["Recurrent neural network training with dark knowledge transfer"],"prefix":"10.1109","author":[{"given":"Zhiyuan","family":"Tang","sequence":"first","affiliation":[]},{"given":"Dong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zhiyong","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"735","article-title":"Deep learning via hessian-free optimization","author":"martens","year":"2010","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)"},{"key":"ref11","first-page":"1033","article-title":"Learning recurrent neural networks with hessian-free optimization","author":"martens","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)"},{"key":"ref12","first-page":"1139","article-title":"On the importance of initialization and momentum in deep learning","author":"sutskever","year":"2013","journal-title":"Proceedings of the 30th International Conference on Machine Learning (ICML-13)"},{"key":"ref13","first-page":"2654","article-title":"Do deep nets really need to be deep?","author":"ba","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref14","article-title":"Distilling the knowledge in a neural network","author":"hinton","year":"2014","journal-title":"NIPS 2014 Deep Learning Workshop"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150464"},{"key":"ref16","article-title":"Learning small-size DNN with output-distribution-based criteria","author":"li","year":"2014","journal-title":"Conference of the International Speech Communication Association (Inter-Speech)"},{"key":"ref17","article-title":"Transferring knowledge from a RNN to a DNN","author":"chan","year":"2015","journal-title":"arXiv preprint arXiv 1504 01483"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","article-title":"Reducing the dimensionality of data with neural networks","volume":"313","author":"hinton","year":"2006","journal-title":"Science"},{"key":"ref19","first-page":"153","article-title":"Greedy layer-wise training of deep networks","volume":"19","author":"bengio","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"ref4","article-title":"Long short-term memory recurrent neural network architectures for large scale acoustic modeling","author":"sak","year":"2014","journal-title":"Proceedings of the Annual Conference of International Speech Communication Association (INTERSPEECH)"},{"key":"ref3","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"2014","journal-title":"Proceedings of the 31st International Conference on Machine Learning (ICML-14)"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/72.279181"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/323533a0"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1561\/2000000039","article-title":"Deep learning: Methods and applications","volume":"7","author":"deng","year":"2013","journal-title":"Foundations and Trends in Signal Processing"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1126\/science.1091277"},{"key":"ref20","article-title":"Fitnets: Hints for thin deep nets","author":"romero","year":"2014","journal-title":"arXiv preprint arXiv 1412 6550"},{"key":"ref22","article-title":"The kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"2011 IEEE Workshop on Automatic Speech Recognition &amp; Understanding"},{"key":"ref21","first-page":"625","article-title":"Why does unsupervised pretraining help deep learning?","volume":"11","author":"erhan","year":"2010","journal-title":"The Journal of Machine Learning Research"}],"event":{"name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Shanghai","start":{"date-parts":[[2016,3,20]]},"end":{"date-parts":[[2016,3,25]]}},"container-title":["2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7465907\/7471614\/07472809.pdf?arnumber=7472809","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,24]],"date-time":"2017-06-24T17:00:10Z","timestamp":1498323610000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7472809\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,3]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/icassp.2016.7472809","relation":{},"subject":[],"published":{"date-parts":[[2016,3]]}}}