{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T22:22:04Z","timestamp":1773872524094,"version":"3.50.1"},"reference-count":48,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,3]]},"DOI":"10.1109\/icassp.2017.7952599","type":"proceedings-article","created":{"date-parts":[[2017,6,20]],"date-time":"2017-06-20T21:35:36Z","timestamp":1497994536000},"page":"2462-2466","source":"Crossref","is-referenced-by-count":89,"title":["A comprehensive study of deep bidirectional LSTM RNNS for acoustic modeling in speech recognition"],"prefix":"10.1109","author":[{"given":"Albert","family":"Zeyer","sequence":"first","affiliation":[]},{"given":"Patrick","family":"Doetsch","sequence":"additional","affiliation":[]},{"given":"Paul","family":"Voigtlaender","sequence":"additional","affiliation":[]},{"given":"Ralf","family":"Schluter","sequence":"additional","affiliation":[]},{"given":"Hermann","family":"Ney","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","author":"neelakantan","year":"2015","journal-title":"Adding gradient noise improves learning for very deep networks"},{"key":"ref38","article-title":"Incorporating Nesterov momentum into Adam","author":"dozat","year":"2015","journal-title":"Tech Rep"},{"key":"ref33","first-page":"1139","article-title":"On the importance of initialization and momentum in deep learning","author":"sutskever","year":"2013","journal-title":"Proceedings of the 30th International Conference on Machine Learning (ICML-13)"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/0041-5553(64)90137-5"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-759"},{"key":"ref37","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"duchi","year":"2011","journal-title":"The Journal of Machine Learning Research"},{"key":"ref36","author":"zeiler","year":"2012","journal-title":"ADADELTA An Adaptive Learning Rate Method"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853582"},{"key":"ref34","first-page":"372","article-title":"A method of solving a convex programming problem with convergence rate $O$ (1\/$k$2)","volume":"27","author":"nesterov","year":"1983","journal-title":"Soviet Mathematics Doklady"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178826"},{"key":"ref40","article-title":"Lecture 6.5 - RMSprop: Divide the gradient by a running average of its recent magnitude","author":"tieleman","year":"2012","journal-title":"COURSERA Neural Networks for Machine Learning"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178842"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"ref13","author":"chan","year":"2015","journal-title":"Deep recurrent neural networks for acoustic modelling"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178839"},{"key":"ref15","author":"zhang","year":"2015","journal-title":"Highway long short-term memory RNNs for distant speech recognition"},{"key":"ref16","volume":"abs 1412 3555","author":"chung","year":"2014","journal-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling"},{"key":"ref17","first-page":"2342","article-title":"An empirical exploration of recurrent network architectures","author":"jozefowicz","year":"2015","journal-title":"Proceedings of the 32nd International Conference on Machine Learning (ICML-15)"},{"key":"ref18","author":"greff","year":"2015","journal-title":"LSTM A Search Space Odyssey"},{"key":"ref19","author":"breuel","year":"2015","journal-title":"Benchmarking of LSTM networks"},{"key":"ref28","first-page":"2368","article-title":"Training very deep networks","author":"srivastava","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"631","DOI":"10.21437\/Interspeech.2014-151","article-title":"Robust speech recognition using long short-term memory recurrent neural networks for hybrid acoustic modelling","author":"geiger","year":"2014","journal-title":"InterSpeech"},{"key":"ref27","author":"kingma","year":"2014","journal-title":"Adam A method for stochastic optimization"},{"key":"ref3","author":"sak","year":"2014","journal-title":"Long short-term memory based recurrent neural network architectures for large vocabulary speech recognition"},{"key":"ref6","article-title":"RETURNN: The RWTH extensible training framework for universal recurrent neural networks","author":"doetsch","year":"2016","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP) 2016"},{"key":"ref29","author":"he","year":"2015","journal-title":"Deep residuallearning for image recognition"},{"key":"ref5","year":"2016","journal-title":"GitHub repository with config files for LSTM experiments in RETURNN"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/72.279192"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404850"},{"key":"ref20","first-page":"115","article-title":"Learning precise timing with LSTM recurrent networks","volume":"3","author":"gers","year":"2003","journal-title":"The Journal of Machine Learning Research"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163899"},{"key":"ref48","first-page":"1260","article-title":"Multilingual features based keyword search for very low-resource languages","author":"golik","year":"2015","journal-title":"InterSpeech"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854207"},{"key":"ref47","author":"danihelka","year":"2016","journal-title":"Associative long short-term memory"},{"key":"ref21","article-title":"RASR - the RWTH Aachen university open source speech recognition toolkit","author":"rybach","year":"2011","journal-title":"IEEE Automatic Speech Recognition and Understanding Workshop"},{"key":"ref42","author":"gulcehre","year":"2014","journal-title":"Adasecant robust adaptive secant method for stochastic gradient"},{"key":"ref24","doi-asserted-by":"crossref","first-page":"1517","DOI":"10.21437\/Interspeech.2010-443","article-title":"The RWTH 2009 Quaero ASR evaluation system for English and German","author":"nu\u00dfbaum-thom","year":"2010","journal-title":"InterSpeech"},{"key":"ref41","author":"funk","year":"0","journal-title":"RMSprop loses to SMORMS3 - beware the epsilon!"},{"key":"ref23","article-title":"Theano: new features and speed improvements","author":"bastien","year":"2012","journal-title":"NIPS 2012 Workshop on Deep Learning and Unsupervised Feature Learning"},{"key":"ref44","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","author":"glorot","year":"2010","journal-title":"International Conference on Artificial Intelligence and Statistics"},{"key":"ref26","first-page":"iv-649","article-title":"Gammatone features and feature combination for large vocabulary speech recognition","volume":"4","author":"bezrukov","year":"2007","journal-title":"Acoustics Speech and Signal Processing 2007 ICASSP 2007 IEEE International Conference on"},{"key":"ref43","author":"hinton","year":"2012","journal-title":"Improving Neural Networks by Preventing Co-adaptation of Feature Detectors"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3210-1"}],"event":{"name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"New Orleans, LA","start":{"date-parts":[[2017,3,5]]},"end":{"date-parts":[[2017,3,9]]}},"container-title":["2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7943262\/7951776\/07952599.pdf?arnumber=7952599","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T21:00:43Z","timestamp":1750366843000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7952599\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,3]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/icassp.2017.7952599","relation":{},"subject":[],"published":{"date-parts":[[2017,3]]}}}