{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:25:01Z","timestamp":1775229901751,"version":"3.50.1"},"reference-count":36,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,4]]},"DOI":"10.1109\/icassp.2018.8461809","type":"proceedings-article","created":{"date-parts":[[2018,9,21]],"date-time":"2018-09-21T22:24:48Z","timestamp":1537568688000},"page":"4839-4843","source":"Crossref","is-referenced-by-count":80,"title":["Minimum Word Error Rate Training for Attention-Based Sequence-to-Sequence Models"],"prefix":"10.1109","author":[{"given":"Rohit","family":"Prabhavalkar","sequence":"first","affiliation":[]},{"given":"Tara N.","family":"Sainath","sequence":"additional","affiliation":[]},{"given":"Yonghui","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Patrick","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Zhifeng","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Chung-Cheng","family":"Chiu","sequence":"additional","affiliation":[]},{"given":"Anjuli","family":"Kannan","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","article-title":"Hogwild: A lock-free approach to parallelizing stochastic gradient descent","author":"recht","year":"2011","journal-title":"Proc of NIPS"},{"key":"ref32","author":"abadi","year":"2015","journal-title":"Tensorflow Large-scale machine learning on heterogeneous distributed systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref36","article-title":"State-of-the-art speech recognition with sequence-to-sequence models","author":"chiu","year":"2018","journal-title":"Proceedings of ICASSP (accepted)"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-275"},{"key":"ref34","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proceedings of ICL"},{"key":"ref10","article-title":"Exploring architectures, data and units for streaming end-to-end speech recognition with RNN-transducer","author":"rao","year":"2017","journal-title":"Proc of ASRU"},{"key":"ref11","article-title":"A comparison of sequence-to-sequence models for speech recognition","author":"prabhavalkar","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref12","article-title":"BLEU: A method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"Proc of ACL"},{"key":"ref13","article-title":"Lattice-based optimization of sequence classification criteria for neural-network acoustic modeling","author":"kingsbury","year":"2009","journal-title":"Proc of ICASSP"},{"key":"ref14","article-title":"Sequence-discriminative training of deep neural networks","author":"vesel\u00fd","year":"2013","journal-title":"Proc of Interspeech"},{"key":"ref15","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"2014","journal-title":"Proc of ICML"},{"key":"ref16","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-639","article-title":"Optimizing expected word error rate via sampling for speech recognition","author":"shannon","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref17","article-title":"Sequence level training with recurrent neural networks","author":"ranzato","year":"2016","journal-title":"Proceedings of ICL"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.1007\/BF00992696","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","volume":"8","author":"williams","year":"1992","journal-title":"Machine Learning"},{"key":"ref19","article-title":"An actor-critic algorithm for structured prediction","author":"bahadanau","year":"2017","journal-title":"Proceedings of ICL"},{"key":"ref28","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1510","article-title":"Generation of large-scale simulated utterances in virtual rooms to train deep-neural networks for far-field speech recognition in google home","author":"kim","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref4","article-title":"Listen, attend and spell: A neural network for large vocabulary conversational speech recognition","author":"chan","year":"2016","journal-title":"Proc of ICASSP"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-232","article-title":"An analysis of &#x201C;attention&#x201D; in sequence-to-sequence models","author":"prabhavalkar","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref3","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1705","article-title":"Recurrent neural aligner: An encoder-decoder neural network model for sequence to sequence mapping","author":"sak","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref29","article-title":"Fast and accurate recurrent neural network acoustic models for speech recognition","author":"sak","year":"2015","journal-title":"Proc of Interspeech"},{"key":"ref5","article-title":"End-to-end attention-based large vocabulary speech recognition","author":"bahdanau","year":"2016","journal-title":"Proc of ICASSP"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1566","article-title":"Neural speech recognizer: Acoustic-to-word LSTM model for large vocabulary speech recognition","author":"soltau","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref7","article-title":"Advances in all-neural speech recognition","author":"zweig","year":"2017","journal-title":"Proc of ICASSP"},{"key":"ref2","article-title":"Speech recognition with deep neural networks","author":"graves","year":"2013","journal-title":"Proc of ICASSP"},{"key":"ref9","article-title":"Japanese and korean voice search","author":"schuster","year":"2012","journal-title":"Proc of ICASSP"},{"key":"ref1","article-title":"Sequence transduction with recurrent neural networks","author":"graves","year":"2012","journal-title":"Proc ICML Workshop on Representation Learning"},{"key":"ref20","author":"povey","year":"2003","journal-title":"Discriminative training for large vocabulary speech recognition"},{"key":"ref22","article-title":"Acoustic modelling with CD-CTC-SMBR LSTM RNNs","author":"senior","year":"2015","journal-title":"Proc of ASRU"},{"key":"ref21","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Proc of NIPS"},{"key":"ref24","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc of NIPS"},{"key":"ref23","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"Proceedings of ICL"},{"key":"ref26","article-title":"Error back propagation for sequence training of context-dependent deep networks for conversational speech transcription","author":"su","year":"2013","journal-title":"Proc of ICASSP"},{"key":"ref25","article-title":"Scheduled sampling for sequence prediction with recurrent neural networks","author":"bengio","year":"2015","journal-title":"Proc of NIPS"}],"event":{"name":"ICASSP 2018 - 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Calgary, AB","start":{"date-parts":[[2018,4,15]]},"end":{"date-parts":[[2018,4,20]]}},"container-title":["2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8450881\/8461260\/08461809.pdf?arnumber=8461809","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,23]],"date-time":"2020-08-23T22:31:21Z","timestamp":1598221881000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8461809\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/icassp.2018.8461809","relation":{},"subject":[],"published":{"date-parts":[[2018,4]]}}}