{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T12:16:14Z","timestamp":1766578574889,"version":"3.28.0"},"reference-count":23,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,4]]},"DOI":"10.1109\/icassp.2018.8462492","type":"proceedings-article","created":{"date-parts":[[2018,9,21]],"date-time":"2018-09-21T22:24:48Z","timestamp":1537568688000},"page":"4764-4768","source":"Crossref","is-referenced-by-count":54,"title":["Attention-Based End-to-End Speech Recognition on Voice Search"],"prefix":"10.1109","author":[{"given":"Changhao","family":"Shan","sequence":"first","affiliation":[]},{"given":"Junbo","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yujun","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Xie","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","author":"glorot","year":"2010","journal-title":"Proc of the International Conference on Artificial Intelligence and Statistics"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/168304.168306"},{"journal-title":"Advances in joint CTC-attention based end-to-end speech recognition with a deep CNN encoder and RNN-LM","year":"2017","author":"hori","key":"ref14"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"1424","DOI":"10.1109\/72.548170","article-title":"An analysis of noise in recurrent neural networks: convergence and generalization","volume":"7","author":"jim","year":"1996","journal-title":"IEEE Transactions on Neural Networks"},{"journal-title":"Adam A method for stochastic optimization","year":"2014","author":"kingma","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472084"},{"key":"ref19","article-title":"A comparison of sequence-to-sequence models for speech recognition","author":"prabhavalkar","year":"2017","journal-title":"InterSpeech"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"journal-title":"End-to-end continuous speech recognition using attention-based recurrent nn First results","year":"2014","author":"chorowski","key":"ref6"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-334"},{"key":"ref8","first-page":"577","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"2015","journal-title":"Advances in neural information processing systems"},{"journal-title":"Towards better decoding and language model integration in sequence to sequence models","year":"2016","author":"chorowski","key":"ref7"},{"journal-title":"Neural machine translation by jointly learning to align and translate","year":"2014","author":"bahdanau","key":"ref2"},{"key":"ref1","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in English and Mandarin","author":"amodei","year":"2016","journal-title":"ICML"},{"key":"ref9","first-page":"8604","article-title":"Recent advances in deep learning for speech research at Microsoft","author":"deng","year":"2013","journal-title":"ICASSP IEEE"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1044"},{"key":"ref22","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"ICML"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2014.09.003"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953077"}],"event":{"name":"ICASSP 2018 - 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2018,4,15]]},"location":"Calgary, AB","end":{"date-parts":[[2018,4,20]]}},"container-title":["2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8450881\/8461260\/08462492.pdf?arnumber=8462492","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,24]],"date-time":"2020-08-24T02:19:22Z","timestamp":1598235562000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8462492\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/icassp.2018.8462492","relation":{},"subject":[],"published":{"date-parts":[[2018,4]]}}}