{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T23:45:39Z","timestamp":1771458339497,"version":"3.50.1"},"reference-count":32,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,4]]},"DOI":"10.1109\/icassp.2018.8461558","type":"proceedings-article","created":{"date-parts":[[2018,9,21]],"date-time":"2018-09-21T22:24:48Z","timestamp":1537568688000},"page":"4769-4773","source":"Crossref","is-referenced-by-count":33,"title":["Advancing Connectionist Temporal Classification with Attention Modeling"],"prefix":"10.1109","author":[{"given":"Amit","family":"Das","sequence":"first","affiliation":[]},{"given":"Jinyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Rui","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Yifan","family":"Gong","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref32","article-title":"Advancing Acoustic-to-Word CTC Model","author":"li","year":"2018","journal-title":"Proc ICASSP"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref30","first-page":"1045","article-title":"Recurrent Neural Networks Based Language Model","author":"mikolov","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref10","author":"sainath","year":"2017","journal-title":"Improving the Performance of Online Neural Transducer Models"},{"key":"ref11","first-page":"369","article-title":"Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neural Networks","author":"graves","year":"2006","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref12","first-page":"1764","article-title":"Towards End-to-End Speech Recognition with Recurrent Neural Networks","author":"graves","year":"2014","journal-title":"Machine Learning Research"},{"key":"ref13","article-title":"Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation","author":"cho","year":"2014","journal-title":"Proc Empirical Methods in Natural Language Processing"},{"key":"ref14","article-title":"Neural Machine Translation by Jointly Learning to Align and Translate","author":"bahdanau","year":"2015","journal-title":"ICLRE"},{"key":"ref15","article-title":"End-to-End Attention-Based Large Vocabulary Speech Recognition","volume":"abs 1508 4395","author":"bahdanau","year":"2015","journal-title":"CoRR"},{"key":"ref16","article-title":"Attention- Based Models for Speech Recognition","author":"chorowski","year":"2015","journal-title":"Conf on Neural Information Processing Systems"},{"key":"ref17","article-title":"Sequence Transduction with Recurrent Neural Networks","volume":"abs 1211 3711","author":"graves","year":"2012","journal-title":"CoRR"},{"key":"ref18","author":"soltau","year":"2016","journal-title":"Neural speech recognizer Acoustic-to-word lstm model for large vocabulary speech recognition"},{"key":"ref19","article-title":"Exploring architectures, data and units for streaming end-to-end speech recognition with RNN-transducer","author":"rao","year":"2017","journal-title":"Proc ASRU"},{"key":"ref28","author":"hori","year":"2017","journal-title":"Advances in joint CTC-attention based end-to-end speech recognition with a deep CNN encoder and RNN-LM"},{"key":"ref4","article-title":"Listen, Attend and Spell","volume":"abs 1508 1211","author":"chan","year":"2015","journal-title":"CoRR"},{"key":"ref27","first-page":"4835","article-title":"Joint CTC-Attention Based End-to-End Speech Recognition Using Multi-Task Learning","author":"kim","year":"2017","journal-title":"Proc ICASSP"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref6","author":"battenberg","year":"2017","journal-title":"Exploring neural transducers for end-to-end speech recognition"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"3532","DOI":"10.21437\/Interspeech.2017-1118","article-title":"Multi-task Learning with Low-Level Auxiliary Tasks for Encoder-Decoder Based Speech Recognition","author":"toshniwal","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"939","DOI":"10.21437\/Interspeech.2017-233","article-title":"A Comparison of Sequence-to-Sequence Models for Speech Recognition","author":"prabhavalkar","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref8","article-title":"Towards Discriminatively-trained HMM - based End-to-end models for Automatic Speech Recognition","author":"hadian","year":"2018","journal-title":"Submitted to ICASSP"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1705","article-title":"Recurrent neural aligner: An encoder-decoder neural network model for sequence to sequence mapping","author":"sak","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178778"},{"key":"ref9","article-title":"State-of-the-art speech recognition with sequence-to-sequence models","author":"chung-cheng","year":"2018","journal-title":"Submitted to ICASSP"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2017.7510508"},{"key":"ref20","article-title":"Deep Speech: Scaling up End-to-End Speech Recognition","volume":"abs 1412 5567","author":"hannun","year":"2014","journal-title":"CoRR"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-71"},{"key":"ref21","article-title":"Fast and Accurate Recurrent Neural Network Acoustic Models for Speech Recognition","author":"sak","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref24","author":"liu","year":"2017","journal-title":"Gram-CTC Automatic unit selection and target decomposition for sequence labelling"},{"key":"ref23","first-page":"4805","article-title":"Advances in All-Neural Speech Recognition","author":"zweig","year":"2017","journal-title":"Proc ICASSP"},{"key":"ref26","article-title":"Acoustic-to-Word Model Without OOV","author":"li","year":"2017","journal-title":"Proc ASRU IEEE"},{"key":"ref25","author":"audhkhasi","year":"2017","journal-title":"Direct acoustics-to-word models for English conversational speech recognition"}],"event":{"name":"ICASSP 2018 - 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Calgary, AB","start":{"date-parts":[[2018,4,15]]},"end":{"date-parts":[[2018,4,20]]}},"container-title":["2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8450881\/8461260\/08461558.pdf?arnumber=8461558","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,23]],"date-time":"2020-08-23T22:37:10Z","timestamp":1598222230000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8461558\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/icassp.2018.8461558","relation":{},"subject":[],"published":{"date-parts":[[2018,4]]}}}