{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T23:54:27Z","timestamp":1778716467132,"version":"3.51.4"},"reference-count":24,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,11]]},"DOI":"10.1109\/iscslp.2018.8706675","type":"proceedings-article","created":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T22:58:35Z","timestamp":1558393115000},"page":"146-150","source":"Crossref","is-referenced-by-count":27,"title":["Hybrid CTC-Attention based End-to-End Speech Recognition using Subword Units"],"prefix":"10.1109","author":[{"given":"Zhangyu","family":"Xiao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhijian","family":"Ou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Chu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hui","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/N15-1038"},{"key":"ref11","article-title":"Deep speech 2: End-to-end speech recognition in english and mandarin","author":"amodei","year":"2016","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref12","doi-asserted-by":"crossref","DOI":"10.1109\/ASRU.2017.8268935","article-title":"Exploring architectures, data and units for streaming end-to-end speech recognition with rnn-transducer","author":"rao","year":"2017","journal-title":"IEEE Autom Speech Recognition Understanding Workshop (ASRU)"},{"key":"ref13","article-title":"Subword and crossword units for ctc acoustic models","author":"zenkel","year":"2017","journal-title":"arXiv preprint arXiv 1712 06855"},{"key":"ref14","article-title":"Neural speech recognizer: Acoustic-to-word lstm model for large vocabulary speech recognition","author":"soltau","year":"2016","journal-title":"arXiv preprint arXiv 1610 09975"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref16","article-title":"Google&#x2019;s neural machine translation system: Bridging the gap between human and machine translation","author":"wu","year":"2016","journal-title":"arXiv preprint arXiv 1609 09861"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6289079"},{"key":"ref18","article-title":"Gram-ctc: Automatic unit selection and target decomposition for sequence labelling","author":"liu","year":"2017","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref19","article-title":"Latent sequence decompositions","author":"chan","year":"2016","journal-title":"arXiv preprint arXiv 1610 01292"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref6","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"2014","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953075"},{"key":"ref8","article-title":"Wav2letter: an end-to-end convnet-based speech recognition system","author":"collobert","year":"2016","journal-title":"arXiv preprint arXiv 1609 03193"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/5.18626"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref21","article-title":"Chainer: a next-generation open source framework for deep learning","author":"tokui","year":"2015","journal-title":"Workshop on Machine Learning Systems"},{"key":"ref24","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref23","article-title":"The kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"IEEE Autom Speech Recognition Understanding Workshop (ASRU)"}],"event":{"name":"2018 11th International Symposium on Chinese Spoken Language Processing (ISCSLP)","location":"Taipei City, Taiwan","start":{"date-parts":[[2018,11,26]]},"end":{"date-parts":[[2018,11,29]]}},"container-title":["2018 11th International Symposium on Chinese Spoken Language Processing (ISCSLP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8701133\/8706262\/08706675.pdf?arnumber=8706675","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,23]],"date-time":"2020-08-23T23:11:10Z","timestamp":1598224270000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8706675\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/iscslp.2018.8706675","relation":{},"subject":[],"published":{"date-parts":[[2018,11]]}}}