{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,3]],"date-time":"2026-01-03T15:17:03Z","timestamp":1767453423071,"version":"3.28.0"},"reference-count":36,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/slt.2018.8639693","type":"proceedings-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T23:36:34Z","timestamp":1550187394000},"page":"389-396","source":"Crossref","is-referenced-by-count":71,"title":["End-to-end Speech Recognition With Word-Based Rnn Language Models"],"prefix":"10.1109","author":[{"given":"Takaaki","family":"Hori","sequence":"first","affiliation":[]},{"given":"Jaejin","family":"Cho","sequence":"additional","affiliation":[]},{"given":"Shinji","family":"Watanabe","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","article-title":"Towards better decoding and language model integration in sequence to sequence models","author":"chorowski","year":"2016","journal-title":"arXiv preprint arXiv 1612 02695"},{"key":"ref32","article-title":"On the difficulty of training recurrent neural networks","author":"pascanu","year":"2012","journal-title":"arXiv preprint arXiv 1211 5063"},{"key":"ref31","article-title":"Adadelta: an adaptive learning rate method","author":"zeiler","year":"2012","journal-title":"arXiv preprint arXiv 1212 5701"},{"key":"ref30","article-title":"Very deep convolutional networks for end-to-end speech recognition","author":"zhang","year":"2017","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4842-2766-4_12"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref34","article-title":"Chainer: a next-generation open source framework for deep learning","author":"tokui","year":"2015","journal-title":"Proceedings of Workshop on Machine Learning Systems (LearningSys) in NIPS"},{"key":"ref10","article-title":"An analysis of incorporating an external language model into a sequence-to-sequence model","author":"kannan","year":"2017","journal-title":"arXiv preprint arXiv 1712 01996"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268948"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607215"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.540308"},{"key":"ref14","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"arXiv preprint arXiv 1409 1556"},{"key":"ref15","article-title":"Supervised sequence labelling with recurrent neural networks","author":"graves","year":"2008","journal-title":"PhD thesis"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2001.0184"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.2200\/S00462ED1V01Y201212SAP010"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref28","volume":"ldc93s6a","author":"garofalo","year":"2007","journal-title":"CSR-I (WSJ0) Complete"},{"key":"ref3","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"2014","journal-title":"International Conference on Machine Learning (ICML)"},{"volume":"ldc94s13a","journal-title":"CSR-II (WSJ1) Complete","year":"1994","key":"ref27"},{"key":"ref6","first-page":"4835","article-title":"Joint CTC-attention based end-to-end speech recognition using multitask learning","author":"kim","year":"2017","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP)"},{"key":"ref5","first-page":"577","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"2015","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref8","article-title":"On using monolingual corpora in neural machine translation","author":"gulcehre","year":"2015","journal-title":"arXiv preprint arXiv 1503"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1048"},{"key":"ref2","article-title":"The Kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)"},{"key":"ref1","article-title":"Convolutional, Long Short-Term Memory, Fully Connected Deep Neural Networks","author":"sainath","year":"2015","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref9","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1296","article-title":"Advances in joint CTC-attention based end-to-end speech recognition with a deep CNN encoder and RNN-LM","author":"hori","year":"2017","journal-title":"InterSpeech"},{"key":"ref20","article-title":"Neural speech recognizer: Acoustic-to-word lstm model for large vocabulary speech recognition","author":"soltau","year":"2016","journal-title":"arXiv preprint arXiv 1610 09975"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268924"},{"key":"ref21","article-title":"Direct acoustics-to-word models for english conversational speech recognition","author":"audhkhasi","year":"2017","journal-title":"arXiv preprint arXiv 1703 06870"},{"key":"ref24","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1109\/ASRU.2017.8268935","article-title":"Exploring architectures, data and units for streaming end-to-end speech recognition with rnn-transducer","author":"rao","year":"2017","journal-title":"IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)"},{"key":"ref23","article-title":"Google&#x2019;s neural machine translation system: Bridging the gap between human and machine translation","author":"wu","year":"2016","journal-title":"arXiv preprint arXiv 1609 09861"},{"key":"ref26","first-page":"23","article-title":"A new algorithm for data compression","volume":"12","author":"gage","year":"1994","journal-title":"C\/C++ Users J"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1616"}],"event":{"name":"2018 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2018,12,18]]},"location":"Athens, Greece","end":{"date-parts":[[2018,12,21]]}},"container-title":["2018 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8632666\/8639030\/08639693.pdf?arnumber=8639693","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T04:11:15Z","timestamp":1643256675000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8639693\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/slt.2018.8639693","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}