{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:29:26Z","timestamp":1775230166113,"version":"3.50.1"},"reference-count":27,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/slt.2018.8639034","type":"proceedings-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T23:36:34Z","timestamp":1550187394000},"page":"418-425","source":"Crossref","is-referenced-by-count":114,"title":["Deep Context: End-to-end Contextual Speech Recognition"],"prefix":"10.1109","author":[{"given":"Golan","family":"Pundak","sequence":"first","affiliation":[]},{"given":"Tara N.","family":"Sainath","sequence":"additional","affiliation":[]},{"given":"Rohit","family":"Prabhavalkar","sequence":"additional","affiliation":[]},{"given":"Anjuli","family":"Kannan","sequence":"additional","affiliation":[]},{"given":"Ding","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Exploring architectures, data and units for streaming end-to-end speech recognition with rnn-transducer","author":"rao","year":"2017","journal-title":"Proc of ASRU"},{"key":"ref11","article-title":"Listen, attend and spell","author":"chan","year":"2015","journal-title":"CoRR"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462682"},{"key":"ref15","article-title":"Cold fusion: Training seq2seq models together with language models","author":"sriram","year":"2017","journal-title":"CoRR"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2416"},{"key":"ref17","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"Proceedings of ICL"},{"key":"ref18","article-title":"Streaming Small-footprint Keyword Spotting Using Sequence-to-Sequence Models","author":"he","year":"2017","journal-title":"Proc ASRU"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178957"},{"key":"ref27","article-title":"Parallel wavenet: Fast high-fidelity speech synthesis","author":"van den oord","year":"2017","journal-title":"arXiv preprint arXiv 1711 11585"},{"key":"ref3","first-page":"253","article-title":"Voice search language model adaptation using contextual information","author":"scheiner","year":"2016","journal-title":"Proc SLT"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461809"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6289079"},{"key":"ref8","article-title":"Neural speech recognizer: Acoustic-to-word lstm model for large vocabulary speech recognition","author":"soltau","year":"2016","journal-title":"CoRR"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref2","article-title":"Bringing contextual information to google speech recognition","author":"aleksic","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref9","article-title":"Sequence transduction with recurrent neural networks","author":"graves","year":"2012","journal-title":"Proceedings of the 29th International Conference on Machine Learning (ICML 2012)"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472820"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2015-340","article-title":"Composition-based on-the-fly rescoring for salient n-gram biasing","author":"hall","year":"2015","journal-title":"Proceedings of Interspeech 2015"},{"key":"ref21","article-title":"Attention Is All You Need","author":"vaswani","year":"2017","journal-title":"CoRR"},{"key":"ref24","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"NIPS"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2001.0184"},{"key":"ref26","author":"abadi","year":"2015","journal-title":"Tensorflow Large-scale machine learning on heterogeneous distributed systems"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1510","article-title":"Generation of large-scale simulated utterances in virtual rooms to train deep-neural networks for far-field speech recognition in google home","author":"kim","year":"2017","journal-title":"Proc of Interspeech"}],"event":{"name":"2018 IEEE Spoken Language Technology Workshop (SLT)","location":"Athens, Greece","start":{"date-parts":[[2018,12,18]]},"end":{"date-parts":[[2018,12,21]]}},"container-title":["2018 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8632666\/8639030\/08639034.pdf?arnumber=8639034","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T05:54:11Z","timestamp":1643262851000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8639034\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/slt.2018.8639034","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}