{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,11]],"date-time":"2025-06-11T04:13:59Z","timestamp":1749615239802,"version":"3.41.0"},"reference-count":35,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"10","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2016]]},"DOI":"10.1587\/transinf.2016slp0014","type":"journal-article","created":{"date-parts":[[2016,9,30]],"date-time":"2016-09-30T22:23:31Z","timestamp":1475274211000},"page":"2462-2470","source":"Crossref","is-referenced-by-count":3,"title":["N-gram Approximation of Latent Words Language Models for Domain Robust Automatic Speech Recognition"],"prefix":"10.1587","volume":"E99.D","author":[{"given":"Ryo","family":"MASUMURA","sequence":"first","affiliation":[{"name":"NTT Media Intelligence Laboratories, NTT Corporation"}]},{"given":"Taichi","family":"ASAMI","sequence":"additional","affiliation":[{"name":"NTT Media Intelligence Laboratories, NTT Corporation"}]},{"given":"Takanobu","family":"OBA","sequence":"additional","affiliation":[{"name":"NTT Media Intelligence Laboratories, NTT Corporation"}]},{"given":"Hirokazu","family":"MASATAKI","sequence":"additional","affiliation":[{"name":"NTT Media Intelligence Laboratories, NTT Corporation"}]},{"given":"Sumitaka","family":"SAKAUCHI","sequence":"additional","affiliation":[{"name":"NTT Media Intelligence Laboratories, NTT Corporation"}]},{"given":"Satoshi","family":"TAKAHASHI","sequence":"additional","affiliation":[{"name":"NTT Media Intelligence Laboratories, NTT Corporation"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] J.T. 
Goodman, \u201cA bit of progress in language modeling,\u201d Computer Speech &amp; Language, vol.15, pp.403-434, 2001.","DOI":"10.1006\/csla.2001.0174"},{"key":"2","doi-asserted-by":"crossref","unstructured":"[2] S.F. Chen and J. Goodman, \u201cAn empirical study of smoothing techniques for language modeling,\u201d Computer Speech &amp; Language, vol.13, pp.359-383, 1999.","DOI":"10.1006\/csla.1999.0128"},{"key":"3","doi-asserted-by":"crossref","unstructured":"[3] R. Kneser and H. Ney, \u201cImproved backing-off for m-gram language modeling,\u201d Proc. ICASSP, vol.1, pp.181-184, 1995.","DOI":"10.1109\/ICASSP.1995.479394"},{"key":"4","doi-asserted-by":"crossref","unstructured":"[4] Y.W. Teh, \u201cA hierarchical Bayesian language model based on Pitman-Yor processes,\u201d Proc. COLING\/ACL, pp.985-992, 2006.","DOI":"10.3115\/1220175.1220299"},{"key":"5","doi-asserted-by":"crossref","unstructured":"[5] S. Huang and S. Renals, \u201cHierarchical Pitman-Yor language models for ASR in meetings,\u201d Proc. ASRU, pp.124-129, 2007.","DOI":"10.1109\/ASRU.2007.4430096"},{"key":"6","unstructured":"[6] P.F. Brown, P.V. deSouza, R.L. Mercer, V.J.D. Pietra, and J.C. Lai, \u201cClass-based n-gram models of natural language,\u201d Computational Linguistics, vol.18, pp.467-479, 1992."},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] G. Potamianos and F. Jelinek, \u201cA study of n-gram and decision tree letter language modeling methods,\u201d Speech Communication, vol.24, no.3, pp.171-192, 1998.","DOI":"10.1016\/S0167-6393(98)00018-1"},{"key":"8","unstructured":"[8] P. Xu and F. Jelinek, \u201cRandom forests in language modeling,\u201d Proc. EMNLP, pp.325-332, 2004."},{"key":"9","unstructured":"[9] Y. Bengio, R. Ducharme, P. Vincent, and C. Jauvin, \u201cA neural probabilistic language model,\u201d J. Mach. Learn. Res., vol.3, pp.1137-1155, 2003."},{"key":"10","unstructured":"[10] T. Mikolov, M. Karafiat, L. Burget, J. Cernocky, and S. 
Khudanpur, \u201cRecurrent neural network based language model,\u201d Proc. INTERSPEECH, pp.1045-1048, 2010."},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] T. Mikolov, S. Kombrink, L. Burget, J. Cernocky, and S. Khudanpur, \u201cExtensions of recurrent neural network language model,\u201d Proc. ICASSP, pp.5528-5531, 2011.","DOI":"10.1109\/ICASSP.2011.5947611"},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] K. Deschacht, J.D. Belder, and M.-F. Moens, \u201cThe latent words language model,\u201d Computer Speech &amp; Language, vol.26, pp.384-409, 2012.","DOI":"10.1016\/j.csl.2012.04.001"},{"key":"13","unstructured":"[13] S. Goldwater and T. Griffiths, \u201cA fully Bayesian approach to unsupervised part-of-speech tagging,\u201d Proc. ACL, pp.744-751, 2007."},{"key":"14","unstructured":"[14] P. Blunsom and T. Cohn, \u201cA hierarchical Pitman-Yor process HMM for unsupervised part of speech induction,\u201d Proc. ACL, pp.865-874, 2011."},{"key":"15","doi-asserted-by":"crossref","unstructured":"[15] Y. Su, \u201cBayesian class-based language models,\u201d Proc. ICASSP, pp.5564-5567, 2011.","DOI":"10.1109\/ICASSP.2011.5947620"},{"key":"16","doi-asserted-by":"crossref","unstructured":"[16] J.-T. Chien and C.H. Chueh, \u201cDirichlet class language models for speech recognition,\u201d IEEE Transactions on Audio, Speech and Language Processing, vol.19, pp.482-495, 2011.","DOI":"10.1109\/TASL.2010.2050717"},{"key":"17","doi-asserted-by":"crossref","unstructured":"[17] W. Wang, A. Stolcke, and M.P. Harper, \u201cThe use of a linguistically motivated language model in conversational speech recognition,\u201d Proc. ICASSP, pp.261-264, 2004.","DOI":"10.1109\/ICASSP.2004.1325972"},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] R. Wang, M. Utiyama, I. Goto, E. Sumita, H. Zhao, and B.L. Lu, \u201cConverting continuous-space language models into n-gram language models for statistical machine translation,\u201d Proc. 
EMNLP, pp.845-850, 2013.","DOI":"10.18653\/v1\/D13-1082"},{"key":"19","doi-asserted-by":"crossref","unstructured":"[19] E. Arisoy, S.F. Chen, B. Ramabhadran, and A. Sethy, \u201cConverting neural network language models into back-off language models for efficient decoding in automatic speech recognition,\u201d Proc. ICASSP, pp.8242-8246, 2013.","DOI":"10.1109\/ICASSP.2013.6639272"},{"key":"20","unstructured":"[20] E. Arisoy, S.F. Chen, B. Ramabhadran, and A. Sethy, \u201cConverting neural network language models into back-off language models for efficient decoding in automatic speech recognition,\u201d IEEE\/ACM Trans. Audio, Speech, Language Process., vol.22, pp.2329-9290, 2014."},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] A. Deoras, T. Mikolov, S. Kombrink, M. Karafiat, and S. Khudanpur, \u201cVariational approximation of long-span language models in LVCSR,\u201d Proc. ICASSP, pp.5532-5535, 2011.","DOI":"10.1109\/ICASSP.2011.5947612"},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] A. Deoras, T. Mikolov, S. Kombrink, and K. Church, \u201cApproximate inference: A sampling based modeling technique to capture complex dependencies in a language model,\u201d Speech Communication, vol.55, no.1, pp.162-177, 2013.","DOI":"10.1016\/j.specom.2012.08.004"},{"key":"23","doi-asserted-by":"crossref","unstructured":"[23] H. Adel, K. Kirchhoff, N.T. Vu, D. Telaar, and T. Schultz, \u201cComparing approaches to convert recurrent neural networks into backoff language models for efficient decoding,\u201d Proc. INTERSPEECH, pp.651-655, 2014.","DOI":"10.21437\/Interspeech.2014-165"},{"key":"24","doi-asserted-by":"crossref","unstructured":"[24] R. Masumura, H. Masataki, T. Oba, O. Yoshioka, and S. Takahashi, \u201cUse of latent words language models in ASR: a sampling-based implementation,\u201d Proc. ICASSP, pp.8445-8449, 2013.","DOI":"10.1109\/ICASSP.2013.6639313"},{"key":"25","doi-asserted-by":"crossref","unstructured":"[25] A. Emami and F. 
Jelinek, \u201cRandom clusterings for language modeling,\u201d Proc. ICASSP, vol.1, pp.581-584, 2005.","DOI":"10.1109\/ICASSP.2005.1415180"},{"key":"26","doi-asserted-by":"crossref","unstructured":"[26] Y.W. Teh, M.I. Jordan, M.J. Beal, and D.M. Blei, \u201cHierarchical Dirichlet processes,\u201d Journal of the American Statistical Association, vol.101, no.476, pp.1566-1581, 2006.","DOI":"10.1198\/016214506000000302"},{"key":"27","doi-asserted-by":"crossref","unstructured":"[27] D.J.C. MacKay and L.C. Peto, \u201cA hierarchical Dirichlet language model,\u201d Natural Language Engineering, vol.1, pp.289-308, 1994.","DOI":"10.1017\/S1351324900000218"},{"key":"28","doi-asserted-by":"crossref","unstructured":"[28] A. Stolcke, \u201cSRILM-an extensible language modeling toolkit,\u201d Proc. ICSLP, vol.2, pp.901-904, 2002.","DOI":"10.21437\/ICSLP.2002-303"},{"key":"29","unstructured":"[29] A. Stolcke, \u201cEntropy-based pruning of backoff language models,\u201d Proc. DARPA Broadcast News Transcription and Understanding Workshop, pp.270-274, 1998."},{"key":"30","doi-asserted-by":"crossref","unstructured":"[30] M.P. Marcus, M.A. Marcinkiewicz, and B. Santorini, \u201cBuilding a large annotated corpus of English: The penn treebank,\u201d Computational Linguistics, vol.19, pp.313-330, 1993.","DOI":"10.21236\/ADA273556"},{"key":"31","unstructured":"[31] K. Maekawa, H. Koiso, S. Furui, and H. Isahara, \u201cSpontaneous speech corpus of Japanese,\u201d Proc. LREC, pp.947-952, 2000."},{"key":"32","doi-asserted-by":"crossref","unstructured":"[32] G. Hinton, L. Deng, D. Yu, G. Dahl, A.-R. Mohamed, N. Jaitly, A. Senior, V. Vanhoucke, P. Nguyen, T. Sainath, and B. Kingsbury, \u201cDeep neural networks for acoustic modeling in speech recognition,\u201d IEEE Signal Process. Mag., vol.29, no.6, pp.82-97, 2012.","DOI":"10.1109\/MSP.2012.2205597"},{"key":"33","doi-asserted-by":"crossref","unstructured":"[33] T. Hori, C. Hori, Y. Minami, and A. 
Nakamura, \u201cEfficient WFST-based one-pass decoding with on-the-fly hypothesis rescoring in extremely large vocabulary continuous speech recognition,\u201d IEEE transactions on Audio, Speech and Language Processing, vol.15, no.4, pp.1352-1365, 2007.","DOI":"10.1109\/TASL.2006.889790"},{"key":"34","unstructured":"[34] H. Masataki, D. Shibata, Y. Nakazawa, S. Kobashikawa, A. Ogawa, and K. Ohtsuki, \u201cVoiceRex spontaneous speech recognition technology for contact-center conversations,\u201d NTT Technical Review, vol.5, no.1, pp.22-27, 2007."},{"key":"35","doi-asserted-by":"crossref","unstructured":"[35] T. Fuchi and S. Takagi, \u201cJapanese morphological analyzer using word co-occurrence: JTAG,\u201d Proc. COLING\/ACL, pp.409-413, 1998.","DOI":"10.3115\/980845.980915"}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E99.D\/10\/E99.D_2016SLP0014\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T22:12:55Z","timestamp":1749593575000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transinf\/E99.D\/10\/E99.D_2016SLP0014\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"references-count":35,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2016]]}},"URL":"https:\/\/doi.org\/10.1587\/transinf.2016slp0014","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"type":"print","value":"0916-8532"},{"type":"electronic","value":"1745-1361"}],"subject":[],"published":{"date-parts":[[2016]]}}}