{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T16:39:55Z","timestamp":1774975195283,"version":"3.50.1"},"reference-count":53,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2015,3,1]],"date-time":"2015-03-01T00:00:00Z","timestamp":1425168000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2015,3]]},"DOI":"10.1109\/taslp.2015.2400218","type":"journal-article","created":{"date-parts":[[2015,2,26]],"date-time":"2015-02-26T19:49:15Z","timestamp":1424980155000},"page":"517-529","source":"Crossref","is-referenced-by-count":401,"title":["From Feedforward to Recurrent LSTM Neural Networks for Language Modeling"],"prefix":"10.1109","volume":"23","author":[{"given":"Martin","family":"Sundermeyer","sequence":"first","affiliation":[]},{"given":"Hermann","family":"Ney","sequence":"additional","affiliation":[]},{"given":"Ralf","family":"Schluter","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1049\/cp:19991218"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref33","first-page":"221","article-title":"Forming word classes by statistical clustering for statistical language modelling","author":"kneser","year":"1991","journal-title":"Proc QUALICO"},{"key":"ref32","first-page":"246","article-title":"Hierarchical probabilistic neural network language model","author":"morin","year":"2005","journal-title":"Proc 10th Int Workshop Artif Intell 
Statist"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.940893"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1093\/oso\/9780198538493.003.0003","author":"bishop","year":"1995","journal-title":"Neural Networks for Pattern Recognition"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/72.279181"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(97)00062-9"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639271"},{"key":"ref34","first-page":"467","article-title":"Class-based <formula formulatype=\"inline\"><tex Notation=\"TeX\">$n$<\/tex><\/formula>-gram models of natural language","volume":"18","author":"brown","year":"1992","journal-title":"Comput Linguist"},{"key":"ref28","first-page":"6404","article-title":"Cache based recurrent neural network language model inference for first pass speech recognition","author":"huang","year":"2015","journal-title":"Proc ICASSP"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2000.0152"},{"key":"ref29","first-page":"6414","article-title":"Real-time one-pass decoding with recurrent neural network language model for speech recognition","author":"hori","year":"2015","journal-title":"Proc ICASSP"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1987.1165125"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/5.880083"},{"key":"ref20","first-page":"3419","article-title":"Prefix tree based N-best list re-scoring for recurrent neural network language model used in speech recognition system","author":"si","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref22","first-page":"1116","article-title":"A fast re-scoring strategy to capture long-distance dependencies","author":"deoras","year":"2011","journal-title":"Proc EMNLP"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"1567","DOI":"10.21437\/Eurospeech.1999-397","article-title":"Recognition performance of a 
structured language model","volume":"4","author":"chelba","year":"1999","journal-title":"Proc EUROSPEECH"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854535"},{"key":"ref23","first-page":"1044","article-title":"Joint language and translation modeling with recurrent neural networks","author":"auli","year":"2013","journal-title":"Proc EMNLP"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2012.06.004"},{"key":"ref25","first-page":"661","article-title":"Lattice decoding and rescoring with long-span neural network language models","author":"sundermeyer","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref50","first-page":"2222","article-title":"Multilingual hierarchical MRASTA features for ASR","author":"t\u00fcske","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.862024"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(01)00041-3"},{"key":"ref52","article-title":"RASR\u2014the RWTH Aachen University open source speech recognition toolkit","author":"rybach","year":"2011","journal-title":"Proc ASRU"},{"key":"ref10","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-65","article-title":"LSTM neural networks for language modeling","author":"sundermeyer","year":"2012","journal-title":"Proc INTERSPEECH"},{"key":"ref40","first-page":"115","article-title":"Learning precise timing with LSTM recurrent networks","volume":"3","author":"gers","year":"2002","journal-title":"J Mach Learn Res"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/79.790984"},{"key":"ref13","first-page":"14","article-title":"Translation modeling with bidirectional recurrent neural networks","author":"sundermeyer","year":"2014","journal-title":"Proc 
EMNLP"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947611"},{"key":"ref15","first-page":"20","article-title":"Deep neural network language models","author":"ar\u0131soy","year":"2012","journal-title":"NAACL HLT Workshop"},{"key":"ref16","first-page":"5005","article-title":"Performance analysis of neural networks in combination with <formula formulatype=\"inline\"><tex Notation=\"TeX\">$n$<\/tex> <\/formula>-gram language models","author":"oparin","year":"2012","journal-title":"Proc ICASSP"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639310"},{"key":"ref18","first-page":"1","article-title":"Measuring the influence of long range dependencies with neural network language models","author":"le","year":"2012","journal-title":"NAACL HLT Workshop"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"2877","DOI":"10.21437\/Interspeech.2011-720","article-title":"Recurrent neural network based language modeling in meeting recognition","author":"kombrink","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1006\/csla.1999.0128"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1995.479394"},{"key":"ref6","first-page":"933","article-title":"A neural probabilistic language model","volume":"13","author":"bengio","year":"2000","journal-title":"Proc NIPS"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1007\/978-94-017-1183-8_6","author":"ney","year":"1997","journal-title":"Corpus-Based Methods in Language and Speech Processing"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2215599"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5946920"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2006.09.003"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"1045","DOI":"10.21437\/Interspeech.2010-343","article-title":"Recurrent neural network based language 
model","author":"mikolov","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163930"},{"key":"ref45","first-page":"196","article-title":"RNNLM\u2014recurrent neural network language modeling toolkit","author":"mikolov","year":"2011","journal-title":"Proc ASRU"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/5.880081"},{"key":"ref47","article-title":"Efficient lattice representation and generation","author":"weng","year":"1998","journal-title":"Proc ICSLP"},{"key":"ref42","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1007\/978-3-642-35289-8_25","author":"bottou","year":"2012","journal-title":"Neural Networks Tricks of the Trade"},{"key":"ref41","first-page":"2093","article-title":"RWTHLM\u2014The RWTH Aachen University neural network language modeling toolkit","author":"sundermeyer","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref44","first-page":"433","author":"williams","year":"1995","journal-title":"Backpropagation Theory Architectures and Applications"},{"key":"ref43","first-page":"318","author":"rumelhart","year":"1986","journal-title":"The PDP Research Group Parallel Distributed Processing"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language 
Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/7050384\/07050391.pdf?arnumber=7050391","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,20]],"date-time":"2025-05-20T03:32:16Z","timestamp":1747711936000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7050391\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,3]]},"references-count":53,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/taslp.2015.2400218","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,3]]}}}