{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T06:55:53Z","timestamp":1760597753925,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"S2","license":[{"start":{"date-parts":[[2017,6,21]],"date-time":"2017-06-21T00:00:00Z","timestamp":1498003200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100010261","name":"Fok Ying Tung Education Foundation","doi-asserted-by":"crossref","award":["151068"],"award-info":[{"award-number":["151068"]}],"id":[{"id":"10.13039\/501100010261","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61332002"],"award-info":[{"award-number":["61332002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Foundation for Youth Science and Technology Innovation Research Team of Sichuan Province","award":["2016TD0018"],"award-info":[{"award-number":["2016TD0018"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2019,2]]},"DOI":"10.1007\/s00521-017-3065-x","type":"journal-article","created":{"date-parts":[[2017,6,21]],"date-time":"2017-06-21T08:22:28Z","timestamp":1498033348000},"page":"999-1011","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Global context-dependent recurrent neural network language model with sparse feature learning"],"prefix":"10.1007","volume":"31","author":[{"given":"Hongli","family":"Deng","sequence":"first","affiliation":[]},{"given":"Lei","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Lituan","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,6,21]]},"reference":[{"issue":"6","key":"3065_CR1","first-page":"1137","volume":"3","author":"Y Bengio","year":"2003","unstructured":"Bengio Y, Schwenk H, Sencal JS, Morin F, Gauvain JL (2003) A neural probabilistic language model. J Mach Learn Res 3(6):1137\u20131155","journal-title":"J Mach Learn Res"},{"issue":"2","key":"3065_CR2","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio Y, Simard P, Frasconi P (1994) Learning long-term dependencies with gradient descent is difficult. IEEE Trans Neural Netw 5(2):157\u2013166","journal-title":"IEEE Trans Neural Netw"},{"issue":"4","key":"3065_CR3","first-page":"467","volume":"18","author":"PF Brown","year":"1997","unstructured":"Brown PF, Desouza PV, Mercer RL, Pietra VJD, Lai JC (1997) Class-based n -gram models of natural language. Comput Linguist 18(4):467\u2013479","journal-title":"Comput Linguist"},{"key":"3065_CR4","unstructured":"Chelba C, Mikolov T, Schuster M, Ge Q, Brants T, Koehn P, Robinson T (2013) One billion word benchmark for measuring progress in statistical language modeling. arXiv preprint; arXiv:1312.3005"},{"key":"3065_CR5","first-page":"2493","volume":"12","author":"R Collobert","year":"2011","unstructured":"Collobert R, Weston J, Bottou L, Karlen M, Kavukcuoglu K, Kuksa P (2011) Natural language processing (almost) from scratch. J Mach Learn Res 12:2493\u20132537","journal-title":"J Mach Learn Res"},{"key":"3065_CR6","doi-asserted-by":"crossref","unstructured":"Federico M (1996) Bayesian estimation methods for n-gram language model adaptation. In: Proceedings of the international conference on spoken language, Icslp 96. vol 1, pp 240\u2013243","DOI":"10.1109\/ICSLP.1996.607087"},{"issue":"10","key":"3065_CR7","doi-asserted-by":"publisher","first-page":"2451","DOI":"10.1162\/089976600300015015","volume":"12","author":"FA Gers","year":"2000","unstructured":"Gers FA, Schmidhuber J, Cummins F (2000) Learning to forget: continual prediction with LSTM. Neural Comput 12(10):2451\u20132471","journal-title":"Neural Comput"},{"key":"3065_CR8","first-page":"115","volume":"3","author":"FA Gers","year":"2003","unstructured":"Gers FA, Schraudolph NN, Schmidhuber J (2003) Learning precise timing with LSTM recurrent networks. J Mach Learn Res 3:115\u2013143","journal-title":"J Mach Learn Res"},{"key":"3065_CR9","unstructured":"Graves A (2013) Generating sequences with recurrent neural networks. arXiv preprint; arXiv:1308.0850"},{"issue":"8","key":"3065_CR10","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"3065_CR11","unstructured":"Jozefowicz R, Vinyals O, Schuster M, Shazeer N, Wu Y (2016) Exploring the limits of language modeling. arXiv preprint; arXiv:1602.02410"},{"key":"3065_CR12","unstructured":"Kim Y, Jernite Y, Sontag D, Rush AM (2015) Character-aware neural language models. arXiv preprint; arXiv:1508.06615"},{"key":"3065_CR13","unstructured":"Kneser R, Ney H (1995) Improved backing-off for n-gram language modeling"},{"key":"3065_CR14","unstructured":"Lee H, Ekanadham C, Ng AY (2008) Sparse deep belief net model for visual area v2. In: Advances in neural information processing systems, pp 873\u2013880"},{"key":"3065_CR15","doi-asserted-by":"crossref","unstructured":"Liu X, Chen X, Gales M, Woodland P (2015) Paraphrastic recurrent neural network language models. In: IEEE international conference on acoustics, speech and signal processing (ICASSP), 2015, pp 5406\u20135410. IEEE","DOI":"10.1109\/ICASSP.2015.7179004"},{"key":"3065_CR16","unstructured":"Mahoney M (2009) Large text compression benchmark. URL: http:\/\/www. mattmahoney. net\/text\/text. html"},{"issue":"2","key":"3065_CR17","first-page":"313","volume":"19","author":"MP Marcus","year":"1993","unstructured":"Marcus MP, Marcinkiewicz MA, Santorini B (1993) Building a large annotated corpus of english: the Penn treebank. Comput Linguist 19(2):313\u2013330","journal-title":"Comput Linguist"},{"key":"3065_CR18","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. arXiv preprint; arXiv:1301.3781"},{"key":"3065_CR19","doi-asserted-by":"crossref","unstructured":"Mikolov T, Karafi\u00e1t M, Burget L, Cernock\u1ef3 J, Khudanpur S (2010) Recurrent neural network based language model. In: INTERSPEECH, vol\u00a02, p\u00a03","DOI":"10.1109\/ICASSP.2011.5947611"},{"key":"3065_CR20","doi-asserted-by":"crossref","unstructured":"Mikolov T, Zweig G (2012) Context dependent recurrent neural network language model. In: SLT, pp 234\u2013239","DOI":"10.1109\/SLT.2012.6424228"},{"key":"3065_CR21","doi-asserted-by":"crossref","unstructured":"Niesler TR, Woodland PC (1996) A variable-length category-based n-gram language model. In: ICASSP, pp 164\u2013167","DOI":"10.1109\/ICASSP.1996.540316"},{"key":"3065_CR22","unstructured":"Pascanu R, Gulcehre C, Cho K, Bengio Y (2013) How to construct deep recurrent neural networks. arXiv preprint; arXiv:1312.6026"},{"key":"3065_CR23","doi-asserted-by":"crossref","unstructured":"Peng X, Yu Z, Yi Z, Tang H (2016) Constructing the L2-graph for robust subspace learning and subspace clustering. IEEE Trans Cybern 99: 1\u201314. 10.1109\/TCYB.2016.2536752","DOI":"10.1109\/TCYB.2016.2536752"},{"key":"3065_CR24","unstructured":"Saxe AM, McClelland JL, Ganguli S (2013) Exact solutions to the nonlinear dynamics of learning in deep linear neural networks. arXiv preprint; arXiv:1312.6120"},{"issue":"1","key":"3065_CR25","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton GE, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929\u20131958","journal-title":"J Mach Learn Res"},{"key":"3065_CR26","unstructured":"Sukhbaatar S, Weston J, Fergus R et\u00a0al (2015) End-to-end memory networks. In: Advances in neural information processing systems, pp 2431\u20132439"},{"key":"3065_CR27","doi-asserted-by":"crossref","unstructured":"Sundermeyer M, Schl\u00fcter R, Ney H (2012) LSTM neural networks for language modeling. In: INTERSPEECH, pp 194\u2013197","DOI":"10.21437\/Interspeech.2012-65"},{"key":"3065_CR28","unstructured":"Team TD, Alrfou R, Alain G, Almahairi A, Angermueller C, Bahdanau D, Ballas N, Bastien F, Bayer J, Belikov A (2016) Theano: a python framework for fast computation of mathematical expressions"},{"key":"3065_CR29","unstructured":"Tom\u00e1\u0161 M (2012) Statistical language models based on neural networks. Ph.D. thesis, Brno University of Technology, 2012"},{"key":"3065_CR30","unstructured":"Wang T, Cho K (2015) Larger-context language modelling. arXiv preprint; arXiv:1511.03729"},{"key":"3065_CR31","unstructured":"Xiong D, Zhang M, Li H (2011) Enhancing language models in statistical machine translation with backward n-grams and mutual information triggers. In: Proceedings of the 49th annual meeting of the association for computational linguistics: human language technologies, vol 1, pp 1288\u20131297. Association for Computational Linguistics"},{"issue":"7","key":"3065_CR32","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1002\/scj.1210","volume":"34","author":"H Yamamoto","year":"2003","unstructured":"Yamamoto H, Isogai S, Sagisaka Y (2003) Multi-class composite n-gram language model. Syst Comput Jpn 34(7):108\u2013114","journal-title":"Syst Comput Jpn"},{"key":"3065_CR33","unstructured":"Zaremba W, Sutskever I, Vinyals O (2014) Recurrent neural network regularization. arXiv preprint; arXiv:1409.2329"},{"key":"3065_CR34","unstructured":"Zeiler MD (2012) Adadelta: an adaptive learning rate method. arXiv preprint; arXiv:1212.5701"},{"key":"3065_CR35","unstructured":"Zhang S, Jiang H, Wei S, Dai L (2015) Feedforward sequential memory neural networks without recurrent feedback. arXiv preprint; arXiv:1510.02693"},{"key":"3065_CR36","doi-asserted-by":"crossref","unstructured":"Zhang S, Jiang H, Xu M, Hou J, Dai L (2015) The fixed-size ordinally-forgetting encoding method for neural network language models. Short Papers 2: 495","DOI":"10.3115\/v1\/P15-2081"},{"key":"3065_CR37","doi-asserted-by":"publisher","unstructured":"Zhen L, Peng D, Yi Z, Xiang Y, Chen P (2016) Underdetermined blind source separation using sparse coding. IEEE Trans Neural Netw Learn Syst 99: 1\u20137. doi: 10.1109\/TNNLS.2016.2610960","DOI":"10.1109\/TNNLS.2016.2610960"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-017-3065-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00521-017-3065-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-017-3065-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,29]],"date-time":"2022-07-29T22:39:29Z","timestamp":1659134369000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00521-017-3065-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,6,21]]},"references-count":37,"journal-issue":{"issue":"S2","published-print":{"date-parts":[[2019,2]]}},"alternative-id":["3065"],"URL":"https:\/\/doi.org\/10.1007\/s00521-017-3065-x","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2017,6,21]]},"assertion":[{"value":"11 December 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 June 2017","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 June 2017","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that they have no conflicts of interest to this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}