{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T05:55:16Z","timestamp":1759384516277,"version":"3.37.3"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2018,1,10]],"date-time":"2018-01-10T00:00:00Z","timestamp":1515542400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1007\/s00521-017-3322-z","type":"journal-article","created":{"date-parts":[[2018,1,10]],"date-time":"2018-01-10T02:30:41Z","timestamp":1515551441000},"page":"4001-4017","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Character-level recurrent neural networks in practice: comparing training and sampling schemes"],"prefix":"10.1007","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0763-8114","authenticated-orcid":false,"given":"Cedric","family":"De Boom","sequence":"first","affiliation":[]},{"given":"Thomas","family":"Demeester","sequence":"additional","affiliation":[]},{"given":"Bart","family":"Dhoedt","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,1,10]]},"reference":[{"key":"3322_CR1","unstructured":"Bradbury J, Merity S, Xiong C, Socher R (2016) Quasi-recurrent neural networks. \n                    arXiv:1611.01576"},{"key":"3322_CR2","doi-asserted-by":"crossref","unstructured":"Cho K, van Merrienboer B, G\u00fcl\u00e7ehre \u00c7, Bahdanau D, Bougares F, Schwenk H, Bengio Y (2014) Learning phrase representations using RNN encoder\u2013decoder for statistical machine translation. \n                    arXiv:1406.1078","DOI":"10.3115\/v1\/D14-1179"},{"key":"3322_CR3","unstructured":"Chung J, G\u00fcl\u00e7ehre \u00c7, Cho K, Bengio Y (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. \n                    arXiv:1412.3555"},{"key":"3322_CR4","unstructured":"Chung J, Ahn S, Bengio Y (2016) Hierarchical multiscale recurrent neural networks. \n                    arXiv:1609.01704"},{"key":"3322_CR5","unstructured":"Cooijmans T, Ballas N, Laurent C, Courville A (2016) Recurrent batch normalization. \n                    arXiv:1603.09025"},{"key":"3322_CR6","unstructured":"De Boom C, Agrawal R, Hansen S, Kumar E, Yon R, Chen CW, Demeester T, Dhoedt B (2017) Large-scale user modeling with recurrent neural networks for music discovery on multiple time scales. \n                    arXiv:1708.06520"},{"key":"3322_CR7","unstructured":"Gal Y, Ghahramani Z (2016) A theoretically grounded application of dropout in recurrent neural networks. NIPS. \n                    arXiv:1512.05287"},{"key":"3322_CR8","volume-title":"Deep learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow I, Bengio Y, Courville A (2016) Deep learning. MIT Press, London"},{"key":"3322_CR9","unstructured":"Graves A (2013) Generating sequences with recurrent neural networks. \n                    arXiv:1308.0850"},{"key":"3322_CR10","unstructured":"Greff K, Srivastava RK, Koutn\u00edk J, Steunebrink BR, Schmidhuber J (2015) LSTM: a search space Odyssey. \n                    arXiv:1503.04069"},{"key":"3322_CR11","unstructured":"Gregor K, Danihelka I, Mnih A, Blundell C, Wierstra D (2013) Deep autoregressive networks. \n                    arXiv:1310.8499"},{"key":"3322_CR12","unstructured":"Gregor K, Danihelka I, Graves A, Wierstra D (2015) DRAW: a recurrent neural network for image generation. \n                    arXiv:1502.04623"},{"key":"3322_CR13","unstructured":"Ha D, Dai A, Le QV (2016) Hypernetworks. \n                    arXiv:1609.09106"},{"key":"3322_CR14","unstructured":"Hidasi B, Karatzoglou A, Baltrunas L, Tikk D (2016) Session-based recommendations with recurrent neural networks. \n                    arXiv:1511.06939"},{"key":"3322_CR15","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9:1735\u20131780","journal-title":"Neural Comput"},{"key":"3322_CR16","unstructured":"Hochreiter S, Bengio Y, Frasconi P, Schmidhuber J (2001) Gradient flow in recurrent nets: the difficulty of learning long-term dependencies. In: A Field Guide to Dynamical Recurrent Neural Networks, IEEE Press"},{"key":"3322_CR17","unstructured":"Hutter M (2012) The human knowledge compression contest"},{"key":"3322_CR18","unstructured":"Inan H, Khosravi K, Socher R (2016) Tying word vectors and word classifiers\u2014a loss framework for language modeling. \n                    arXiv:1611.01462"},{"key":"3322_CR19","unstructured":"Karpathy A, Johnson J, Fei-Fei L (2015) Visualizing and understanding recurrent networks. \n                    arXiv:1506.02078"},{"key":"3322_CR20","unstructured":"Kim Y, Jernite Y, Sontag D, Rush AM (2015) Character-aware neural language models. \n                    arXiv:1508.06615"},{"key":"3322_CR21","unstructured":"Kingma D, Ba J (2015) Adam: a method for stochastic optimization. In: ICLR. \n                    arXiv:1412.6980"},{"key":"3322_CR22","unstructured":"Krause B, Lu L, Murray I, Renals S (2016) Multiplicative LSTM for sequence modelling. \n                    arXiv:1609.07959"},{"key":"3322_CR23","first-page":"313","volume":"19","author":"M Marcus","year":"1993","unstructured":"Marcus M, Santorini B, Marcinkiewicz MA (1993) Building a large annotated corpus of English: the Penn Treebank. Comput Linguist 19:313\u2013330","journal-title":"Comput Linguist"},{"key":"3322_CR24","unstructured":"Melis G, Dyer C, Blunsom P (2017) On the state of the art of evaluation in neural language models. \n                    arXiv:1707.05589"},{"key":"3322_CR25","unstructured":"Merity S, Xiong C, Bradbury J, Socher R (2016) Pointer sentinel mixture models. \n                    arXiv:1609.07843"},{"key":"3322_CR26","unstructured":"Merity S, Keskar NS, Socher R (2017) Regularizing and optimizing LSTM language models. \n                    arXiv:1708.02182"},{"key":"3322_CR27","doi-asserted-by":"crossref","unstructured":"Mikolov T, Zweig G (2012) Context dependent recurrent neural network language model. In: 2012 IEEE spoken language technology workshop (SLTW)","DOI":"10.1109\/SLT.2012.6424228"},{"key":"3322_CR28","doi-asserted-by":"crossref","unstructured":"Mikolov T, Karafi\u00e1t M, Burget L, Cernocky J, Khudanpur S (2010) Recurrent neural network based language model. In: Interspeech","DOI":"10.1109\/ICASSP.2011.5947611"},{"key":"3322_CR29","unstructured":"Mujika A, Meier F, Steger A (2017) Fast\u2013slow recurrent neural networks. \n                    arXiv:1705.08639"},{"key":"3322_CR30","unstructured":"Oord A, Kalchbrenner N, Kavukcuoglu K (2016) Pixel recurrent neural networks. \n                    arXiv:1601.06759"},{"issue":"3","key":"3322_CR31","first-page":"1","volume":"5","author":"DE Rumelhart","year":"1988","unstructured":"Rumelhart DE, Hinton GE, Williams RJ (1988) Learning representations by back-propagating errors. Cogn Model 5(3):1","journal-title":"Cogn Model"},{"key":"3322_CR32","doi-asserted-by":"crossref","unstructured":"Saon G, Sercu T, Rennie S, Kuo HKJ (2016) The IBM 2016 English conversational telephone speech recognition system. \n                    arXiv:1505.05899","DOI":"10.21437\/Interspeech.2016-1460"},{"key":"3322_CR33","doi-asserted-by":"crossref","unstructured":"Sercu T, Goel V (2016) Advances in very deep convolutional neural networks for LVCSR. In: Interspeech. \n                    arXiv:1604.01792","DOI":"10.21437\/Interspeech.2016-1033"},{"key":"3322_CR34","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton GE, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout\u2014 a simple way to prevent neural networks from overfitting. J Mach Learn Res 15:1929\u20131958","journal-title":"J Mach Learn Res"},{"key":"3322_CR35","unstructured":"Sturm BL, Santos JF, Ben-Tal O, Korshunova I (2016) Music transcription modelling and composition using deep learning. \n                    arXiv:1604.08723"},{"key":"3322_CR36","unstructured":"Sutskever I (2013) Training recurrent neural networks. Ph.D. thesis"},{"key":"3322_CR37","doi-asserted-by":"crossref","unstructured":"Tan YK, Xu X, Liu Y (2016) Improved recurrent neural networks for session-based recommendations. \n                    arXiv:1606.08117","DOI":"10.1145\/2988450.2988452"},{"key":"3322_CR38","unstructured":"Van Den Oord A, Dieleman S, Zen H, Simonyan K, Vinyals O, Graves A, Kalchbrenner N, Senior A, Kavukcuoglu K (2016) WaveNet: a generative model for raw audio. \n                    arXiv:1609.03499"},{"key":"3322_CR39","unstructured":"Wu Y, Zhang S, Zhang Y, Bengio Y, Salakhutdinov R (2016) On multiplicative integration with recurrent neural networks. \n                    arXiv:1606.06630"},{"key":"3322_CR40","unstructured":"Yang Z, Dai Z, Salakhutdinov R, Cohen WW (2017) Breaking the softmax bottleneck: a high-rank RNN language model. \n                    arXiv:1711.03953"},{"key":"3322_CR41","unstructured":"Zaremba W, Sutskever I, Vinyals O (2014) Recurrent neural network regularization. \n                    arXiv:1409.2329"},{"key":"3322_CR42","unstructured":"Zilly JG, Srivastava RK, Koutn\u00edk J, Schmidhuber J (2017) Recurrent highway networks. \n                    arXiv:1607.03474"},{"key":"3322_CR43","unstructured":"Zoph B, Le QV (2016) Neural architecture search with reinforcement learning. \n                    arXiv:1611.01578"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-017-3322-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00521-017-3322-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-017-3322-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,6]],"date-time":"2019-09-06T14:24:50Z","timestamp":1567779890000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00521-017-3322-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,1,10]]},"references-count":43,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2019,8]]}},"alternative-id":["3322"],"URL":"https:\/\/doi.org\/10.1007\/s00521-017-3322-z","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2018,1,10]]},"assertion":[{"value":"18 April 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 December 2017","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 January 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"Cedric De Boom is funded by a Ph.D. grant of the Research Foundation\u2014Flanders (FWO). The other authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}