{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T11:20:22Z","timestamp":1770290422869,"version":"3.49.0"},"publisher-location":"Berlin, Heidelberg","reference-count":36,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642352881","type":"print"},{"value":"9783642352898","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-35289-8_38","type":"book-chapter","created":{"date-parts":[[2012,11,14]],"date-time":"2012-11-14T12:03:17Z","timestamp":1352894597000},"page":"709-733","source":"Crossref","is-referenced-by-count":8,"title":["Solving Partially Observable Reinforcement Learning Problems with Recurrent Neural Networks"],"prefix":"10.1007","author":[{"given":"Siegmund","family":"Duell","sequence":"first","affiliation":[]},{"given":"Steffen","family":"Udluft","sequence":"additional","affiliation":[]},{"given":"Volkmar","family":"Sterzing","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"38_CR1","unstructured":"Bakker, B.: Reinforcement Learning with Long Short-Term Memory. In: Becker, S., Dietterich, T.G., Ghahramani, Y. (eds.) Advances in Neural Information Processing Systems, pp. 1475\u20131482. MIT Press (2002)"},{"key":"38_CR2","unstructured":"Bellman, R.E.: Dynamic Programming. Princeton University Press (1957)"},{"issue":"2","key":"38_CR3","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y. Bengio","year":"1994","unstructured":"Bengio, Y., Simard, P., Frasconi, P.: Learning long-term dependencies with gradient descent is difficult. IEEE Transactions on Neural Networks\u00a05(2), 157\u2013166 (1994)","journal-title":"IEEE Transactions on Neural Networks"},{"key":"38_CR4","unstructured":"Duell, S., Hans, A., Udluft, S.: The Markov Decision Process Extraction Network. In: Proc. of the 18th European Symposium on Artificial Neural Networks (2010)"},{"key":"38_CR5","unstructured":"Duell, S., Weichbrodt, L., Hans, A., Udluft, S.: Recurrent Neural State Estimation in Domains with Long-Term Dependencies. In: Proc. of the 20th European Symposium on Artificial Neural Networks (2012)"},{"issue":"1","key":"38_CR6","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1162\/neco.1992.4.1.120","volume":"4","author":"P. Frasconi","year":"1992","unstructured":"Frasconi, P., Gori, M., Soda, G.: Local feedback multilayered networks. Neural Computation\u00a04(1), 120\u2013130 (1992)","journal-title":"Neural Computation"},{"key":"38_CR7","doi-asserted-by":"crossref","unstructured":"Gomez, F., Miikkulainen, R.: 2-D Balancing with Recurrent Evolutionary Networks. In: Proceedings of the International Conference on Artificial Neural Networks (ICANN 1998), pp. 425\u2013430. Springer (1998)","DOI":"10.1007\/978-1-4471-1599-1_63"},{"key":"38_CR8","unstructured":"Gomez, F.: Robust Non-Linear Control through Neuroevolution. PhD thesis, Departement of Computer Sciences Technical Report AI-TR-03-3003 (2003)"},{"key":"38_CR9","unstructured":"Haykin, S.: Neural networks and learning machines, vol.\u00a03. Prentice-Hall (2009)"},{"key":"38_CR10","doi-asserted-by":"crossref","unstructured":"Haykin, S., Principe, J., Sejnowski, T., McWhirter, J.: New directions in statistical signal processing: from systems to brain. MIT Press (2007)","DOI":"10.7551\/mitpress\/4977.001.0001"},{"key":"38_CR11","unstructured":"Kolen, J.F., Kremer, S.C.: A field guide to dynamical recurrent networks. IEEE Press (2001)"},{"key":"38_CR12","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"L.P. Kaelbling","year":"1998","unstructured":"Kaelbling, L.P., Littman, M.L., Cassandra, A.R.: Planning and acting in partially observable stochastic domains. Artificial Intelligence\u00a0101, 99\u2013134 (1998)","journal-title":"Artificial Intelligence"},{"key":"38_CR13","doi-asserted-by":"crossref","unstructured":"Kietzmann, T.C., Riedmiller, M.: The Neuro Slot Car Racer: Reinforcement Learning in a Real World Setting. In: Proc. of the Int. Conf. on Machine Learning and Applications. IEEE (2009)","DOI":"10.1109\/ICMLA.2009.15"},{"key":"38_CR14","doi-asserted-by":"crossref","unstructured":"Lin, T., Horne, B.G., Tino, P., Giles, C.L.: Learning long-term dependencies in NARX recurrent neural networks. IEEE Transactions on Neural Networks\u00a07(6) (1996)","DOI":"10.1109\/72.548162"},{"key":"38_CR15","doi-asserted-by":"crossref","unstructured":"Medsker, L., Jain, L.: Recurrent Neural Networks: Design and Application. International Series on Comp.\u00a0Intelligence, vol.\u00a0I. CRC Press (1999)","DOI":"10.1201\/9781420049176"},{"key":"38_CR16","unstructured":"Mozer, M.C.: Induction of multiscale temporal structure. In: Advances in Neural Information Processing Systems, vol.\u00a04, pp. 275\u2013282 (1992)"},{"key":"38_CR17","unstructured":"Meuleau, N., Peshkin, L., Kee-Eung, K., Kaebling, L.P.: Learning Finite-State Controllers for Partially Observable Environments. In: Proceedings of the Fifteenth International Conference on Uncertainty in Artificial Intelligence (UAI 1999), pp. 427\u2013436 (1999)"},{"key":"38_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1007\/3-540-49430-8_18","volume-title":"Neural Networks: Tricks of the Trade","author":"R. Neuneier","year":"1998","unstructured":"Neuneier, R., Zimmermann, H.-G.: How to Train Neural Networks. In: Orr, G.B., M\u00fcller, K.-R. (eds.) NIPS-WS 1996. LNCS, vol.\u00a01524, pp. 373\u2013423. Springer, Heidelberg (1998)"},{"key":"38_CR19","doi-asserted-by":"crossref","unstructured":"Peters, J., Schaal, A.: Reinforcement learning of motor skills with policy gradients. Neural Networks\u00a021(4) (2008)","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"38_CR20","unstructured":"Ramachandran, D.: Knowledge and Ignorance in Reinforcement Learning. PhD thesis, University of Illinois (2011)"},{"key":"38_CR21","unstructured":"Rosenstein, M.T., Barto, A.G., Si, J., Powell, W., Wunsch, D.: Supervised actor-critic reinforcement learning. In: Handbook of Learning and Approximate Dynamic Programming, pp. 359\u2013380 (2012)"},{"issue":"9","key":"38_CR22","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"D.E. Rumelhart","year":"1986","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning representations by back-propagating errors. Nature\u00a0323(9), 533\u2013536 (1986)","journal-title":"Nature"},{"key":"38_CR23","series-title":"LNCS","first-page":"735","volume-title":"NN: Tricks of the Trade","author":"M. Riedmiller","year":"2012","unstructured":"Riedmiller, M.: 10 Steps and Some Tricks to Set Up Neural Reinforcement Controllers. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) NN: Tricks of the Trade, 2nd edn. LNCS, vol.\u00a07700, pp. 735\u2013757. Springer, Heidelberg (2012)","edition":"2"},{"key":"38_CR24","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/11564096_32","volume-title":"Machine Learning: ECML 2005","author":"M. Riedmiller","year":"2005","unstructured":"Riedmiller, M.: Neural Fitted Q Iteration - First Experiences with a Data Efficient Neural Reinforcement Learning Method. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 317\u2013328. Springer, Heidelberg (2005)"},{"key":"38_CR25","doi-asserted-by":"crossref","unstructured":"Samuel, A.L.: Some studies in machine learning using the game of checkers. IBM Journal on Research and Developement, 210\u2013229 (1959)","DOI":"10.1147\/rd.33.0210"},{"key":"38_CR26","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"38_CR27","unstructured":"Schneegass, D.: Steigerung der Informationseffizienz im Reinforcement-Learning. PhD thesis, Luebeck University (2008)"},{"key":"38_CR28","doi-asserted-by":"crossref","unstructured":"Sch\u00e4fer, A.M., Schneegass, D., Sterzing, V., Udluft, S.: A Neural Reinforcement Learning Approach to Gas Turbine Control. In: Proc. of the Int. Joint Conf. on Neural Networks (2007)","DOI":"10.1109\/IJCNN.2007.4371212"},{"key":"38_CR29","unstructured":"Sch\u00e4fer, A.M., Udluft, S.: Solving Partially Observable Reinforcement Learning Problems with Recurrent Neural Networks. In: Workshop Proc. of the European Conf. on Machine Learning (2005)"},{"key":"38_CR30","unstructured":"Schneegass, D., Udluft, S., Martinetz, T.: Neural Rewards Regression for Near-Optimal Policy Identification in Markovian and Partial Observable Environments. In: Proc. of the European Symposium on Artificial Neural Networks, pp. 301\u2013306 (2007)"},{"key":"38_CR31","unstructured":"Sch\u00e4fer, A.M., Udluft, S., Zimmermann, H.G.: The Recurrent Control Neural Network. In: Proc. of the European Symposium on Artificial Neural Networks, pp. 319\u2013324 (2007)"},{"key":"38_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"632","DOI":"10.1007\/11840817_66","volume-title":"Artificial Neural Networks \u2013 ICANN 2006","author":"A.M. Sch\u00e4fer","year":"2006","unstructured":"Sch\u00e4fer, A.M., Zimmermann, H.-G.: Recurrent Neural Networks Are Universal Approximators. In: Kollias, S.D., Stafylopatis, A., Duch, W., Oja, E. (eds.) ICANN 2006. LNCS, vol.\u00a04131, pp. 632\u2013640. Springer, Heidelberg (2006)"},{"key":"38_CR33","first-page":"366","volume":"898","author":"F. Takens","year":"1981","unstructured":"Takens, F.: Detecting strange attractors in turbulence. Dynamical Systems and Turbulence\u00a0898, 366\u2013381 (1981)","journal-title":"Dynamical Systems and Turbulence"},{"key":"38_CR34","doi-asserted-by":"crossref","unstructured":"Zimmermann, H.G., Grothmann, R., Sch\u00e4fer, A.M., Tietz, C.: Identification and Forecasting of Large Dynamical Systems by Dynamical Consistent Neural Networks. In: New Directions in Statistical Signal Processing: From Systems to Brain, pp. 203\u2013242. MIT Press (2006)","DOI":"10.7551\/mitpress\/4977.003.0010"},{"key":"38_CR35","unstructured":"Zimmermann, H.G., Neuneier, R.: Neural network architectures for the modeling of dynamical systems. In: Kolen, J.F., Kremer, S.C. (eds.) A Field Guide to Dynamical Recurrent Networks, pp. 311\u2013350. IEEE Press (2001)"},{"key":"38_CR36","series-title":"LNCS","first-page":"687","volume-title":"NN: Tricks of the Trade","author":"H.G. Zimmermann","year":"2012","unstructured":"Zimmermann, H.G., Tietz, C., Grothmann, R.: Forecasting with Recurrent Neural Networks: 12 Tricks. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) NN: Tricks of the Trade, 2nd edn. LNCS, vol.\u00a07700, pp. 687\u2013707. Springer, Heidelberg (2012)","edition":"2"}],"container-title":["Lecture Notes in Computer Science","Neural Networks: Tricks of the Trade"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-35289-8_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T10:52:52Z","timestamp":1714560772000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-35289-8_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642352881","9783642352898"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-35289-8_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}