{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T20:06:53Z","timestamp":1767902813481,"version":"3.49.0"},"publisher-location":"Berlin, Heidelberg","reference-count":22,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540746898","type":"print"},{"value":"9783540746904","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2007]]},"DOI":"10.1007\/978-3-540-74690-4_71","type":"book-chapter","created":{"date-parts":[[2007,9,13]],"date-time":"2007-09-13T13:03:00Z","timestamp":1189688580000},"page":"697-706","source":"Crossref","is-referenced-by-count":66,"title":["Solving Deep Memory POMDPs with Recurrent Policy Gradients"],"prefix":"10.1007","author":[{"given":"Daan","family":"Wierstra","sequence":"first","affiliation":[]},{"given":"Alexander","family":"Foerster","sequence":"additional","affiliation":[]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[]},{"given":"J\u00fcrgen","family":"Schmidhuber","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"71_CR1","doi-asserted-by":"crossref","unstructured":"Benbrahim, H., Franklin, J.: Biped dynamic walking using reinforcement learning. Robotics and Autonomous Systems Journal (1997)","DOI":"10.1016\/S0921-8890(97)00043-2"},{"issue":"4","key":"71_CR2","doi-asserted-by":"publisher","first-page":"875","DOI":"10.1109\/72.935097","volume":"12","author":"J. Moody","year":"2001","unstructured":"Moody, J., Saffell, M.: Learning to Trade via Direct Reinforcement. IEEE Transactions on Neural Networks\u00a012(4), 875\u2013889 (2001)","journal-title":"IEEE Transactions on Neural Networks"},{"key":"71_CR3","volume-title":"ADPRL","author":"D. Prokhorov","year":"2007","unstructured":"Prokhorov, D.: Toward effective combination of off-line and on-line training in adp framework. In: ADPRL. Proceedings of the IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning, IEEE Computer Society Press, Los Alamitos (2007)"},{"key":"71_CR4","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1613\/jair.807","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P., Weaver, L.: Experiments with infinite-horizon, policy- gradient estimation. Journal of Artificial Intelligence Research\u00a015, 351\u2013381 (2001)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"71_CR5","doi-asserted-by":"crossref","unstructured":"Peters, J., Schaal, S.: Policy gradient methods for robotics. In: IROS. Proceedings of the IEEE\/RSJ International Conference on Intelligent Robots and Systems, Beijing, China, pp. 2219\u20132225 (2006)","DOI":"10.1109\/IROS.2006.282564"},{"key":"71_CR6","first-page":"229","volume":"8","author":"R.J. Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning\u00a08, 229\u2013256 (1992)","journal-title":"Machine Learning"},{"issue":"6","key":"71_CR7","doi-asserted-by":"publisher","first-page":"671","DOI":"10.1016\/0893-6080(90)90056-Q","volume":"3","author":"V. Gullapalli","year":"1990","unstructured":"Gullapalli, V.: A stochastic reinforcement learning algorithm for learning real-valued functions. Neural Networks\u00a03(6), 671\u2013692 (1990)","journal-title":"Neural Networks"},{"key":"71_CR8","volume-title":"Advances in Neural Information Processing Systems","author":"N. Schraudolph","year":"2006","unstructured":"Schraudolph, N., Yu, J., Aberdeen, D.: Fast online policy gradient learning with smd gain vector adaptation. In: Weiss, Y., Sch\u00f6lkopf, B., Platt, J. (eds.) Advances in Neural Information Processing Systems, vol.\u00a018, MIT Press, Cambridge, MA (2006)"},{"key":"71_CR9","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1007\/11564096_29","volume-title":"Machine Learning: ECML 2005","author":"J. Peters","year":"2005","unstructured":"Peters, J., Vijayakumar, S., Schaal, S.: Natural actor-critic. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 280\u2013291. Springer, Heidelberg (2005)"},{"key":"71_CR10","unstructured":"Sutton, R., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation (2001)"},{"key":"71_CR11","unstructured":"Gullapalli, V.: Reinforcement learning and its application to control (1992)"},{"key":"71_CR12","doi-asserted-by":"publisher","first-page":"1550","DOI":"10.1109\/5.58337","volume":"78","author":"P. Werbos","year":"1990","unstructured":"Werbos, P.: Back propagation through time: What it does and how to do it. Proceedings of the IEEE\u00a078, 1550\u20131560 (1990)","journal-title":"Proceedings of the IEEE"},{"key":"71_CR13","doi-asserted-by":"crossref","unstructured":"Singh, S.P., Jaakkola, T., Jordan, M.I.: Learning without state-estimation in partially observable markovian decision processes. In: International Conference on Machine Learning, pp. 284\u2013292 (1994)","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"key":"71_CR14","unstructured":"Aberdeen, D.: Policy-Gradient Algorithms for Partially Observable Markov Decision Processes. PhD thesis, Australian National University (2003)"},{"key":"71_CR15","first-page":"427","volume-title":"UAI \u201999","author":"N. Meuleau","year":"1999","unstructured":"Meuleau, N., Peshkin, L., Kim, K.-E., Kaelbling, L.P.: Learning finite-state controllers for partially observable environments. In: UAI \u201999. Proc. Fifteenth Conference on Uncertainty in Artificial Intelligence, pp. 427\u2013436. Morgan Kaufmann, San Francisco (1999)"},{"issue":"8","key":"71_CR16","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S. Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Computation\u00a09(8), 1735\u20131780 (1997)","journal-title":"Neural Computation"},{"key":"71_CR17","unstructured":"Bakker, B.: Reinforcement learning with long short-term memory. In: Advances in Neural Information Processing Syst., vol.\u00a014 (2002)"},{"key":"71_CR18","unstructured":"Baxter, J., Bartlett, P.: Direct gradient-based reinforcement learning (1999)"},{"key":"71_CR19","volume-title":"A Field Guide to Dynamical Recurrent Neural Networks","author":"S. Hochreiter","year":"2001","unstructured":"Hochreiter, S., Bengio, Y., Frasconi, P., Schmidhuber, J.: Gradient flow in recurrent nets: the difficulty of learning long-term dependencies. In: Kremer, S.C., Kolen, J.F. (eds.) A Field Guide to Dynamical Recurrent Neural Networks, IEEE Press, NJ, New York (2001)"},{"key":"71_CR20","unstructured":"Schmidhuber, J.: RNN overview (2004), http:\/\/www.idsia.ch\/~juergen\/rnn.html"},{"key":"71_CR21","first-page":"667","volume-title":"Proceedings of the International Joint Conference on Neural Networks","author":"A. Wieland","year":"1991","unstructured":"Wieland, A.: Evolving neural network controllers for unstable systems. In: Proceedings of the International Joint Conference on Neural Networks, Seattle, WA, pp. 667\u2013673. IEEE Service Center, Piscataway, NJ (1991)"},{"key":"71_CR22","unstructured":"Torcs: Torcs, the open racing car simulator (2007), http:\/\/torcs.sourceforge.net\/"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks \u2013 ICANN 2007"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-74690-4_71","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,3]],"date-time":"2019-05-03T01:45:13Z","timestamp":1556847913000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-74690-4_71"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007]]},"ISBN":["9783540746898","9783540746904"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-74690-4_71","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2007]]}}}