{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T23:10:50Z","timestamp":1725491450751},"publisher-location":"Berlin, Heidelberg","reference-count":15,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540749578"},{"type":"electronic","value":"9783540749585"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-74958-5_43","type":"book-chapter","created":{"date-parts":[[2007,9,7]],"date-time":"2007-09-07T16:33:00Z","timestamp":1189182780000},"page":"466-477","source":"Crossref","is-referenced-by-count":4,"title":["Policy Gradient Critics"],"prefix":"10.1007","author":[{"given":"Daan","family":"Wierstra","sequence":"first","affiliation":[]},{"given":"J\u00fcrgen","family":"Schmidhuber","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"43_CR1","volume-title":"Reinforcement learning: An introduction","author":"R. Sutton","year":"1998","unstructured":"Sutton, R., Barto, A.: Reinforcement learning: An introduction. MIT Press, Cambridge, MA (1998)"},{"doi-asserted-by":"crossref","unstructured":"Singh, S., Jaakkola, T., Jordan, M.: Learning without state-estimation in partially observable markovian decision processes. In: International Conference on Machine Learning, pp. 284\u2013292 (1994)","key":"43_CR2","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"unstructured":"Aberdeen, D.: Policy-Gradient Algorithms for Partially Observable Markov Decision Processes. PhD thesis, Australian National University (2003)","key":"43_CR3"},{"key":"43_CR4","first-page":"427","volume-title":"Proc. Fifteenth Conference on Uncertainty in Artificial Intelligence (UAI 1999)","author":"N.L. Meuleau","year":"1999","unstructured":"Meuleau, N.L., Kim, K., Kaelbling, L.P.: Learning finite-state controllers for partially observable environments. In: Proc. Fifteenth Conference on Uncertainty in Artificial Intelligence (UAI 1999), pp. 427\u2013436. Morgan Kaufmann, San Francisco (1999)"},{"key":"43_CR5","volume-title":"Proc. of the 2005 conference on genetic and evolutionary computation (GECCO), Washington, D. C.","author":"F.J. Gomez","year":"2005","unstructured":"Gomez, F.J., Schmidhuber, J.: Co-evolving recurrent neurons learn deep memory POMDPs. In: Proc. of the 2005 conference on genetic and evolutionary computation (GECCO), Washington, D. C., ACM Press, New York (2005)"},{"issue":"8","key":"43_CR6","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S. Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Computation\u00a09(8), 1735\u20131780 (1997)","journal-title":"Neural Computation"},{"key":"43_CR7","doi-asserted-by":"publisher","first-page":"1550","DOI":"10.1109\/5.58337","volume":"78","author":"P. Werbos","year":"1990","unstructured":"Werbos, P.: Back propagation through time: What it does and how to do it. Proceedings of the IEEE\u00a078, 1550\u20131560 (1990)","journal-title":"Proceedings of the IEEE"},{"issue":"2","key":"43_CR8","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1162\/neco.1989.1.2.270","volume":"1","author":"R.J. Williams","year":"1989","unstructured":"Williams, R.J., Zipser, D.: A learning algorithm for continually running fully recurrent networks. Neural Computation\u00a01(2), 270\u2013280 (1989)","journal-title":"Neural Computation"},{"key":"43_CR9","volume-title":"A Field Guide to Dynamical Recurrent Neural Networks","author":"S. Hochreiter","year":"2001","unstructured":"Hochreiter, S., Bengio, Y., Frasconi, P., Schmidhuber, J.: Gradient flow in recurrent nets: the difficulty of learning long-term dependencies. In: Kremer, S.C., Kolen, J.F. (eds.) A Field Guide to Dynamical Recurrent Neural Networks, IEEE Press, Los Alamitos (2001)"},{"unstructured":"Schmidhuber, J.: RNN overview, with links to a dozen journal publications (2004) \n                    \n                      http:\/\/www.idsia.ch\/~juergen\/rnn.html","key":"43_CR10"},{"key":"43_CR11","first-page":"362","volume-title":"Machine Learning: Proceedings of the Twelfth International Conference","author":"M. Littman","year":"1995","unstructured":"Littman, M., Cassandra, A., Kaelbling, L.: Learning policies for partially observable environments: Scaling up. In: Prieditis, A., Russell, S. (eds.) Machine Learning: Proceedings of the Twelfth International Conference, pp. 362\u2013370. Morgan Kaufmann Publishers, San Francisco, CA (1995)"},{"key":"43_CR12","first-page":"667","volume-title":"Proceedings of the International Joint Conference on Neural Networks","author":"A. Wieland","year":"1991","unstructured":"Wieland, A.: Evolving neural network controllers for unstable systems. In: Proceedings of the International Joint Conference on Neural Networks, Seattle, WA, pp. 667\u2013673. IEEE, Piscataway, NJ (1991)"},{"unstructured":"Bakker, B.: Reinforcement learning with long short-term memory. Advances in Neural Information Processing Syst.\u00a014 (2002)","key":"43_CR13"},{"key":"43_CR14","first-page":"323","volume-title":"Proc. 15th International Conf. on Machine Learning","author":"J. Loch","year":"1998","unstructured":"Loch, J., Singh, S.: Using eligibility traces to find the best memoryless policy in partially observable Markov decision processes. In: Proc. 15th International Conf. on Machine Learning, pp. 323\u2013331. Morgan Kaufmann, San Francisco, CA (1998)"},{"unstructured":"Bakker, B.: The State of Mind: Reinforcement Learning with Recurrent Neural Networks. PhD thesis, Leiden University (2004)","key":"43_CR15"}],"container-title":["Lecture Notes in Computer Science","Machine Learning: ECML 2007"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-74958-5_43.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,27]],"date-time":"2021-04-27T10:48:39Z","timestamp":1619520519000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-74958-5_43"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540749578","9783540749585"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-74958-5_43","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[]}}