{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T06:41:24Z","timestamp":1725518484446},"publisher-location":"Berlin, Heidelberg","reference-count":15,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540875352"},{"type":"electronic","value":"9783540875369"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-87536-9_42","type":"book-chapter","created":{"date-parts":[[2008,9,5]],"date-time":"2008-09-05T15:23:30Z","timestamp":1220628210000},"page":"407-416","source":"Crossref","is-referenced-by-count":1,"title":["Episodic Reinforcement Learning by Logistic Reward-Weighted Regression"],"prefix":"10.1007","author":[{"given":"Daan","family":"Wierstra","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tom","family":"Schaul","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Juergen","family":"Schmidhuber","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"42_CR1","doi-asserted-by":"crossref","unstructured":"Kaelbling, L.P., Littman, M.L., Cassandra, A.R.: Planning and acting in partially observable stochastic domains. Artificial Intelligence\u00a0101 (1998)","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"42_CR2","volume-title":"Optimization of Stochastic Systems","author":"M. Aoki","year":"1967","unstructured":"Aoki, M.: Optimization of Stochastic Systems. Academic Press, New York (1967)"},{"key":"42_CR3","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/S0954-1810(01)00028-0","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P.: Infinite-horizon policy-gradient estimation. Journal of Artificial Intelligence Research\u00a015, 319\u2013350 (2001)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"42_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"697","DOI":"10.1007\/978-3-540-74690-4_71","volume-title":"Artificial Neural Networks \u2013 ICANN 2007","author":"D. Wierstra","year":"2007","unstructured":"Wierstra, D., Foerster, A., Peters, J., Schmidhuber, J.: Solving deep memory pomdps with recurrent policy gradients. In: de S\u00e1, J.M., Alexandre, L.A., Duch, W., Mandic, D.P. (eds.) ICANN 2007. LNCS, vol.\u00a04668, pp. 697\u2013706. Springer, Heidelberg (2007)"},{"key":"42_CR5","doi-asserted-by":"crossref","unstructured":"Peters, J., Schaal, S.: Reinforcement learning by reward-weighted regression for operational space control. In: Proceedings of the International Conference on Machine Learning (ICML) (2007)","DOI":"10.1145\/1273496.1273590"},{"issue":"2","key":"42_CR6","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1162\/neco.1997.9.2.271","volume":"9","author":"P. Dayan","year":"1997","unstructured":"Dayan, P., Hinton, G.E.: Using expectation-maximization for reinforcement learning. Neural Computation\u00a09(2), 271\u2013278 (1997)","journal-title":"Neural Computation"},{"issue":"8","key":"42_CR7","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S. Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Computation\u00a09(8), 1735\u20131780 (1997)","journal-title":"Neural Computation"},{"key":"42_CR8","volume-title":"A Field Guide to Dynamical Recurrent Neural Networks","author":"S. Hochreiter","year":"2001","unstructured":"Hochreiter, S., Bengio, Y., Frasconi, P., Schmidhuber, J.: Gradient flow in recurrent nets: the difficulty of learning long-term dependencies. In: Kremer, S.C., Kolen, J.F. (eds.) A Field Guide to Dynamical Recurrent Neural Networks. IEEE Press, Los Alamitos (2001)"},{"key":"42_CR9","unstructured":"Schmidhuber, J.: RNN overview (2004), \n                    \n                      http:\/\/www.idsia.ch\/~juergen\/rnn.html"},{"key":"42_CR10","doi-asserted-by":"publisher","first-page":"1550","DOI":"10.1109\/5.58337","volume":"78","author":"P. Werbos","year":"1990","unstructured":"Werbos, P.: Back propagation through time: What it does and how to do it. Proceedings of the IEEE\u00a078, 1550\u20131560 (1990)","journal-title":"Proceedings of the IEEE"},{"key":"42_CR11","unstructured":"Chernoff, H., Moses, L.E.: Elementary Decision Theory. Dover Publications (1987)"},{"key":"42_CR12","volume-title":"Logistic Regression","author":"D.G. Kleinbaum","year":"2002","unstructured":"Kleinbaum, D.G., Klein, M., Pryor, E.R.: Logistic Regression, 2nd edn. Springer, Heidelberg (2002)","edition":"2"},{"key":"42_CR13","doi-asserted-by":"crossref","unstructured":"James, M.R., Singh, S., Littman, M.L.: Planning with predictive state representations. In: Proceedings 2004 International Conference on Machine Learning and Applications, pp. 304\u2013311 (2004)","DOI":"10.1109\/ICMLA.2004.1383528"},{"key":"42_CR14","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1145\/1143844.1143861","volume-title":"ICML 2006: Proceedings of the 23rd international conference on Machine learning","author":"M. Bowling","year":"2006","unstructured":"Bowling, M., McCracken, P., James, M., Neufeld, J., Wilkinson, D.: Learning predictive state representations using non-blind policies. In: ICML 2006: Proceedings of the 23rd international conference on Machine learning, pp. 129\u2013136. ACM, New York (2006)"},{"key":"42_CR15","unstructured":"Bakker, B.: Reinforcement learning with long short-term memory. In: Advances in Neural Information Processing Syst., vol. 14 (2002)"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks - ICANN 2008"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-87536-9_42.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,27]],"date-time":"2021-04-27T12:01:00Z","timestamp":1619524860000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-87536-9_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540875352","9783540875369"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-87536-9_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[]}}