{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T21:58:20Z","timestamp":1773093500086,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":15,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540875352","type":"print"},{"value":"9783540875369","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-87536-9_40","type":"book-chapter","created":{"date-parts":[[2008,9,5]],"date-time":"2008-09-05T15:23:30Z","timestamp":1220628210000},"page":"387-396","source":"Crossref","is-referenced-by-count":23,"title":["Policy Gradients with Parameter-Based Exploration for Control"],"prefix":"10.1007","author":[{"given":"Frank","family":"Sehnke","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christian","family":"Osendorfer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomas","family":"R\u00fcckstie\u00df","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alex","family":"Graves","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J\u00fcrgen","family":"Schmidhuber","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"40_CR1","doi-asserted-by":"crossref","unstructured":"Benbrahim, H., Franklin, J.: Biped dynamic walking using reinforcement learning. Robotics and Autonomous Systems Journal (1997)","DOI":"10.1016\/S0921-8890(97)00043-2"},{"key":"40_CR2","doi-asserted-by":"crossref","unstructured":"Peters, J., Schaal, S.: Policy gradient methods for robotics. In: IROS-2006, Beijing, China, pp. 2219\u20132225 (2006)","DOI":"10.1109\/IROS.2006.282564"},{"key":"40_CR3","volume-title":"Advances in Neural Information Processing Systems","author":"N. Schraudolph","year":"2006","unstructured":"Schraudolph, N., Yu, J., Aberdeen, D.: Fast online policy gradient learning with smd gain vector adaptation. In: Weiss, Y., Sch\u00f6lkopf, B., Platt, J. (eds.) Advances in Neural Information Processing Systems, vol.\u00a018. MIT Press, Cambridge (2006)"},{"key":"40_CR4","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"280","DOI":"10.1007\/11564096_29","volume-title":"Machine Learning: ECML 2005","author":"J. Peters","year":"2005","unstructured":"Peters, J., Vijayakumar, S., Schaal, S.: Natural actor-critic. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 280\u2013291. Springer, Heidelberg (2005)"},{"key":"40_CR5","first-page":"229","volume":"8","author":"R. Williams","year":"1992","unstructured":"Williams, R.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning\u00a08, 229\u2013256 (1992)","journal-title":"Machine Learning"},{"key":"40_CR6","first-page":"41","volume-title":"Proc. 17th International Conf. on Machine Learning","author":"J. Baxter","year":"2000","unstructured":"Baxter, J., Bartlett, P.L.: Reinforcement learning in POMDPs via direct gradient ascent. In: Proc. 17th International Conf. on Machine Learning, pp. 41\u201348. Morgan Kaufmann, San Francisco (2000)"},{"key":"40_CR7","unstructured":"Aberdeen, D.: Policy-Gradient Algorithms for Partially Observable Markov Decision Processes. PhD thesis, Australian National University (2003)"},{"key":"40_CR8","unstructured":"Sutton, R., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: NIPS 1999, pp. 1057\u20131063 (2000)"},{"key":"40_CR9","volume-title":"Evolution and optimum seeking","author":"H. Schwefel","year":"1995","unstructured":"Schwefel, H.: Evolution and optimum seeking. Wiley, New York (1995)"},{"issue":"4","key":"40_CR10","first-page":"482","volume":"19","author":"J. Spall","year":"1998","unstructured":"Spall, J.: An overview of the simultaneous perturbation method for efficient optimization. Johns Hopkins APL Technical Digest\u00a019(4), 482\u2013492 (1998)","journal-title":"Johns Hopkins APL Technical Digest"},{"key":"40_CR11","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Peters, J., Schaal, S.: Evaluation of policy gradient methods and variants on the cart-pole benchmark. In: ADPRL 2007 (2007)","DOI":"10.1109\/ADPRL.2007.368196"},{"key":"40_CR12","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"220","DOI":"10.1007\/978-3-540-74565-5_18","volume-title":"KI 2007: Advances in Artificial Intelligence","author":"H. M\u00fcller","year":"2007","unstructured":"M\u00fcller, H., Lauer, M., Hafner, R., Lange, S., Merke, A., Riedmiller, M.: Making a robot learn to play soccer. In: Hertzberg, J., Beetz, M., Englert, R. (eds.) KI 2007. LNCS (LNAI), vol.\u00a04667, pp. 220\u2013234. Springer, Heidelberg (2007)"},{"key":"40_CR13","unstructured":"Jordan, M.: Attractor dynamics and parallelism in a connectionist sequential machine. In: Proc. of the Eighth Annual Conference of the Cognitive Science Society, vol.\u00a08, pp. 531\u2013546 (1986)"},{"key":"40_CR14","unstructured":"Ulbrich, H.: Institute of Applied Mechanics, TU M\u00fcnchen, Germany (2008), http:\/\/www.amm.mw.tum.de\/"},{"issue":"2","key":"40_CR15","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1162\/106365601750190398","volume":"9","author":"N. Hansen","year":"2001","unstructured":"Hansen, N., Ostermeier, A.: Completely Derandomized Self-Adaptation in Evolution Strategies. Evolutionary Computation\u00a09(2), 159\u2013195 (2001)","journal-title":"Evolutionary Computation"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks - ICANN 2008"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-87536-9_40.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,24]],"date-time":"2020-11-24T02:38:35Z","timestamp":1606185515000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-87536-9_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540875352","9783540875369"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-87536-9_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[]}}