{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T20:36:11Z","timestamp":1725568571725},"publisher-location":"Berlin, Heidelberg","reference-count":16,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642162916"},{"type":"electronic","value":"9783642162923"}],"license":[{"start":{"date-parts":[[2010,1,1]],"date-time":"2010-01-01T00:00:00Z","timestamp":1262304000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-16292-3_21","type":"book-chapter","created":{"date-parts":[[2010,11,2]],"date-time":"2010-11-02T09:56:53Z","timestamp":1288691813000},"page":"207-218","source":"Crossref","is-referenced-by-count":3,"title":["Revisiting Natural Actor-Critics with Value Function Approximation"],"prefix":"10.1007","author":[{"given":"Matthieu","family":"Geist","sequence":"first","affiliation":[]},{"given":"Olivier","family":"Pietquin","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"21_CR1","unstructured":"Barto, A.G., Sutton, R.S., Anderson, C.W.: Neuronlike adaptive elements that can solve difficult learning control problems, pp. 535\u2013549 (1988)"},{"key":"21_CR2","unstructured":"Watkins, C.: Learning from Delayed Rewards. PhD thesis, Cambridge University, Cambridge, England (1989)"},{"key":"21_CR3","unstructured":"Sutton, R.S., McAllester, D.A., Singh, S.P., Mansour, Y.: Policy Gradient Methods for Reinforcement Learning with Function Approximation. In: Advances in Neural Information Processing Systems (NIPS 12), pp. 1057\u20131063 (2000)"},{"key":"21_CR4","unstructured":"Konda, V.R., Tsitsiklis, J.N.: Actor-Critic Algorithms. In: Advances in Neural Information Processing Systems, NIPS 12 (2000)"},{"key":"21_CR5","unstructured":"Peters, J., Vijayakumar, S., Schaal, S.: Reinforcement Learning for Humanoid Robotics. In: Third IEEE-RAS International Conference on Humanoid Robots, Humanoids 2003 (2003)"},{"key":"21_CR6","volume-title":"Adaptive Computation and Machine Learning","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. In: Adaptive Computation and Machine Learning, 3rd edn. The MIT Press, Cambridge (1998)","edition":"3"},{"key":"21_CR7","unstructured":"Bhatnagar, S., Sutton, R.S., Ghavamzadeh, M., Lee, M.: Incremental Natural Actor-Critic Algorithms. In: Advances in Neural Information Processing Systems (NIPS 21), Vancouver, Canada (2007)"},{"key":"21_CR8","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1162\/089976698300017746","volume":"10","author":"S.I. Amari","year":"1998","unstructured":"Amari, S.I.: Natural gradient works efficiently in learning. Neural Computation\u00a010, 251\u2013276 (1998)","journal-title":"Neural Computation"},{"key":"21_CR9","unstructured":"Kakade, S.: A Natural Policy Gradient. In: Advances in Neural Information Processing Systems (NIPS 14), pp. 1531\u20131538 (2002)"},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Geist, M., Pietquin, O., Fricout, G.: Kalman Temporal Differences: the deterministic case. In: Proceedings of the IEEE International Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL 2009), Nashville, TN, USA (2009)","DOI":"10.1109\/ADPRL.2009.4927543"},{"key":"21_CR11","unstructured":"Morimura, T., Uchibe, E., Doya, K.: Utilizing the Natural Gradient in Temporal Difference Reinforcement Learning with Eligibility Traces. In: 2nd Internatinal Symposium on Information Geometry and its Applications, Tokyo, Japan, pp. 256\u2013263 (2005)"},{"key":"21_CR12","doi-asserted-by":"crossref","unstructured":"Wiering, M., van Hasselt, H.: The QV Family Compared to Other Reinforcement Learning Algorithms. In: IEEE International Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL 2009), Nashville, TN, USA (2009)","DOI":"10.1109\/ADPRL.2009.4927532"},{"key":"21_CR13","first-page":"33","volume":"22","author":"S.J. Bradtke","year":"1996","unstructured":"Bradtke, S.J., Barto, A.G.: Linear Least-Squares algorithms for temporal difference learning. Machine Learning\u00a022, 33\u201357 (1996)","journal-title":"Machine Learning"},{"key":"21_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"502","DOI":"10.1007\/978-3-642-10677-4_57","volume-title":"Neural Information Processing","author":"M. Geist","year":"2009","unstructured":"Geist, M., Pietquin, O., Fricout, G.: Tracking in reinforcement learning. In: Leung, C.S., Lee, M., Chan, J.H. (eds.) ICONIP 2009. LNCS, vol.\u00a05863, pp. 502\u2013511. Springer, Heidelberg (2009)"},{"key":"21_CR15","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1007\/11596448_9","volume-title":"Computational Intelligence and Security","author":"J. Park","year":"2005","unstructured":"Park, J., Kim, J., Kang, D.: An RLS-Based Natural Actor-Critic Algorithm for Locomotion of a Two-Linked Robot Arm. In: Hao, Y., Liu, J., Wang, Y.-P., Cheung, Y.-m., Yin, H., Jiao, L., Ma, J., Jiao, Y.-C. (eds.) CIS 2005. LNCS (LNAI), vol.\u00a03801, pp. 65\u201372. Springer, Heidelberg (2005)"},{"key":"21_CR16","first-page":"1107","volume":"4","author":"M.G. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R.: Least-Squares Policy Iteration. Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"Journal of Machine Learning Research"}],"container-title":["Lecture Notes in Computer Science","Modeling Decisions for Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-16292-3_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,21]],"date-time":"2019-03-21T20:00:01Z","timestamp":1553198401000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-16292-3_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642162916","9783642162923"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-16292-3_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2010]]}}}