{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,17]],"date-time":"2026-05-17T02:02:14Z","timestamp":1778983334242,"version":"3.51.4"},"reference-count":12,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2008,12,1]],"date-time":"2008-12-01T00:00:00Z","timestamp":1228089600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Artif Life Robotics"],"published-print":{"date-parts":[[2008,12]]},"DOI":"10.1007\/s10015-008-0514-8","type":"journal-article","created":{"date-parts":[[2008,12,13]],"date-time":"2008-12-13T10:27:57Z","timestamp":1229164077000},"page":"275-279","source":"Crossref","is-referenced-by-count":3,"title":["Natural actor-critic with baseline adjustment for variance reduction"],"prefix":"10.1007","volume":"13","author":[{"given":"Tetsuro","family":"Morimura","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eiji","family":"Uchibe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kenji","family":"Doya","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2008,12,14]]},"reference":[{"key":"514_CR1","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter J, Bartlett P (2001) Infinite-horizon policy-gradient estimation. J Artif Intell Res 15:319\u2013350","journal-title":"J Artif Intell Res"},{"key":"514_CR2","volume-title":"Advances in neural information processing systems, vol 14","author":"S. Kakade","year":"2002","unstructured":"Kakade S (2002) A natural policy gradient. In: Advances in neural information processing systems, vol 14, MIT Press, Cambridge"},{"key":"514_CR3","unstructured":"Peters J, Vijayakumar S, Schaal S (2003) Reinforcement learning for humanoid robotics. IEEE-RAS International Conference on Humanoid Robots, Karlsruhe, Germany"},{"key":"514_CR4","unstructured":"Morimura T, Uchibe E, Doya K (2005) Utilizing natural gradient in temporal difference reinforcement learning with eligibility traces. In: International Symposium on Information Geometry and its Applications, Tokyo, pp 256\u2013263"},{"key":"514_CR5","first-page":"1471","volume":"5","author":"E. Greensmith","year":"2004","unstructured":"Greensmith E, Bartlett P, Baxter J (2004) Variance reduction techniques for gradient estimates in reinforcement learning. J Mach Learning Res 5:1471\u20131530","journal-title":"J Mach Learning Res"},{"key":"514_CR6","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1162\/089976698300017746","volume":"10","author":"S. Amari","year":"1998","unstructured":"Amari S (1998) Natural gradient works efficiently in learning. Neural Comput 10:251\u2013276","journal-title":"Neural Comput"},{"key":"514_CR7","volume-title":"Dynamic programming and optimal control, vol 1 and 2","author":"D.P. Bertsekas","year":"1995","unstructured":"Bertsekas DP (1995) Dynamic programming and optimal control, vol 1 and 2. Athena Scientific, Belmont, MA"},{"key":"514_CR8","volume-title":"Reinforcement learning","author":"R.S. Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning. MIT Press, Cambridge"},{"key":"514_CR9","unstructured":"Bagnell D, Schneider J (2003) Covariant policy search. Proceedings of the International Joint Conference on Artificial Intelligence, July, Morgan Kaufmann, Acapulco, Mexico"},{"key":"514_CR10","doi-asserted-by":"crossref","unstructured":"Peters J, Schaal S (2006) Policy gradient methods for robotics. IEEE International Conference on Intelligent Robots and Systems, Beijing, China","DOI":"10.1109\/IROS.2006.282564"},{"key":"514_CR11","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1613\/jair.807","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter J, Bartlett P, Weaver L (2001) Experiments with infinitehorizon policy-gradient estimation. J Artif Intell Res 15:351\u2013381","journal-title":"J Artif Intell Res"},{"key":"514_CR12","unstructured":"Kimura H, Kobayashi S (1998) An analysis of actor\/critic algorithms using eligibility traces: reinforcement learning with imperfect value function. International Conference on Machine Learning, Morgan Kaufmann, WI, pp 278\u2013286"}],"container-title":["Artificial Life and Robotics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-008-0514-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10015-008-0514-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10015-008-0514-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,29]],"date-time":"2019-05-29T12:36:01Z","timestamp":1559133361000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10015-008-0514-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,12]]},"references-count":12,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2008,12]]}},"alternative-id":["514"],"URL":"https:\/\/doi.org\/10.1007\/s10015-008-0514-8","relation":{},"ISSN":["1433-5298","1614-7456"],"issn-type":[{"value":"1433-5298","type":"print"},{"value":"1614-7456","type":"electronic"}],"subject":[],"published":{"date-parts":[[2008,12]]}}}