{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T11:57:22Z","timestamp":1743076642633,"version":"3.40.3"},"publisher-location":"Dordrecht","reference-count":14,"publisher":"Springer Netherlands","isbn-type":[{"type":"print","value":"9789048136551"},{"type":"electronic","value":"9789048136568"}],"license":[{"start":{"date-parts":[[2009,12,15]],"date-time":"2009-12-15T00:00:00Z","timestamp":1260835200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2009,12,15]],"date-time":"2009-12-15T00:00:00Z","timestamp":1260835200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-90-481-3656-8_82","type":"book-chapter","created":{"date-parts":[[2010,1,29]],"date-time":"2010-01-29T15:09:40Z","timestamp":1264777780000},"page":"455-460","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Actor-Critic Algorithms for Variance Minimization"],"prefix":"10.1007","author":[{"given":"Yogesh P.","family":"Awate","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2009,12,15]]},"reference":[{"key":"82_CR1","volume-title":"Neuro-Dynamic Programming","author":"D.P. Bertsekas","year":"1996","unstructured":"D.P. Bertsekas and J.N. Tsitsiklis, Neuro-Dynamic Programming, Athena Scientific, MA, 1996."},{"key":"82_CR2","doi-asserted-by":"crossref","unstructured":"L. Baird, \u201cResidual algorithms: reinforcement learning with function approximation\u201d, Proc. 12\n              \n                th\n              \n              International Conf. on Machine Learning, 1995, pp30-37.","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"82_CR3","first-page":"1057","volume":"12","author":"R. Sutton","year":"2000","unstructured":"R. Sutton, D. McAllester, S. Singh and Y. Mansour, \u201cPolicy gradient methods for reinforcement learning with function approximation\u201d, Adv. in Neural Info. Proc. Systems, 2000, 12:1057-1063.","journal-title":"Adv. in Neural Info. Proc. Systems"},{"key":"82_CR4","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1109\/9.905687","volume":"46","author":"P. Marbach","year":"2001","unstructured":"P. Marbach and J.N. Tsitsiklis, \u201cSimulation-based optimization of Markov reward processes\u201d, IEEE Trans. on Automatic Control, 2001, 46:191-209.","journal-title":"IEEE Trans. on Automatic Control"},{"key":"82_CR5","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/S0954-1810(01)00028-0","volume":"15","author":"J. Baxter","year":"2001","unstructured":"J. Baxter and P.L. Bartlett, \u201cInfinite-horizon policy-gradient estimation\u201d, Journal of Artificial Intelligence Research, 2001, 15:319-350.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"82_CR6","first-page":"1471","volume":"5","author":"E. Greensmith","year":"2004","unstructured":"E. Greensmith, P.L. Bartlett and J. Baxter, \u201cVariance reduction techniques for gradient estimates in reinforcement learning\u201d Journal of Machine Learning Research, 2004, Vol. 5, pp. 1471\u20131530.","journal-title":"Journal of Machine Learning Research"},{"key":"82_CR7","unstructured":"S. Bhatnagar, R.S. Sutton, M. Ghavamzadeh and M. Lee, \u201cNaturalgradient actor-critic algorithms\u201d Automatica, 2007 (to appear, http:\/\/drona.csa.iisc.ernet.in\/\u223cshalabh\/pubs\/ac_bhatnagar.pdf)."},{"key":"82_CR8","unstructured":"S. Bhatnagar, R.S. Sutton, M. Ghavamzadeh and M. Lee, \u201cIncremental natural actor-critic algorithms\u201d, Proc. 21\n              \n                st\n              \n              Annual Conference on Neural Information Processing Systems, 2007."},{"key":"82_CR9","unstructured":"S. Kakade, \u201cA natural policy gradient\u201d, Adv. in Neural Info. Proc. Systems, 2002, 14."},{"key":"82_CR10","doi-asserted-by":"crossref","unstructured":"J. Peters, S. Vijayakumar and S. Schaal, \u201cNatural actor-critic\u201d, Proc. 16\n              \n                th\n              \n              European Conference on Machine Learning, 2005, pp. 280-291.","DOI":"10.1007\/11564096_29"},{"issue":"2","key":"82_CR11","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1109\/72.125867","volume":"3","author":"S. Amari","year":"1992","unstructured":"S. Amari, K. Kurata and H. Nagaoka, \u201cInformation geometry of Boltzmann machines\u201d IEEE Trans. on Neural Networks, 1992, Vol. 3, No. 2, pp 260-271.","journal-title":"IEEE Trans. on Neural Networks"},{"issue":"2","key":"82_CR12","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1162\/089976698300017746","volume":"10","author":"S. Amari","year":"1998","unstructured":"S. Amari, \u201cNatural gradient works efficiently in learning\u201d, Neural Computation, 1998, 10(2):251-276.","journal-title":"Neural Computation"},{"key":"82_CR13","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1016\/S0167-6911(97)90015-3","volume":"29","author":"V.S. Borkar","year":"1997","unstructured":"V.S. Borkar, \u201cStochastic approximation with two timescales\u201d, Systems and Control Letters, 1997, 29:291-294.","journal-title":"Systems and Control Letters"},{"issue":"4","key":"82_CR14","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V.R. Konda","year":"2003","unstructured":"V.R. Konda and J.N. Tsitsiklis, \u201cOn actor-critic algorithms\u201d, SIAM Journal on Control and Optimization, 2003, 42(4):1143-1166.","journal-title":"SIAM Journal on Control and Optimization"}],"container-title":["Technological Developments in Education and Automation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-90-481-3656-8_82","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,18]],"date-time":"2023-02-18T02:59:59Z","timestamp":1676689199000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-90-481-3656-8_82"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,12,15]]},"ISBN":["9789048136551","9789048136568"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-90-481-3656-8_82","relation":{},"subject":[],"published":{"date-parts":[[2009,12,15]]},"assertion":[{"value":"15 December 2009","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}