{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T08:09:06Z","timestamp":1729670946917,"version":"3.28.0"},"reference-count":14,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009,3]]},"DOI":"10.1109\/adprl.2009.4927536","type":"proceedings-article","created":{"date-parts":[[2009,5,19]],"date-time":"2009-05-19T19:50:44Z","timestamp":1242762644000},"page":"130-136","source":"Crossref","is-referenced-by-count":0,"title":["Algorithms for variance reduction in a policy-gradient based actor-critic framework"],"prefix":"10.1109","author":[{"given":"Yogesh P.","family":"Awate","sequence":"first","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]}],"member":"263","reference":[{"key":"13","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6911(97)90015-3"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/72.125867"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1162\/089976698300017746"},{"key":"3","first-page":"1057","article-title":"policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"sutton","year":"2000","journal-title":"Adv in Neural Info Proc Systems"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"1"},{"key":"10","first-page":"280","article-title":"natural actor-critic","author":"peters","year":"2005","journal-title":"Proceedings of 10th European Conference on Machine Learning"},{"key":"7","article-title":"naturalgradient actor-critic algorithms","author":"bhatnagar","year":"2007","journal-title":"Automatica"},{"key":"6","first-page":"1471","article-title":"variance reduction techniques for gradient estimates in reinforcement learning","volume":"5","author":"greensmith","year":"2004","journal-title":"Journal of Machine Learning Research"},{"key":"5","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","article-title":"infinite-horizon policy-gradient estimation","volume":"15","author":"baxter","year":"2001","journal-title":"Journal of Artificial Intelligence Research"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1109\/9.905687"},{"key":"9","article-title":"a natural policy gradient","volume":"14","author":"kakade","year":"2002","journal-title":"Adv in Neural Info Proc Systems"},{"key":"8","article-title":"incremental natural actor-critic algorithms","author":"bhatnagar","year":"2007","journal-title":"Proc 21st Annual Conference on Neural Information Processing Systems"}],"event":{"name":"2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","start":{"date-parts":[[2009,3,30]]},"location":"Nashville, TN, USA","end":{"date-parts":[[2009,4,2]]}},"container-title":["2009 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4910084\/4927513\/04927536.pdf?arnumber=4927536","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T06:51:38Z","timestamp":1558335098000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4927536\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,3]]},"references-count":14,"URL":"https:\/\/doi.org\/10.1109\/adprl.2009.4927536","relation":{},"subject":[],"published":{"date-parts":[[2009,3]]}}}