{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T14:58:01Z","timestamp":1729609081618,"version":"3.28.0"},"reference-count":11,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012,6]]},"DOI":"10.1109\/acc.2012.6315368","type":"proceedings-article","created":{"date-parts":[[2014,7,16]],"date-time":"2014-07-16T21:55:22Z","timestamp":1405547722000},"page":"1376-1381","source":"Crossref","is-referenced-by-count":0,"title":["The importance of variance reduction in policy gradient method"],"prefix":"10.1109","author":[{"family":"Tak Kit Lau","sequence":"first","affiliation":[]},{"family":"Yun-hui Liu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"3","doi-asserted-by":"crossref","DOI":"10.1002\/0471722138","author":"spall","year":"2003","journal-title":"Introduction to Stochastic Search and Optimization Estimation Simulation and Control"},{"key":"2","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"10","article-title":"PEGASUS: A policy search method for large MDPs and POMDPs","author":"ng","year":"2000","journal-title":"Proc of Conf on Uncertainty in Artificial Intelligence"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2006.282564"},{"key":"7","article-title":"Variance reduction techniques for gradient estimates in reinforcement learning","volume":"5","author":"greensmith","year":"2004","journal-title":"Journal of Machine Learning Research"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.2514\/3.3166"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1145\/318371.318612"},{"key":"9","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2009.V.027","article-title":"Policy search via the signed derivative","author":"kolter","year":"2009","journal-title":"Proc of Robotics Science and Systems"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2011.6161518"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5980510"}],"event":{"name":"2012 American Control Conference - ACC 2012","start":{"date-parts":[[2012,6,27]]},"location":"Montreal, QC","end":{"date-parts":[[2012,6,29]]}},"container-title":["2012 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6297579\/6314593\/06315368.pdf?arnumber=6315368","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,12]],"date-time":"2019-08-12T22:25:17Z","timestamp":1565648717000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6315368\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,6]]},"references-count":11,"URL":"https:\/\/doi.org\/10.1109\/acc.2012.6315368","relation":{},"subject":[],"published":{"date-parts":[[2012,6]]}}}