{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T21:30:11Z","timestamp":1729632611478,"version":"3.28.0"},"reference-count":17,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,12]]},"DOI":"10.1109\/wsc.2013.6721456","type":"proceedings-article","created":{"date-parts":[[2014,1,31]],"date-time":"2014-01-31T17:35:58Z","timestamp":1391189758000},"page":"623-630","source":"Crossref","is-referenced-by-count":7,"title":["Relative value iteration for average reward semi-Markov control via simulation"],"prefix":"10.1109","author":[{"given":"Abhijit","family":"Gosavi","sequence":"first","affiliation":[]}],"member":"263","reference":[{"key":"17","doi-asserted-by":"publisher","DOI":"10.1016\/0022-247X(63)90017-9"},{"key":"15","first-page":"11","article-title":"Aunified analysis of value-function-based reinforcement-learning algorithms","volume":"11","author":"szepesvari","year":"1998","journal-title":"Neural Computation"},{"journal-title":"Learning from delayed rewards","year":"1989","author":"watkins","key":"16"},{"key":"13","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","author":"puterman","year":"1994","journal-title":"Markov Decision Processes"},{"journal-title":"Reinforcement Learning","year":"1998","author":"sutton","key":"14"},{"key":"11","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4684-9352-8","author":"kushner","year":"1978","journal-title":"Stochastic Approximation Methods for Constrained and Unconstrained Systems"},{"key":"12","article-title":"Autonomous helicopter flight via reinforcement learning","volume":"17","author":"ng","year":"2004","journal-title":"Advances in neural information processing systems"},{"journal-title":"Dynamic Programming and Optimal Control Volumes I and II","year":"2012","author":"bertsekas","key":"3"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012999361974"},{"key":"1","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1007\/978-3-642-00196-3_45","article-title":"Autonomous autorotation of an RC helicopter","volume":"54","author":"abbeel","year":"2009","journal-title":"Experimental Robotics Eleventh International Symposium Series"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"7","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-3766-0","author":"gosavi","year":"2003","journal-title":"Simulation-based Optimization Parametric Optimization Techniques and Reinforcement Learning"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.45.4.560"},{"key":"5","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84628-690-2","author":"chang","year":"2007","journal-title":"Simulation-Based Algorithms for Markov Decision Processes"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"4"},{"journal-title":"Introduction to Probability","year":"1997","author":"grinstead","key":"9"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1016\/S0377-2217(02)00874-3"}],"event":{"name":"2013 Winter Simulation Conference - (WSC 2013)","start":{"date-parts":[[2013,12,8]]},"location":"Washington, DC, USA","end":{"date-parts":[[2013,12,11]]}},"container-title":["2013 Winter Simulations Conference (WSC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6709837\/6721384\/06721456.pdf?arnumber=6721456","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,7]],"date-time":"2019-08-07T01:54:58Z","timestamp":1565142898000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6721456\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,12]]},"references-count":17,"URL":"https:\/\/doi.org\/10.1109\/wsc.2013.6721456","relation":{},"subject":[],"published":{"date-parts":[[2013,12]]}}}