{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,12]],"date-time":"2025-04-12T05:47:32Z","timestamp":1744436852765,"version":"3.28.0"},"reference-count":12,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1109\/cdc.2002.1184395","type":"proceedings-article","created":{"date-parts":[[2003,8,27]],"date-time":"2003-08-27T11:38:00Z","timestamp":1061984280000},"page":"3367-3371","source":"Crossref","is-referenced-by-count":1,"title":["Gradient-based policy iteration: an example"],"prefix":"10.1109","volume":"3","author":[{"family":"Xi-Ren Cao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"family":"Hai-Tao Fang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"3","doi-asserted-by":"crossref","first-page":"771","DOI":"10.1016\/S0005-1098(99)00207-1","article-title":"A unified approach to Markov decision problems and performance sensitivity analysis","volume":"36","author":"cao","year":"2000","journal-title":"Automatica"},{"key":"2","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1023\/A:1008260528575","article-title":"The relations among potentials, perturbation analysis, and Markov decision processes","volume":"8","author":"cao","year":"1998","journal-title":"Discrete Event Dynamic Systems Theory and Applications"},{"journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming","year":"1994","author":"puterman","key":"10"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"1"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-4070-7"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1017\/S0305004100030231"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1109\/9.633827"},{"article-title":"From perturbation analysis to Markov decision processes and reinforcement learning","year":"2002","author":"cao","key":"4"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1109\/9.650016"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-4024-3"},{"journal-title":"Markov Chains","year":"1984","author":"revuz","key":"11"},{"journal-title":"Reinforcement Learning An Introduction","year":"1998","author":"sutton","key":"12"}],"event":{"name":"IEEE Conference on Decision and Control","acronym":"CDC-02","location":"Las Vegas, NV, USA"},"container-title":["Proceedings of the 41st IEEE Conference on Decision and Control, 2002."],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/8437\/26568\/01184395.pdf?arnumber=1184395","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,15]],"date-time":"2017-06-15T20:09:25Z","timestamp":1497557365000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/1184395\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"references-count":12,"URL":"https:\/\/doi.org\/10.1109\/cdc.2002.1184395","relation":{},"subject":[]}}