{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T02:57:32Z","timestamp":1725677852487},"reference-count":12,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2007,7]]},"DOI":"10.1109\/acc.2007.4282587","type":"proceedings-article","created":{"date-parts":[[2007,8,8]],"date-time":"2007-08-08T16:01:54Z","timestamp":1186588914000},"page":"534-539","source":"Crossref","is-referenced-by-count":3,"title":["Parametrized Actor-Critic Algorithms for Finite-Horizon MDPs"],"prefix":"10.1109","author":[{"given":"Mohammed Shahid","family":"Abdulla","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shalabh","family":"Bhatnagar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref4","first-page":"49","article-title":"Least-squares Temporal Difference Learning","author":"boyan","year":"1999","journal-title":"Proceedings of the Sixteenth International Conference on Machine Learning (ICML)"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/9.793723"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/9.119632"},{"key":"ref6","article-title":"Reinforcement learning based algorithms for finite horizon markov decision processes","author":"bhatnagar","year":"2005","journal-title":"submitted"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4684-9352-8","article-title":"Stochastic Approximation Methods for Constrained and Unconstrained Systems","author":"kushner","year":"1978"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012901385691"},{"article-title":"Introduction to Probability Models, 7\/e","year":"2000","author":"ross","key":"ref12"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/9.905687"},{"key":"ref7","article-title":"Reinforcement learning based algorithms for average cost markov decision processes","author":"abdulla","year":"2006","journal-title":"Accepted for publication in Discrete Event Dynamical Systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref9","article-title":"An actor-critic algorithm for finite horizon markov decision processes","author":"bhatnagar","year":"2006","journal-title":"Proceedings of the 45th IEEE-CDC Dec 11&#x2013; 13"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/S0005-1098(99)00099-0"}],"event":{"name":"2007 American Control Conference","start":{"date-parts":[[2007,7,9]]},"location":"New York, NY, USA","end":{"date-parts":[[2007,7,13]]}},"container-title":["2007 American Control Conference"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/4282134\/4282135\/04282587.pdf?arnumber=4282587","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,17]],"date-time":"2017-06-17T21:50:02Z","timestamp":1497736202000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/4282587\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,7]]},"references-count":12,"URL":"https:\/\/doi.org\/10.1109\/acc.2007.4282587","relation":{},"ISSN":["0743-1619"],"issn-type":[{"type":"print","value":"0743-1619"}],"subject":[],"published":{"date-parts":[[2007,7]]}}}