{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,3,30]],"date-time":"2022-03-30T12:16:16Z","timestamp":1648642576444},"reference-count":8,"publisher":"Elsevier BV","issue":"2","license":[{"start":{"date-parts":[[2003,2,1]],"date-time":"2003-02-01T00:00:00Z","timestamp":1044057600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Automatica"],"published-print":{"date-parts":[[2003,2]]},"DOI":"10.1016\/s0005-1098(02)00231-5","type":"journal-article","created":{"date-parts":[[2003,2,17]],"date-time":"2003-02-17T17:19:30Z","timestamp":1045502370000},"page":"373-376","source":"Crossref","is-referenced-by-count":0,"title":["Self-learning control of finite Markov chains"],"prefix":"10.1016","volume":"39","author":[{"given":"Benjamin","family":"Van Roy","sequence":"first","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/S0005-1098(02)00231-5_BIB1","unstructured":"Bertsekas, D. P., & Tsitsiklis, J. N. (1995). Neuro-dynamic programming. Athena Scientific."},{"key":"10.1016\/S0005-1098(02)00231-5_BIB2","doi-asserted-by":"crossref","unstructured":"Borkar, V. S., & Konda, V. R. (1997). Actor-critic algorithm as multi-time scale stochastic approximation algorithm. Sadhana, Indian Academy of Sciences, Proceedings in Engineering Sciences, Vol. 22 (pp. 525\u2013543).","DOI":"10.1007\/BF02745577"},{"key":"10.1016\/S0005-1098(02)00231-5_BIB3","doi-asserted-by":"crossref","unstructured":"Fiechter, C. N. (1994). Efficient reinforcement learning. COLT94: Proceedings of the seventh annual ACM conference on computational learning theory (pp. 88\u201397). New York: ACM Press.","DOI":"10.1145\/180139.181019"},{"key":"10.1016\/S0005-1098(02)00231-5_BIB4","unstructured":"Gullapalli, V., & Barto, A. G. (1994). Convergence of indirect adaptive asynchronous value iteration algorithms. In J.D. Cowman, G. Tesauro & J. Alspector (eds.), Advances in neural information processing systems, Vol. 6 (pp. 695\u2013702). Los Altos, CA; Morgan Kauffman."},{"key":"10.1016\/S0005-1098(02)00231-5_BIB5","unstructured":"Kearns, M., & Singh, S. (2002). Near-optimal reinforcement learning in polynomial time, Vol. 49 London: Kluwer Academic Publisher, (pp. 209\u2013232)."},{"key":"10.1016\/S0005-1098(02)00231-5_BIB6","doi-asserted-by":"crossref","first-page":"94","DOI":"10.1137\/S036301299731669X","article-title":"Actor-critic type learning algorithms for Markov decision processes","volume":"38","author":"Konda","year":"1999","journal-title":"SIAM Journal on Control and Optimization"},{"key":"10.1016\/S0005-1098(02)00231-5_BIB7","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1023\/A:1007678930559","article-title":"Convergence results for single-step on-policy reinforcement-learning algorithms","volume":"38","author":"Singh","year":"2000","journal-title":"Machine Learning"},{"key":"10.1016\/S0005-1098(02)00231-5_BIB8","series-title":"Reinforcement learning: An introduction","author":"Sutton","year":"1998"}],"container-title":["Automatica"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0005109802002315?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0005109802002315?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,3,12]],"date-time":"2020-03-12T02:25:13Z","timestamp":1583979913000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0005109802002315"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003,2]]},"references-count":8,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2003,2]]}},"alternative-id":["S0005109802002315"],"URL":"https:\/\/doi.org\/10.1016\/s0005-1098(02)00231-5","relation":{},"ISSN":["0005-1098"],"issn-type":[{"value":"0005-1098","type":"print"}],"subject":[],"published":{"date-parts":[[2003,2]]}}}