{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T04:43:02Z","timestamp":1741668182086,"version":"3.38.0"},"reference-count":26,"publisher":"SAGE Publications","issue":"2","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["RDA"],"published-print":{"date-parts":[[2017,5,31]]},"DOI":"10.3233\/rda-160116","type":"journal-article","created":{"date-parts":[[2017,6,2]],"date-time":"2017-06-02T19:38:47Z","timestamp":1496432327000},"page":"79-95","source":"Crossref","is-referenced-by-count":1,"title":["Estimate and approximate policy iteration algorithm for discounted Markov decision models with bounded costs and Borel spaces"],"prefix":"10.1177","volume":"6","author":[{"given":"M. Teresa","family":"Robles-Alcar\u00e1z","sequence":"first","affiliation":[{"name":"Departamento de Matem\u00e1ticas, Universidad de Sonora, Rosales s\/n, Col. Centro, 83000 Hermosillo, Sonora, Mexico"}]},{"given":"\u00d3scar","family":"Vega-Amaya","sequence":"additional","affiliation":[{"name":"Departamento de Matem\u00e1ticas, Universidad de Sonora, Rosales s\/n, Col. Centro, 83000 Hermosillo, Sonora, Mexico"}]},{"given":"J. Adolfo","family":"Minj\u00e1rez-Sosa","sequence":"additional","affiliation":[{"name":"Departamento de Matem\u00e1ticas, Universidad de Sonora, Rosales s\/n, Col. Centro, 83000 Hermosillo, Sonora, Mexico"}]}],"member":"179","reference":[{"key":"10.3233\/RDA-160116_ref1","first-page":"541","article-title":"Approximate fixed point iteration with an application to infinite horizon Markov decision processes","volume":"46","author":"Almudevar","year":"2008","journal-title":"SIAM Journal on Control and Optimization"},{"key":"10.3233\/RDA-160116_ref2","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1007\/s10994-007-5038-2","article-title":"Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path","volume":"71","author":"Antos","year":"2008","journal-title":"Machine Learning"},{"key":"10.3233\/RDA-160116_ref3","doi-asserted-by":"crossref","first-page":"310","DOI":"10.1007\/s11768-011-1005-3","article-title":"Approximate policy iteration: A survey and some new methods","volume":"9","author":"Bersetkas","year":"2011","journal-title":"Journal of Control Theory and Applications"},{"key":"10.3233\/RDA-160116_ref4","unstructured":"D.P.\u00a0Bertsekas and J.N.\u00a0Tsitsiklis, Neuro-Dynamic Programming, Athena Scientific, Belmont, MA, 1995."},{"key":"10.3233\/RDA-160116_ref5","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1017\/S0269964803172051","article-title":"Convergence of simulation-based policy iteration","volume":"17","author":"Cooper","year":"2003","journal-title":"Probability in the Engineering and Informational Sciences"},{"key":"10.3233\/RDA-160116_ref6","unstructured":"L.\u00a0Devroye and L.\u00a0Gy\u00f6rfi, Nonparametric Density Estimation: The L1 View, Wiley, New York, 1985."},{"key":"10.3233\/RDA-160116_ref7","doi-asserted-by":"crossref","unstructured":"L.\u00a0Devroye and G.\u00a0Lugosi, Combinatorial Methods in Density Estimation, Springer, New York, 2001.","DOI":"10.1007\/978-1-4613-0125-7"},{"key":"10.3233\/RDA-160116_ref8","unstructured":"A.\u00a0Farahmand, M.\u00a0Ghavamzadeh, C.\u00a0Szepesvari and S.\u00a0Mannor, Regularized policy iteration, in: Advances in Neural Information Processing Systems, Vancouver, BC, Canada, 2008, pp.\u00a0441\u2013448."},{"key":"10.3233\/RDA-160116_ref9","first-page":"217","article-title":"Adaptive control for discrete-time Markov processes with unbounded costs: Discounted criterion","volume":"34","author":"Gordienko","year":"1998","journal-title":"Kybernetika"},{"key":"10.3233\/RDA-160116_ref10","doi-asserted-by":"crossref","unstructured":"O.\u00a0Hern\u00e1ndez-Lerma, Adaptive Markov Control Processes, Springer, New York, 1989.","DOI":"10.1007\/978-1-4419-8714-3"},{"key":"10.3233\/RDA-160116_ref11","doi-asserted-by":"crossref","unstructured":"O.\u00a0Hern\u00e1ndez-Lerma and J.B.\u00a0Lasserre, Discrete-Time Markov Control Processes: Basic Optimality Criteria, Springer, New York, 1996.","DOI":"10.1007\/978-1-4612-0729-0"},{"key":"10.3233\/RDA-160116_ref12","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1007\/s001860100170","article-title":"Adaptive policies for time-varying stochastic systems under discounted criterion","volume":"54","author":"Hilgert","year":"2001","journal-title":"Math. Methods Oper. Res."},{"key":"10.3233\/RDA-160116_ref13","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1007\/s00186-005-0024-6","article-title":"Adaptive control of stochastic systems with unknown disturbance distribution: Discounted criteria","volume":"63","author":"Hilgert","year":"2006","journal-title":"Math. Methods Oper. Res."},{"key":"10.3233\/RDA-160116_ref14","doi-asserted-by":"crossref","unstructured":"J.\u00a0Ma and W.B.\u00a0Powell, A convergent recursive least squares approximate policy iteration algorithm for multi-dimensional Markov decision process with continuous state and action spaces, in: IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning, New York, 2009, pp.\u00a066\u201373.","DOI":"10.1109\/ADPRL.2009.4927527"},{"key":"10.3233\/RDA-160116_ref16","first-page":"681","article-title":"Approximation and estimation in Markov control processes under a discounted criterion","volume":"40","author":"Minj\u00e1rez-Sosa","year":"2004","journal-title":"Kybernetika"},{"key":"10.3233\/RDA-160116_ref17","first-page":"2303","article-title":"Performance bounds in L p -norm for approximate value iteration","volume":"47","author":"Munos","year":"2007","journal-title":"SIAM Journal on Control and Optimization"},{"key":"10.3233\/RDA-160116_ref18","doi-asserted-by":"crossref","unstructured":"W.B.\u00a0Powell, Approximate Dynamic Programming: Solving the Curse of Dimensionality, Wiley, 2007.","DOI":"10.1002\/9780470182963"},{"key":"10.3233\/RDA-160116_ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-012-1077-6"},{"key":"10.3233\/RDA-160116_ref20","doi-asserted-by":"crossref","first-page":"336","DOI":"10.1007\/s11768-011-0313-y","article-title":"A review of stochastic algorithms with continuous value function approximation and some new approximate policy iteration algorithms for multidimensional continuous applications","volume":"9","author":"Powell","year":"2011","journal-title":"Journal of Control Theory and Applications"},{"key":"10.3233\/RDA-160116_ref21","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1007\/BF01168566","article-title":"Measurable selection theorems for optimization problems","volume":"24","author":"Rieder","year":"1978","journal-title":"Manuscripta Math."},{"key":"10.3233\/RDA-160116_ref22","doi-asserted-by":"crossref","unstructured":"J.\u00a0Rust, Numerical dynamic programming in economics, in: Handbook of Computational Economics, H.M.\u00a0Amman, D.A.\u00a0Kendrick and J.\u00a0Rust, eds, Vol.\u00a01, Elsevier, 1996, pp.\u00a0619\u2013728.","DOI":"10.1016\/S1574-0021(96)01016-7"},{"key":"10.3233\/RDA-160116_ref24","doi-asserted-by":"crossref","first-page":"2094","DOI":"10.1137\/S0363012902399824","article-title":"Convergence properties of policy iteration","volume":"42","author":"Santos","year":"2004","journal-title":"SIAM Journal on Control and Optimization"},{"key":"10.3233\/RDA-160116_ref25","unstructured":"R.J.\u00a0Serfling, Approximation Theorems of Mathematical Statistics, Wiley, 2002."},{"key":"10.3233\/RDA-160116_ref26","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1007\/s10614-007-9111-5","article-title":"Continuous state dynamic programming via nonexpansive approximation","volume":"31","author":"Stachurski","year":"2008","journal-title":"Computational Economics"},{"key":"10.3233\/RDA-160116_ref28","doi-asserted-by":"crossref","first-page":"451","DOI":"10.1007\/BF01198405","article-title":"Application of average dynamic programming to inventory systems","volume":"47","author":"Vega-Amaya","year":"1998","journal-title":"Math. Methods Oper. Res."},{"key":"10.3233\/RDA-160116_ref29","doi-asserted-by":"crossref","first-page":"973","DOI":"10.1109\/TNN.2007.899161","article-title":"Kernel-based least squares policy iteration for reinforcement learning","volume":"18","author":"Xu","year":"2007","journal-title":"IEEE Transactions on Neural Networks"}],"container-title":["Risk and Decision Analysis"],"original-title":[],"link":[{"URL":"https:\/\/content.iospress.com\/download?id=10.3233\/RDA-160116","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T01:17:37Z","timestamp":1741655857000},"score":1,"resource":{"primary":{"URL":"https:\/\/journals.sagepub.com\/doi\/full\/10.3233\/RDA-160116"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5,31]]},"references-count":26,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.3233\/rda-160116","relation":{},"ISSN":["1875-9173","1569-7371"],"issn-type":[{"type":"electronic","value":"1875-9173"},{"type":"print","value":"1569-7371"}],"subject":[],"published":{"date-parts":[[2017,5,31]]}}}