{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T12:05:10Z","timestamp":1725537910810},"publisher-location":"Berlin, Heidelberg","reference-count":27,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642040191"},{"type":"electronic","value":"9783642040207"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-04020-7_30","type":"book-chapter","created":{"date-parts":[[2009,9,14]],"date-time":"2009-09-14T14:43:19Z","timestamp":1252939399000},"page":"276-285","source":"Crossref","is-referenced-by-count":0,"title":["Using Control Theory for Analysis of Reinforcement Learning and Optimal Policy Properties in Grid-World Problems"],"prefix":"10.1007","author":[{"given":"S. Mostapha","family":"Kalami Heris","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohammad-Bagher","family":"Naghibi Sistani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Naser","family":"Pariz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"30_CR1","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M.L. Puterman","year":"2005","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, Chichester (2005)"},{"key":"30_CR2","unstructured":"Cassandra, A.: Exact and Approximate Algorithms for Partially Observable Markov Decision Processes. Ph.D. Thesis, Brown University (1998)"},{"key":"30_CR3","unstructured":"Pyeatt, L.: Integration of Partially Observable Markov Decision Processes and Reinforcement Learning for Simulated Robot Navigation. Ph.D. Thesis, Colorado State University (1999)"},{"key":"30_CR4","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"key":"30_CR5","volume-title":"Reinforcement Learning: An Introduction","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. The MIT Press, Cambridge (1998)"},{"key":"30_CR6","volume-title":"Dynamic Programming: A Computational Tool","author":"A. Lew","year":"2007","unstructured":"Lew, A., Mauch, H.: Dynamic Programming: A Computational Tool. Springer, Berlin (2007)"},{"key":"30_CR7","unstructured":"Reynolds, S.I.: Reinforcement Learning with Exploration. Ph.D. Thesis, School of Computer Science, University of Birmingham, UK (2002)"},{"key":"30_CR8","volume-title":"Handbook of Markov Decision Processes: Methods and Applications","author":"B. Roy Van","year":"2002","unstructured":"Van Roy, B.: Neuro-Dynamic Programming: Overview and Recent Trends. In: Feinberg, E.A., Schwartz, A. (eds.) Handbook of Markov Decision Processes: Methods and Applications. Kluwer Academic, Dordrecht (2002)"},{"key":"30_CR9","doi-asserted-by":"publisher","DOI":"10.1109\/9780470544785","volume-title":"Handbook of Learning and Approximate Dynamic Programming","author":"J. Si","year":"2004","unstructured":"Si, J., et al.: Handbook of Learning and Approximate Dynamic Programming. Wiley InterScience, Hoboken (2004)"},{"issue":"1","key":"30_CR10","doi-asserted-by":"crossref","first-page":"59","DOI":"10.4310\/CIS.2007.v7.n1.a4","volume":"7","author":"H. Soo Chang","year":"2007","unstructured":"Soo Chang, H., et al.: A survey of some Simulation-Based Algorithms for Markov Decision Processes. Communications in Information and Systems\u00a07(1), 59\u201392 (2007)","journal-title":"Communications in Information and Systems"},{"key":"30_CR11","doi-asserted-by":"publisher","first-page":"796","DOI":"10.1287\/opre.50.5.796.365","volume":"50","author":"J.E. Smith","year":"2002","unstructured":"Smith, J.E., Mc Cardle, K.F.: Structural Properties of Stochastic Dynamic Programs. Operations Research\u00a050, 796\u2013809 (2002)","journal-title":"Operations Research"},{"key":"30_CR12","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1287\/opre.48.2.327.13375","volume":"46","author":"M.C. Fu","year":"2000","unstructured":"Fu, M.C., et al.: Monotone optimal policies for queuing staffing problem. Operations Research\u00a046, 327\u2013331 (2000)","journal-title":"Operations Research"},{"key":"30_CR13","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/S0004-3702(00)00047-3","volume":"122","author":"R. Givan","year":"2000","unstructured":"Givan, R., et al.: Bounded Markov Decision Processes. Artificial Intelligence\u00a0122, 71\u2013109 (2000)","journal-title":"Artificial Intelligence"},{"key":"30_CR14","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L.P. Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement Learning: A Survey. Journal of Artificial Intelligence Research\u00a04, 237\u2013285 (1996)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"30_CR15","unstructured":"Gordon, G.J.: Approximate Solution to Markov Decision Processes. Ph.D. Thesis, School of Computer Science, Carnegie Mellon University (1999)"},{"issue":"3","key":"30_CR16","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1023\/A:1004641123405","volume":"105","author":"D.P. Farias de","year":"2000","unstructured":"de Farias, D.P., Van Roy, B.: On the Existance of Fixed-points for Approximate Value Iteration and Temporal-Difference Learning. Journal of Optimization theory and Applications\u00a0105(3), 589\u2013608 (2000)","journal-title":"Journal of Optimization theory and Applications"},{"key":"30_CR17","volume-title":"Real Analysis","author":"H. Royden","year":"1988","unstructured":"Royden, H.: Real Analysis, 3rd edn. Prentice Hall, Englewood Cliffs (1988)","edition":"3"},{"key":"30_CR18","volume-title":"Markov Decision Processes with Their Applications.","author":"Q. Hu","year":"2008","unstructured":"Hu, Q., Yue, W.: Markov Decision Processes with Their Applications. Springer Science+Busines Media, LLC (2008)"},{"key":"30_CR19","volume-title":"Simulation-based Algorithms for Markov Decision Processes","author":"H. Soo","year":"2007","unstructured":"Soo, H., et al.: Simulation-based Algorithms for Markov Decision Processes. Springer, London (2007)"},{"key":"30_CR20","unstructured":"Fernandez, F., Veloso, M.: Exploration and Policy Reuse. Technical Report, School of Computer Science, Carnegie Mellon University (2005)"},{"key":"30_CR21","unstructured":"Fernandez, F., Veloso, M.: Probabilistic Reuse of Past policies. Technical Report, School of Computer Science, Carnegie Mellon University (2005)"},{"key":"30_CR22","unstructured":"Fernandez, F., Veloso, M.: Building a Library of Policies through Policy Reuse. Technical Report, School of Computer Science, Carnegie Mellon University (2005)"},{"key":"30_CR23","unstructured":"Bernstein, D.S.: Reusing Old Policies to Accelerate Learning on New Markov Decision Processes. Technical Report, University of Massachusetts (1999)"},{"key":"30_CR24","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1613\/jair.761","volume":"14","author":"N.L. Zhang","year":"2001","unstructured":"Zhang, N.L., Zhang, W.: Speeding Up the Convergence of Value Iteration in Partially Observable Markov Decision Processes. Journal of Artificial Intelligence Research\u00a014, 29\u201351 (2001)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"30_CR25","unstructured":"Hansen, E.A.: An Improved Policy Iteration for Partially Observable Markov Decision Processes. In: Proceedings of 10th Neural Information Processing Systems Conference (1997)"},{"key":"30_CR26","unstructured":"Sallans, B.: Reinforcement Learning for Factored Markov Decision Processes. Ph.D. Thesis, Graduate Department of Computer Science, University of Toronto (2002)"},{"key":"30_CR27","volume-title":"Discrete-Time Control Systems","author":"K. Ogata","year":"1994","unstructured":"Ogata, K.: Discrete-Time Control Systems, 2nd edn. Prentice Hall, Englewood Cliffs (1994)","edition":"2"}],"container-title":["Lecture Notes in Computer Science","Emerging Intelligent Computing Technology and Applications. With Aspects of Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-04020-7_30.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,30]],"date-time":"2021-04-30T06:54:14Z","timestamp":1619765654000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-04020-7_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642040191","9783642040207"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-04020-7_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2009]]}}}