{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T06:03:37Z","timestamp":1725516217753},"publisher-location":"Berlin, Heidelberg","reference-count":19,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642334856"},{"type":"electronic","value":"9783642334863"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-33486-3_14","type":"book-chapter","created":{"date-parts":[[2012,9,10]],"date-time":"2012-09-10T16:39:17Z","timestamp":1347295157000},"page":"211-226","source":"Crossref","is-referenced-by-count":4,"title":["Policy Iteration Based on a Learned Transition Model"],"prefix":"10.1007","author":[{"given":"Vivek","family":"Ramavajjala","sequence":"first","affiliation":[]},{"given":"Charles","family":"Elkan","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"14_CR1","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1007\/PL00011391","volume":"89","author":"R.H. Byrd","year":"1996","unstructured":"Byrd, R.H., Gilbert, J.C., Nocedal, J.: A trust region method based on interior point techniques for nonlinear programming. Mathematical Programming\u00a089, 149\u2013185 (1996)","journal-title":"Mathematical Programming"},{"key":"14_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1007\/978-3-642-29946-9_11","volume-title":"Recent Advances in Reinforcement Learning","author":"C. Elkan","year":"2012","unstructured":"Elkan, C.: Reinforcement Learning with a Bilinear Q Function. In: Sanner, S., Hutter, M. (eds.) EWRL 2011. LNCS, vol.\u00a07188, pp. 78\u201388. Springer, Heidelberg (2012)"},{"issue":"1","key":"14_CR3","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. Journal of Machine Learning Research\u00a06(1), 503\u2013556 (2005)","journal-title":"Journal of Machine Learning Research"},{"unstructured":"Hannah, L., Dunson, D.B.: Approximate dynamic programming for storage problems. In: Proceedings of the 28th International Conference on Machine Learning (ICML), pp. 337\u2013344 (2011)","key":"14_CR4"},{"unstructured":"Hesami, A.: Matlab implementation of inverted pendulum, http:\/\/webdocs.cs.ualberta.ca\/~sutton\/pole.zip","key":"14_CR5"},{"issue":"7","key":"14_CR6","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1287\/mnsc.14.7.503","volume":"14","author":"R.A. Howard","year":"1968","unstructured":"Howard, R.A.: Comments on the origin and application of Markov decision processes. Management Science\u00a014(7), 503\u2013507 (1968)","journal-title":"Management Science"},{"doi-asserted-by":"crossref","unstructured":"Jong, N., Stone, P.: Model-based function approximation in reinforcement learning. In: Proceedings of the Sixth International Conference on Autonomous Agents and Multiagent Systems (AAMAS), pp. 658\u2013665. ACM (2007)","key":"14_CR7","DOI":"10.1145\/1329125.1329242"},{"key":"14_CR8","first-page":"1107","volume":"4","author":"M.G. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R., Bartlett, L.: Least-squares policy iteration. 
Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"Journal of Machine Learning Research"},{"doi-asserted-by":"crossref","unstructured":"Mahadevan, S., Maggioni, M.: Proto-value functions: A Laplacian framework for learning representation and control in Markov decision processes. Journal of Machine Learning Research, 2169\u20132231 (2007)","key":"14_CR9","DOI":"10.1561\/9781601982391"},{"key":"14_CR10","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1007\/978-3-540-87481-2_5","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"F.S. Melo","year":"2008","unstructured":"Melo, F.S., Lopes, M.: Fitted Natural Actor-Critic: A New Algorithm for Continuous State-Action MDPs. In: Daelemans, W., Goethals, B., Morik, K. (eds.) ECML PKDD 2008, Part II. LNCS (LNAI), vol.\u00a05212, pp. 66\u201381. Springer, Heidelberg (2008)"},{"issue":"1","key":"14_CR11","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1007\/s10479-005-5732-z","volume":"134","author":"I. Menache","year":"2005","unstructured":"Menache, I., Mannor, S., Shimkin, N.: Basis function adaptation in temporal difference reinforcement learning. Annals of Operations Research\u00a0134(1), 215\u2013238 (2005)","journal-title":"Annals of Operations Research"},{"doi-asserted-by":"crossref","unstructured":"Parr, R., Li, L., Taylor, G., Painter-Wakefield, C., Littman, M.: An analysis of linear models, linear value-function approximation, and feature selection for reinforcement learning. In: Proceedings of the 25th International Conference on Machine Learning (ICML), pp. 752\u2013759 (2008)","key":"14_CR12","DOI":"10.1145\/1390156.1390251"},{"issue":"1","key":"14_CR13","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1287\/ijoc.1090.0349","volume":"22","author":"W.B. Powell","year":"2010","unstructured":"Powell, W.B.: Merging AI and OR to solve high-dimensional stochastic optimization problems using approximate dynamic programming. INFORMS Journal on Computing\u00a022(1), 2\u201317 (2010)","journal-title":"INFORMS Journal on Computing"},{"unstructured":"Smart, W.D., Kaelbling, L.P.: Practical reinforcement learning in continuous spaces. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 903\u2013910. Morgan Kaufmann (2000)","key":"14_CR14"},{"doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Reinforcement learning architectures for animats. In: Proceedings of the International Workshop on the Simulation of Adaptive Behavior: From Animals to Animats, pp. 288\u2013296. MIT Press (1991)","key":"14_CR15","DOI":"10.7551\/mitpress\/3115.003.0040"},{"doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. Cambridge University Press (1998)","key":"14_CR16","DOI":"10.1109\/TNN.1998.712192"},{"issue":"1","key":"14_CR17","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1287\/ijoc.1090.0350","volume":"22","author":"J.N. Tsitsiklis","year":"2010","unstructured":"Tsitsiklis, J.N.: Commentary\u2014perspectives on stochastic optimization over time. INFORMS Journal on Computing\u00a022(1), 18\u201319 (2010)","journal-title":"INFORMS Journal on Computing"},{"unstructured":"Uc Cetina, V.: Multilayer perceptrons with radial basis functions as value functions in reinforcement learning. In: Proceedings of the 16th European Symposium on Artificial Neural Networks (ESANN), pp. 
161\u2013166 (2008)","key":"14_CR18"},{"issue":"1","key":"14_CR19","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/91.481841","volume":"4","author":"H.O. Wang","year":"1996","unstructured":"Wang, H.O., Tanaka, K., Griffin, M.F.: An approach to fuzzy control of nonlinear systems: stability and design issues. IEEE Transactions on Fuzzy Systems\u00a04(1), 14\u201323 (1996)","journal-title":"IEEE Transactions on Fuzzy Systems"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-33486-3_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,29]],"date-time":"2024-04-29T02:48:14Z","timestamp":1714358894000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-33486-3_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642334856","9783642334863"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-33486-3_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}
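
The record above is a Crossref "work" message for the chapter "Policy Iteration Based on a Learned Transition Model" (DOI 10.1007/978-3-642-33486-3_14). As an illustration only, and not part of the deposited metadata, the following is a minimal Python sketch of how such a record can be fetched and a few of its fields read. It assumes the requests package and Crossref's public REST endpoint https://api.crossref.org/works/{DOI}; the field names are taken directly from the record above.

import requests

# DOI of the work record shown above.
DOI = "10.1007/978-3-642-33486-3_14"

# Fetch the work record from the public Crossref REST API.
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()

# The payload has the same shape as the record above; the bibliographic
# fields live under the top-level "message" object.
work = resp.json()["message"]

authors = ", ".join(f'{a["given"]} {a["family"]}' for a in work.get("author", []))
title = work["title"][0] if work.get("title") else ""
container = ", ".join(work.get("container-title", []))
year = work["issued"]["date-parts"][0][0]
pages = work.get("page", "")

print(f"{authors}: {title}. {container}, pp. {pages} ({year}). https://doi.org/{work['DOI']}")
print(f"{work.get('references-count', 0)} references; cited {work.get('is-referenced-by-count', 0)} times.")

In practice Crossref asks API clients to identify themselves (for example by including a mailto address in the User-Agent header); the sketch omits that for brevity.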