{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T15:26:09Z","timestamp":1767713169802},"publisher-location":"Berlin, Heidelberg","reference-count":23,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540352945"},{"type":"electronic","value":"9783540352969"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2006]]},"DOI":"10.1007\/11776420_42","type":"book-chapter","created":{"date-parts":[[2006,9,28]],"date-time":"2006-09-28T14:49:15Z","timestamp":1159454955000},"page":"574-588","source":"Crossref","is-referenced-by-count":8,"title":["Learning Near-Optimal Policies with Bellman-Residual Minimization Based Fitted Policy Iteration and a Single Sample Path"],"prefix":"10.1007","author":[{"given":"Andr\u00e1s","family":"Antos","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Csaba","family":"Szepesv\u00e1ri","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"R\u00e9mi","family":"Munos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"42_CR1","doi-asserted-by":"publisher","first-page":"1107","DOI":"10.1162\/jmlr.2003.4.6.1107","volume":"4","author":"M. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M., Parr, R.: Least-squares policy iteration. Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"42_CR2","unstructured":"Antos, A., Szepesv\u00e1ri, C., Munos, R.: Learning near-optimal policies with fitted policy iteration and a single sample path: approximate iterative policy evaluation. In: ICML 2006 (submitted, 2006)"},{"key":"42_CR3","volume-title":"Stochastic Optimal Control (The Discrete Time Case)","author":"D.P. 
Bertsekas","year":"1978","unstructured":"Bertsekas, D.P., Shreve, S.E.: Stochastic Optimal Control (The Discrete Time Case). Academic Press, New York (1978)"},{"key":"42_CR4","volume-title":"Proc. of the Ninth Annual Conference of Cognitive Science Society","author":"R.S. Sutton","year":"1987","unstructured":"Sutton, R.S., Barto, A.G.: Toward a modern theory of adaptive networks: Expectation and prediction. In: Proc. of the Ninth Annual Conference of Cognitive Science Society, Erlbaum, Hillsdale (1987)"},{"key":"42_CR5","unstructured":"Munos, R.: Error bounds for approximate policy iteration. In: 19th International Conference on Machine Learning, pp. 560\u2013567 (2003)"},{"key":"42_CR6","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4471-3267-7","volume-title":"Markov Chains and Stochastic Stability","author":"S.P. Meyn","year":"1993","unstructured":"Meyn, S.P., Tweedie, R.: Markov Chains and Stochastic Stability. Springer, New York (1993)"},{"key":"42_CR7","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511624216","volume-title":"Neural Network Learning: Theoretical Foundations","author":"M. Anthony","year":"1999","unstructured":"Anthony, M., Bartlett, P.L.: Neural Network Learning: Theoretical Foundations. Cambridge University Press, Cambridge (1999)"},{"issue":"1","key":"42_CR8","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1214\/aop\/1176988849","volume":"22","author":"B. Yu","year":"1994","unstructured":"Yu, B.: Rates of convergence for empirical processes of stationary mixing sequences. The Annals of Probability\u00a022(1), 94\u2013116 (1994)","journal-title":"The Annals of Probability"},{"issue":"3","key":"42_CR9","doi-asserted-by":"publisher","first-page":"1084","DOI":"10.1214\/aos\/1032526958","volume":"24","author":"A. Nobel","year":"1996","unstructured":"Nobel, A.: Histogram regression estimation using data-dependent partitions. 
Annals of Statistics\u00a024(3), 1084\u20131105 (1996)","journal-title":"Annals of Statistics"},{"key":"42_CR10","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1016\/0097-3165(95)90052-7","volume":"69","author":"D. Haussler","year":"1995","unstructured":"Haussler, D.: Sphere packing numbers for subsets of the boolean n-cube with bounded Vapnik-Chervonenkis dimension. Journal of Combinatorial Theory Series A\u00a069, 217\u2013232 (1995)","journal-title":"Journal of Combinatorial Theory Series A"},{"key":"#cr-split#-42_CR11.1","doi-asserted-by":"crossref","unstructured":"Samuel, A.L.: Some studies in machine learning using the game of checkers. IBM Journal on Research and Development, 210\u2013229 (1963);","DOI":"10.1147\/rd.33.0210"},{"key":"#cr-split#-42_CR11.2","doi-asserted-by":"crossref","unstructured":"Samuel, A.L.: Some studies in machine learning using the game of checkers. IBM Journal on Research and Development, 210\u2013229 (1963); Reprinted in Computers and Thought, Feigenbaum, E.A., Feldman, J. (eds.). McGraw-Hill, New York (1963)","DOI":"10.1147\/rd.33.0210"},{"key":"42_CR12","doi-asserted-by":"publisher","first-page":"247","DOI":"10.2307\/2002797","volume":"13","author":"R.E. Bellman","year":"1959","unstructured":"Bellman, R.E., Dreyfus, S.E.: Functional approximation and dynamic programming. Math. Tables and other Aids Comp.\u00a013, 247\u2013251 (1959)","journal-title":"Math. Tables and other Aids Comp."},{"key":"42_CR13","unstructured":"Bertsekas, D.P., Tsitsiklis, J.: Neuro-Dynamic Programming. Athena Scientific (1996)"},{"key":"42_CR14","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. Bradford Book (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"42_CR15","first-page":"261","volume-title":"Proceedings of the Twelfth International Conference on Machine Learning","author":"G.J. 
Gordon","year":"1995","unstructured":"Gordon, G.J.: Stable function approximation in dynamic programming. In: Prieditis, A., Russell, S. (eds.) Proceedings of the Twelfth International Conference on Machine Learning, pp. 261\u2013268. Morgan Kaufmann, San Francisco (1995)"},{"key":"42_CR16","first-page":"59","volume":"22","author":"J.N. Tsitsiklis","year":"1996","unstructured":"Tsitsiklis, J.N., Van Roy, B.: Feature-based methods for large scale dynamic programming. Machine Learning\u00a022, 59\u201394 (1996)","journal-title":"Machine Learning"},{"key":"42_CR17","unstructured":"Guestrin, C., Koller, D., Parr, R.: Max-norm projections for factored mdps. In: Proceedings of the International Joint Conference on Artificial Intelligence (2001)"},{"key":"42_CR18","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Geurts, P., Wehenkel, L.: Tree-based batch mode reinforcement learning. Journal of Machine Learning Research\u00a06, 503\u2013556 (2005)","journal-title":"Journal of Machine Learning Research"},{"key":"42_CR19","unstructured":"Wang, X., Dietterich, T.G.: Efficient value function approximation using regression trees. In: Proceedings of the IJCAI Workshop on Statistical Machine Learning for Large-Scale Optimization, Stockholm, Sweden (1999)"},{"key":"42_CR20","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/1120.001.0001","volume-title":"Advances in Neural Information Processing Systems 14","author":"T.G. Dietterich","year":"2002","unstructured":"Dietterich, T.G., Wang, X.: Batch value function approximation via support vectors. In: Dietterich, T.G., Becker, S., Ghahramani, Z. (eds.) Advances in Neural Information Processing Systems 14. MIT Press, Cambridge (2002)"},{"key":"42_CR21","doi-asserted-by":"crossref","unstructured":"Szepesv\u00e1ri, C., Munos, R.: Finite time bounds for sampling based fitted value iteration. 
In: ICML 2005 (2005)","DOI":"10.1145\/1102351.1102462"},{"issue":"1","key":"42_CR22","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1007602715810","volume":"39","author":"R. Meir","year":"2000","unstructured":"Meir, R.: Nonparametric time series prediction through adaptive model selection. Machine Learning\u00a039(1), 5\u201334 (2000)","journal-title":"Machine Learning"}],"container-title":["Lecture Notes in Computer Science","Learning Theory"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11776420_42.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T20:06:58Z","timestamp":1605643618000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11776420_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006]]},"ISBN":["9783540352945","9783540352969"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/11776420_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2006]]}}}