{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T07:20:49Z","timestamp":1774941649314,"version":"3.50.1"},"reference-count":27,"publisher":"Elsevier","isbn-type":[{"value":"9781558603776","type":"print"}],"license":[{"start":{"date-parts":[[1995,1,1]],"date-time":"1995-01-01T00:00:00Z","timestamp":788918400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[1995]]},"DOI":"10.1016\/b978-1-55860-377-6.50040-2","type":"book-chapter","created":{"date-parts":[[2014,7,1]],"date-time":"2014-07-01T03:00:14Z","timestamp":1404183614000},"page":"261-268","source":"Crossref","is-referenced-by-count":141,"title":["Stable Function Approximation in Dynamic Programming"],"prefix":"10.1016","author":[{"given":"Geoffrey J.","family":"Gordon","sequence":"first","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib1","doi-asserted-by":"crossref","first-page":"247","DOI":"10.2307\/2002797","article-title":"Functional approximations and dynamic programming","volume":"13","author":"Bellman","year":"1959","journal-title":"Mathematical Tables and Aids to Computation"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib2","first-page":"155","article-title":"Polynomial approximation \u2013 a new computational technique in dynamic programming: allocation processes","volume":"17","author":"Bellman","year":"1963","journal-title":"Mathematics of Computation"},{"issue":"1","key":"10.1016\/B978-1-55860-377-6.50040-2_bib3","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1090\/qam\/102435","article-title":"On a routing problem","volume":"16","author":"Bellman","year":"1958","journal-title":"Quarterly of Applied Mathematics"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib4","series-title":"Adaptive Control Processes","author":"Bellman","year":"1961"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib5","series-title":"Parallel and Distributed Computation: Numerical Methods","author":"Bertsekas","year":"1989"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib6","doi-asserted-by":"crossref","first-page":"226","DOI":"10.1214\/aoms\/1177700285","article-title":"Discounted dynamic programming","volume":"36","author":"Blackwell","year":"1965","journal-title":"Annals of Mathematical Statistics"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib7","article-title":"Generalization in reinforcement learning: safely approximating the value function","volume":"volume 7","author":"Boyan","year":"1995"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib8","unstructured":"C.-S. Chow and J. N. Tsitsiklis. An optimal multigrid algorithm for discrete-time stochastic control. Technical Report P-135, Center for Intelligent Control Systems, 1989."},{"issue":"3\u20134","key":"10.1016\/B978-1-55860-377-6.50040-2_bib9","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1007\/BF00992701","article-title":"The convergence of TD(\u03bb) for general lambda","volume":"8","author":"Dayan","year":"1992","journal-title":"Machine Learning"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib10","series-title":"Flows in Networks","author":"Ford","year":"1962"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib11","doi-asserted-by":"crossref","unstructured":"G. J. Gordon. Stable function approximation in dynamic programming. Technical Report CS-95\u2013103, CMU, 1995.","DOI":"10.1016\/B978-1-55860-377-6.50040-2"},{"issue":"6","key":"10.1016\/B978-1-55860-377-6.50040-2_bib12","doi-asserted-by":"crossref","first-page":"1185","DOI":"10.1162\/neco.1994.6.6.1185","article-title":"On the convergence of stochastic iterative dynamic programming algorithms","volume":"6","author":"Jaakkola","year":"1994","journal-title":"Neural Computation"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib13","series-title":"Introductory Real Analysis","author":"Kolmogorov","year":"1970"},{"issue":"3\u20134","key":"10.1016\/B978-1-55860-377-6.50040-2_bib14","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1007\/BF00992699","article-title":"Self-improving reactive agents based on reinforcement learning, planning, and teaching","volume":"8","author":"Lin","year":"1992","journal-title":"Machine Learning"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib15","unstructured":"A. W. Moore. Variable resolution dynamic programming: efficiently learning action maps in multivariate real-valued state-spaces. In L. Birnbaum and G. Collins, editors, Machine Learning: Proceedings of the eighth international workshop. Morgan Kaufmann, 1991."},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib16","article-title":"The parti-game algorithm for variable resolution reinforcement learning in multidimensional state spaces","volume":"volume 6","author":"Moore","year":"1994"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib17","unstructured":"P. Sabes. Approximating Q-values with basis function representations. In Proceedings of the Fourth Connectionist Models Summer School, Hillsdale, NJ, 1993. Lawrence Erlbaum."},{"issue":"3","key":"10.1016\/B978-1-55860-377-6.50040-2_bib18","doi-asserted-by":"crossref","first-page":"210","DOI":"10.1147\/rd.33.0210","article-title":"Some studies in machine learning using the game of checkers","volume":"3","author":"Samuels","year":"1959","journal-title":"IBM Journal of Research and Development"},{"issue":"3","key":"10.1016\/B978-1-55860-377-6.50040-2_bib19","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1007\/BF00993308","article-title":"Technical note: an upper bound on the loss from approximate optimal-value functions","volume":"16","author":"Singh","year":"1994","journal-title":"Machine Learning"},{"issue":"1","key":"10.1016\/B978-1-55860-377-6.50040-2_bib20","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1007\/BF00115009","article-title":"Learning to predict by the methods of temporal differences","volume":"3","author":"Sutton","year":"1988","journal-title":"Machine Learning"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib21","doi-asserted-by":"crossref","unstructured":"G. Tesauro. Neurogammon: a neural network backgammon program. In IJCNN Proceedings III, pages 33\u201339, 1990.","DOI":"10.1109\/IJCNN.1990.137821"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib22","unstructured":"S. Thrun and A. Schwartz. Issues in using function approximation for reinforcement learning. In Proceedings of the Fourth Connectionist Models Summer School, Hillsdale, NJ, 1993. Lawrence Erlbaum."},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib23","unstructured":"J. N. Tsitsiklis and B. Van Roy. Feature-based methods for large-scale dynamic programming. Technical Report P-2277, Laboratory for Information and Decision Systems, 1994."},{"issue":"3","key":"10.1016\/B978-1-55860-377-6.50040-2_bib24","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1007\/BF00993306","article-title":"Asynchronous stochastic approximation and Q-learning","volume":"16","author":"Tsitsiklis","year":"1994","journal-title":"Machine Learning"},{"issue":"3\u20134","key":"10.1016\/B978-1-55860-377-6.50040-2_bib25","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Machine Learning"},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib26","unstructured":"C. J. C. H. Watkins. Learning from Delayed Rewards. PhD thesis, King's College, Cambridge, England, 1989."},{"key":"10.1016\/B978-1-55860-377-6.50040-2_bib27","doi-asserted-by":"crossref","first-page":"286","DOI":"10.1016\/S0019-9958(77)90354-0","article-title":"An adaptive optimal controller for discrete-time Markov environments","volume":"34","author":"Witten","year":"1977","journal-title":"Information and Control"}],"container-title":["Machine Learning Proceedings 1995"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:B9781558603776500402?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:B9781558603776500402?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2022,4,10]],"date-time":"2022-04-10T02:42:05Z","timestamp":1649558525000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/B9781558603776500402"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1995]]},"ISBN":["9781558603776"],"references-count":27,"URL":"https:\/\/doi.org\/10.1016\/b978-1-55860-377-6.50040-2","relation":{},"subject":[],"published":{"date-parts":[[1995]]}}}