{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T00:00:23Z","timestamp":1767916823223,"version":"3.49.0"},"publisher-location":"Boston, MA","reference-count":12,"publisher":"Springer US","isbn-type":[{"value":"9780387747583","type":"print"},{"value":"9780387747590","type":"electronic"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-0-387-74759-0_440","type":"book-chapter","created":{"date-parts":[[2008,8,25]],"date-time":"2008-08-25T11:11:28Z","timestamp":1219662688000},"page":"2555-2560","source":"Crossref","is-referenced-by-count":17,"title":["Neuro-Dynamic Programming"],"prefix":"10.1007","author":[{"given":"Dimitri P.","family":"Bertsekas","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"440_CR1_440","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1016\/0004-3702(94)00011-O","volume":"72","author":"A.G. Barto","year":"1995","unstructured":"Barto AG, Bradtke SJ, Singh SP (1995) Real-time learning and control using asynchronous dynamic programming. Artif Intell 72:81\u2013138","journal-title":"Artif. Intell."},{"key":"440_CR2_440","volume-title":"Dynamic programming and optimal control","author":"D.P. Bertsekas","year":"1995","unstructured":"Bertsekas DP (1995) Dynamic programming and optimal control, vol II, Athena Sci., Belmont"},{"key":"440_CR3_440","volume-title":"Neuro-dynamic programming","author":"D.P. Bertsekas","year":"1996","unstructured":"Bertsekas DP, Tsitsiklis JN (1996) Neuro-dynamic programming. Athena Sci., Belmont"},{"key":"440_CR4_440","doi-asserted-by":"crossref","first-page":"1185","DOI":"10.1162\/neco.1994.6.6.1185","volume":"6","author":"T. Jaakkola","year":"1994","unstructured":"Jaakkola T, Jordan MI, Singh SP (1994) On the convergence of stochastic iterative dynamic programming algorithms. Neural Computation 6:1185\u20131201","journal-title":"Neural Computation"},{"key":"440_CR5_440","first-page":"9","volume":"3","author":"R.S. Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Machine Learning 3:9\u201344","journal-title":"Machine Learning"},{"key":"440_CR6_440","volume-title":"Reinforcement learning","author":"R.S. Sutton","year":"1998","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning. MIT, Cambridge"},{"key":"440_CR7_440","first-page":"257","volume":"8","author":"G. Tesauro","year":"1992","unstructured":"Tesauro G (1992) Practical issues in temporal difference learning. Machine Learning 8:257\u2013277","journal-title":"Machine Learning"},{"key":"440_CR8_440","first-page":"185","volume":"16","author":"J.N. Tsitsiklis","year":"1994","unstructured":"Tsitsiklis JN (1994) Asynchronous stochastic approximation and Q-learning. Machine Learning 16:185\u2013202","journal-title":"Machine Learning"},{"key":"440_CR9_440","unstructured":"Watkins CJCH (1989) Learning from delayed rewards. PhD Thesis Cambridge Univ, Cambridge"},{"key":"440_CR10_440","volume-title":"Handbook of Intelligent Control","author":"P.J. Werb\u00f6s","year":"1992","unstructured":"Werb\u00f6s PJ (1992) Approximate dynamic programming for real-time control and neural modeling. In: White DA, Sofge DA (eds) Handbook of Intelligent Control. v. Nostrand, Princeton"},{"key":"440_CR11_440","volume-title":"Handbook of Intelligent Control","author":"P.J. Werb\u00f6s","year":"1992","unstructured":"Werb\u00f6s PJ (1992) Neurocontrol and supervised learning: An overview and valuation. In: White DA, Sofge DA (eds) Handbook of Intelligent Control. v. Nostrand, Princeton"},{"key":"440_CR12_440","volume-title":"Handbook of Intelligent Control","year":"1992","unstructured":"White DA, Sofge DA (eds) (1992) Handbook of Intelligent Control. v. Nostrand, Princeton"}],"container-title":["Encyclopedia of Optimization"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-0-387-74759-0_440","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T09:56:12Z","timestamp":1720691772000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-0-387-74759-0_440"}},"subtitle":["NDP"],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9780387747583","9780387747590"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/978-0-387-74759-0_440","relation":{},"subject":[],"published":{"date-parts":[[2008]]}}}