{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T22:41:48Z","timestamp":1779230508888,"version":"3.51.4"},"reference-count":14,"publisher":"Springer Science and Business Media LLC","issue":"3-4","license":[{"start":{"date-parts":[[1992,5,1]],"date-time":"1992-05-01T00:00:00Z","timestamp":704678400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[1992,5]]},"DOI":"10.1007\/bf00992698","type":"journal-article","created":{"date-parts":[[2005,1,9]],"date-time":"2005-01-09T16:35:16Z","timestamp":1105288516000},"page":"279-292","source":"Crossref","is-referenced-by-count":7434,"title":["Q-learning"],"prefix":"10.1007","volume":"8","author":[{"given":"Christopher J. C. H.","family":"Watkins","sequence":"first","affiliation":[]},{"given":"Peter","family":"Dayan","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"CR1","series-title":"COINS technical report","volume-title":"Real-time learning and control using asynchronous dynamic programming","author":"A.G. Barto","year":"1991","unstructured":"Barto, A.G., Bradtke, S.J. & Singh, S.P. (1991).Real-time learning and control using asynchronous dynamic programming. (COINS technical report 91-57). Amherst: University of Massachusetts."},{"key":"CR2","volume-title":"Proceedings of the 1990 Connectionist Models Summer School","author":"A.G. Barto","year":"1990","unstructured":"Barto, A.G. & Singh, S.P. (1990). On the computational economics of reinforcement learning. In D.S. Touretzky, J. Elman, T.J. Sejnowski & G.E. Hinton, (Eds.),Proceedings of the 1990 Connectionist Models Summer School. San Mateo, CA: Morgan Kaufmann."},{"key":"CR3","doi-asserted-by":"crossref","unstructured":"Bellman, R.E. & Dreyfus, S.E. (1962).Applied dynamic programming. RAND Corporation.","DOI":"10.1515\/9781400874651"},{"key":"CR4","unstructured":"Chapman, D. & Kaelbling, L.P. (1991). Input generalization in delayed reinforcement learning: An algorithm and performance comparisons.Proceedings of the 1991 International Joint Conference on Artificial Intelligence (pp. 726?731)."},{"key":"CR5","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4684-9352-8","volume-title":"Stochastic approximation methods for constrained and unconstrained systems","author":"H. Kushner","year":"1978","unstructured":"Kushner, H. & Clark, D. (1978).Stochastic approximation methods for constrained and unconstrained systems. Berlin, Germany: Springer-Verlag."},{"key":"CR6","doi-asserted-by":"crossref","unstructured":"Lin, L. (1992). Self-improving reactive agents based on reinforcement learning, planning and teaching.Machine Learning, 8.","DOI":"10.1007\/BF00992699"},{"key":"CR7","unstructured":"Mahadevan & Connell (1991). Automatic programming of behavior-based robots using reinforcement learning.Proceedings of the 1991 National Conference on AI (pp. 768?773)."},{"key":"CR8","volume-title":"Introduction to stochastic dynamic programming","author":"S. Ross","year":"1983","unstructured":"Ross, S. (1983).Introduction to stochastic dynamic programming. New York, Academic Press."},{"key":"CR9","doi-asserted-by":"crossref","first-page":"677","DOI":"10.1109\/21.21595","volume":"18","author":"M. Sato","year":"1988","unstructured":"Sato, M., Abe, K. & Takeda, H. (1988). Learning control of finite Markov chains with explicit trade-off between estimation and control.IEEE Transactions on Systems, Man and Cybernetics, 18, pp. 677?684.","journal-title":"IEEE Transactions on Systems, Man and Cybernetics"},{"key":"CR10","volume-title":"Temporal credit assignment in reinforcement learning","author":"R.S. Sutton","year":"1984","unstructured":"Sutton, R.S. (1984).Temporal credit assignment in reinforcement learning. PhD Thesis, University of Massachusetts, Amherst, MA."},{"key":"CR11","first-page":"9","volume":"3","author":"R.S. Sutton","year":"1988","unstructured":"Sutton, R.S. (1988). Learning to predict by the methods of temporal difference.Machine Learning, 3, pp. 9?44.","journal-title":"Machine Learning"},{"key":"CR12","volume-title":"Proceedings of the Seventh International Conference on Machine Learning","author":"R.S. Sutton","year":"1990","unstructured":"Sutton, R.S. (1990). Integrated architectures for learning, planning, and reacting based on approximating dynamic programming.Proceedings of the Seventh International Conference on Machine Learning. San Mateo, CA: Morgan Kaufmann."},{"key":"CR13","volume-title":"Learning from delayed rewards","author":"C.J.C.H. Watkins","year":"1989","unstructured":"Watkins, C.J.C.H. (1989).Learning from delayed rewards. PhD Thesis, University of Cambridge, England."},{"key":"CR14","first-page":"25","volume":"22","author":"P.J. Werbos","year":"1977","unstructured":"Werbos, P.J. (1977). Advanced forecasting methods for global crisis warning and models of intelligence.General Systems Yearbook, 22, pp. 25?38.","journal-title":"General Systems Yearbook"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/BF00992698.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/BF00992698\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/BF00992698","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,5]],"date-time":"2020-04-05T08:10:54Z","timestamp":1586074254000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/BF00992698"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1992,5]]},"references-count":14,"journal-issue":{"issue":"3-4","published-print":{"date-parts":[[1992,5]]}},"alternative-id":["BF00992698"],"URL":"https:\/\/doi.org\/10.1007\/bf00992698","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[1992,5]]}}}