{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T18:04:50Z","timestamp":1772301890491,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":12,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783540423430","type":"print"},{"value":"9783540445814","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2001]]},"DOI":"10.1007\/3-540-44581-1_39","type":"book-chapter","created":{"date-parts":[[2007,8,10]],"date-time":"2007-08-10T10:13:49Z","timestamp":1186740829000},"page":"589-604","source":"Crossref","is-referenced-by-count":32,"title":["Learning Rates for Q-Learning"],"prefix":"10.1007","author":[{"given":"Eyal","family":"Even-Dar","sequence":"first","affiliation":[]},{"given":"Yishay","family":"Mansour","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2001,9,13]]},"reference":[{"key":"39_CR1","unstructured":"F. Beleznay, T. Grobler, and Cs. Szepesvari. Comparing value-function estimation algorithms in undiscounted problems. Technical Report TR-99-02, Mindmaker Ltd, 1999."},{"issue":"2","key":"39_CR2","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1137\/S0363012997331639","volume":"38","author":"V.S. Borkar","year":"2000","unstructured":"V.S. Borkar and S.P. Meyn. The o.d.e method for convergence of stochstic approximation and reinforcement learning. Siam J. control, 38(2):447\u201369, 2000.","journal-title":"Siam J. control"},{"key":"39_CR3","volume-title":"Neuro-Dynamic Programming","author":"D. P. Bertsekas","year":"1996","unstructured":"Dimitri P. Bertsekas and Jhon N. Tsitsklis. Neuro-Dynamic Programming. Athena Scientific, Belmont, MA, 1996."},{"key":"39_CR4","doi-asserted-by":"crossref","unstructured":"T. Jaakkola, M.I. Jordan, and S.P. Singh. On the convergence of stochastic iterative dynamic programming algorithms. Neural Computation, 6, 1994.","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"39_CR5","unstructured":"Michael Kearns and Stinder Singh. Finite-sample convergence rates for qlearning and indirect algorithms. In Neural Information Processing Systems 10, 1998."},{"key":"39_CR6","unstructured":"Littman M. and Cs. Szepesvari. A generalized reinforcement learning model: convergence and applications. In In International Conference on Machine Learning, 1996."},{"key":"39_CR7","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes-Discrete Stochastic Dynamic Programming","author":"M.L. Puterman","year":"1994","unstructured":"M.L Puterman. Markov Decision Processes-Discrete Stochastic Dynamic Programming. Jhon Wiley & Sons. Inc., New York, NY, 1994."},{"key":"39_CR8","unstructured":"Richard S. Sutton and Andrew G. Bato. Reinforcement Learning. Mit press, 1998."},{"key":"39_CR9","unstructured":"Cs. Szepesvari. The asymptotic convergence-rate of q-learning. In Neural Information Processing Systems 10, pages 1064\u20131070, 1997."},{"key":"39_CR10","first-page":"185","volume":"16","author":"J. N. Tsitsklis","year":"1994","unstructured":"Jhon N. Tsitsklis. Asynchronous stochastic approximation and q-learning. Machine Learning, 16:185\u2013202, 1994.","journal-title":"Machine Learning"},{"key":"39_CR11","unstructured":"C. Watkins. Learning from Delayed Rewards. PhD thesis, Cambridge University, 1989."},{"issue":"3\/4","key":"39_CR12","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1023\/A:1022676722315","volume":"8","author":"C. Watking","year":"1992","unstructured":"C. Watking and P. Dyan. Q-learning. Machine Learning, 8(3\/4):279\u2013292, 1992.","journal-title":"Machine Learning"}],"container-title":["Lecture Notes in Computer Science","Computational Learning Theory"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-44581-1_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,8,21]],"date-time":"2021-08-21T14:34:48Z","timestamp":1629556488000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-44581-1_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2001]]},"ISBN":["9783540423430","9783540445814"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/3-540-44581-1_39","relation":{},"ISSN":["0302-9743"],"issn-type":[{"value":"0302-9743","type":"print"}],"subject":[],"published":{"date-parts":[[2001]]}}}