{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T20:31:58Z","timestamp":1774729918263,"version":"3.50.1"},"reference-count":16,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[1994,9,1]],"date-time":"1994-09-01T00:00:00Z","timestamp":778377600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[1994,9]]},"DOI":"10.1007\/bf00993306","type":"journal-article","created":{"date-parts":[[2005,1,14]],"date-time":"2005-01-14T17:53:30Z","timestamp":1105725210000},"page":"185-202","source":"Crossref","is-referenced-by-count":358,"title":["Asynchronous stochastic approximation and Q-learning"],"prefix":"10.1007","volume":"16","author":[{"given":"John N.","family":"Tsitsiklis","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"CR1","series-title":"Technical Report","volume-title":"Real-time Learning and Control Using Asynchronous Dynamic Programming","author":"A.G. Barto","year":"1991","unstructured":"Barto, A.G., Bradtke, S.J., and S.P., Singh (1991).Real-time Learning and Control Using Asynchronous Dynamic Programming, (Technical Report 91-57). Amherst, MA: University of Massachusetts, Computer Science Dept."},{"key":"CR2","doi-asserted-by":"crossref","first-page":"610","DOI":"10.1109\/TAC.1982.1102980","volume":"AC-27","author":"D.P. Bertsekas","year":"1982","unstructured":"Bertsekas, D.P. (1982). Distributed Dynamic Programming.IEEE Transactions on Automatic Control, AC-27, 610?616.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"CR3","volume-title":"Parallel and Distributed Computation: Numerical Methods","author":"D.P. Bertsekas","year":"1989","unstructured":"Bertsekas, D.P. and Tsitsiklis, J.N. (1989).Parallel and Distributed Computation: Numerical Methods, Englewood Cliffs, NJ: Prentice Hall."},{"key":"CR4","doi-asserted-by":"crossref","first-page":"580","DOI":"10.1287\/moor.16.3.580","volume":"16","author":"D.P. Bertsekas","year":"1991","unstructured":"Bertsekas, D.P. and Tsitsiklis, J.N. (1991). An Analysis of Stochastic Shortest Path Problems.Mathematics of Operations Research, 16, 580?595.","journal-title":"Mathematics of Operations Research"},{"key":"CR5","first-page":"341","volume":"8","author":"P. Dayan","year":"1992","unstructured":"Dayan, P. (1992). The Convergence of TD(?) for general ?.Machine Learning, 8, 341?362.","journal-title":"Machine Learning"},{"key":"CR6","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4684-9352-8","volume-title":"Stochastic Approximation Methods for Constrained and Unconstrained Problems","author":"H.J. Kushner","year":"1978","unstructured":"Kushner, H.J. and Clark, D.S. (1978).Stochastic Approximation Methods for Constrained and Unconstrained Problems, New York, NY: Springer Verlag."},{"key":"CR7","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1080\/17442508708833475","volume":"22","author":"H.J. Kushner","year":"1987","unstructured":"Kushner, H.J. and Yin, G., (1987). Stochastic Approximation Algorithms for Parallel and Distributed Processing,Stochastics, 22, 219?250.","journal-title":"Stochastics"},{"key":"CR8","doi-asserted-by":"crossref","first-page":"1266","DOI":"10.1137\/0325070","volume":"25","author":"H.J. Kushner","year":"1987","unstructured":"Kushner, H.J. and Yin, G. (1987). Asymptotic Properties of Distributed and Communicating Stochastic Approximation Algorithms.SIAM J. Control and Optimization, 25, 1266?1290.","journal-title":"SIAM J. Control and Optimization"},{"key":"CR9","doi-asserted-by":"crossref","first-page":"612","DOI":"10.1109\/TAC.1987.1104684","volume":"32","author":"S. Li","year":"1987","unstructured":"Li, S. and Basar, T. (1987). Asymptotic Agreement and Convergence of Asynchronous Stochastic Algorithms.IEEE Transactions on Automatic Control, 32, 612?618.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"CR10","doi-asserted-by":"crossref","unstructured":"Moore, A.W. and Atkeson, C.G. (1992).Memory-based Reinforcement Learning: Converging with Less Data and Less Real Time, preprint, July 1992.","DOI":"10.1007\/978-1-4615-3184-5_4"},{"key":"CR11","first-page":"83","volume":"12","author":"B.T. Poljak","year":"1973","unstructured":"Poljak, B.T. and Tsypkin, Y.Z. (1973). Pseudogradient Adaptation and Training Algorithms.Automation and Remote Control, 12, 83?94.","journal-title":"Automation and Remote Control"},{"key":"CR12","first-page":"9","volume":"3","author":"R.S. Sutton","year":"1988","unstructured":"Sutton, R.S. (1988). Learning to Predict by the Method of Temporal Differences.Machine Learning, 3, 9?44.","journal-title":"Machine Learning"},{"key":"CR13","unstructured":"Sutton, R.S., Barto, A.G., and Williams, R.J. (1992). Reinforcement Learning is Direct Adaptive Control.IEEE Control Systems Magazine, April, 19?22."},{"key":"CR14","doi-asserted-by":"crossref","first-page":"803","DOI":"10.1109\/TAC.1986.1104412","volume":"31","author":"J.N. Tsitsiklis","year":"1986","unstructured":"Tsitsiklis, J.N., Bertsekas, D.P., and Athans, M. (1986). Distributed Deterministic and Stochastic Gradient Optimization Algorithms.IEEE Transactions on Automatic Control, 31, 803?812.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"CR15","volume-title":"Learning from Delayed Rewards","author":"C.I.C.H. Watkins","year":"1989","unstructured":"Watkins, C.I.C.H. (1989). Learning from Delayed Rewards. Doctoral dissertation. University of Cambridge, Cambridge, United Kingdom."},{"key":"CR16","first-page":"279","volume":"8","author":"C.I.C.H. Watkins","year":"1992","unstructured":"Watkins, C.I.C.H. and Dayan, P. (1992). Q-learning.Machine Learning, 8, 279?292.","journal-title":"Machine Learning"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/BF00993306.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/BF00993306\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/BF00993306","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,5]],"date-time":"2020-04-05T09:50:52Z","timestamp":1586080252000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/BF00993306"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1994,9]]},"references-count":16,"journal-issue":{"issue":"3","published-print":{"date-parts":[[1994,9]]}},"alternative-id":["BF00993306"],"URL":"https:\/\/doi.org\/10.1007\/bf00993306","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[1994,9]]}}}