{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T08:36:50Z","timestamp":1774946210462,"version":"3.50.1"},"reference-count":24,"publisher":"Springer Science and Business Media LLC","issue":"1-3","license":[{"start":{"date-parts":[[1996,1,1]],"date-time":"1996-01-01T00:00:00Z","timestamp":820454400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[1996,1,1]],"date-time":"1996-01-01T00:00:00Z","timestamp":820454400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Learning"],"published-print":{"date-parts":[[1996,1]]},"DOI":"10.1023\/a:1018056104778","type":"journal-article","created":{"date-parts":[[2003,2,6]],"date-time":"2003-02-06T17:07:14Z","timestamp":1044551234000},"page":"33-57","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":44,"title":["Linear Least-Squares Algorithms for Temporal Difference Learning"],"prefix":"10.1007","volume":"22","author":[{"given":"Steven J.","family":"Bradtke","sequence":"first","affiliation":[]},{"given":"Andrew G.","family":"Barto","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"107733_CR1","series-title":"Technical Report","volume-title":"Strategy learning with multilayer connectionist representations","author":"C. W. Anderson","year":"1988","unstructured":"Anderson, C. W. (1988). Strategy learning with multilayer connectionist representations. Technical Report 87-509,3 GTE Laboratories Incorporated, Computer and Intelligent Systems Laboratory, 40 Sylvan Road. Waltham, MA 02254."},{"key":"107733_CR2","first-page":"835","volume":"13","author":"A. G. Barto","year":"1983","unstructured":"Barto, A. G., Sutton, R. S. & Anderson, C. W. (1983). Neuronlike elements that can solve difficult learning control problems. IEEE Transactions on Systems, Man, and Cybernetics 13: 835-846","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"107733_CR3","unstructured":"Bradike, S J., (1994). Incremental Dynamic Programming for On-Line Adaptive Optimal Control. PhD thesis, University of Massachusetts, Computer Science Dept. Technical Report 94-62."},{"key":"107733_CR4","unstructured":"Darken, C. Chang, J. & Moody, J., (1992) Learning rate schedules for faster stochastic gradient search. In Neural Networks or Signal Processing 2 \u2014 Proceedings of the 1992 IEEE Workshop, IEEE Press."},{"key":"107733_CR5","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1023\/A:1022632907294","volume":"8","author":"Dayan","year":"1992","unstructured":"Dayan, (1992) The convergence of TD(\u03bb) or general \u03bb. Machine Learning, 8: 341-362","journal-title":"Machine Learning"},{"key":"107733_CR6","doi-asserted-by":"crossref","unstructured":"Dayan, P. & Sejnowski, T. J., (1994) TD(\u03bb): Convergence with probability I. Mahine Learning.","DOI":"10.1007\/BF00993978"},{"key":"107733_CR7","volume-title":"Adaptive Filtering Prediction and Control","author":"G.C. Goodwin","year":"1984","unstructured":"Goodwin, G.C. & Sin, K.S., (1984). Adaptive Filtering Prediction and Control, Prentice-Hall, Englewood Cliffs, NJ."},{"key":"107733_CR8","doi-asserted-by":"crossref","unstructured":"Jaakkola, T., Jordan, M.I & Singh, S.P, (1994). On the convergence of stochastic iterative dynamic programming algorithms. Neural Computation, 6(6).","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"107733_CR9","volume-title":"Finite Markov Chains","author":"J. G. Kemeny","year":"1976","unstructured":"Kemeny, J. G. & Snell, J.L. (1976). Finite Markov Chains. Springer-Verlag, New York."},{"key":"107733_CR10","volume-title":"Theory and Practice of Recursive Identification","author":"L. Liung","year":"1983","unstructured":"Liung, L. & Soderstrorn, T. (1983). Theory and Practice of Recursive Identification. MIT Press, Cambridge, MA."},{"key":"107733_CR11","first-page":"521","volume":"1","author":"G. Lukes","year":"1990","unstructured":"Lukes, G., Thompson, B. & Werbos, P., (1990) Expectation driven learning with an associative associative memory. In Proceedings of the International Joint Conference on Neural Networks, pages 1: 521-524.","journal-title":"Proceedings of the International Joint Conference on Neural Networks"},{"key":"107733_CR12","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H. Robbins","year":"1951","unstructured":"Robbins, H. & Monro, (1951) A stochastic approxmation method. Annals of Mathematical Statistics. 22: 400-407.","journal-title":"Annals of Mathematical Statistics"},{"key":"107733_CR13","doi-asserted-by":"crossref","DOI":"10.1007\/BFb0009019","volume-title":"Instrumental Variable Methods for System Idenfication","author":"T. Soderstrom","year":"1983","unstructured":"Soderstrom, T. & Sloica, P.G., (1983). Instrumental Variable Methods for System Idenfication. Springer Verlag, Berlin."},{"key":"107733_CR14","volume-title":"Temporal Credit Assignment in Reinforcement Learning","author":"A.S. Sutton","year":"1984","unstructured":"Sutton, A.S., (1984). Temporal Credit Assignment in Reinforcement Learning. PhD thesis, Department of Computer and Information Science, University of Massachusetts at Amherst, Arherst, MA. 01003."},{"key":"107733_CR15","first-page":"9","volume":"3","author":"R.S. Sutton","year":"1988","unstructured":"Sutton, R.S., (1988) Learning to predict by the method of temporal differences. Machine Learning, 3: 9-44.","journal-title":"Machine Learning"},{"issue":"3\/4","key":"107733_CR16","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1023\/A:1022624705476","volume":"8","author":"G.J. Tesauro","year":"1992","unstructured":"Tesauro, G.J., (1992). Practical issues in temporal difference learning. Machine Learning 8(3\/4):257-277.","journal-title":"Machine Learning"},{"key":"107733_CR17","series-title":"Technical Report","volume-title":"Asynchronous stochastic approximation and Q-learning","author":"J.N. Tsitsiklis","year":"1995","unstructured":"Tsitsiklis, J.N. (1995) Asynchronous stochastic approximation and Q-learning. Technical Report IIDS-P-2172, Laboratory for Information and Decision Systems, MIT, Cambridge, MA."},{"key":"107733_CR18","volume-title":"Learning from Delayed Rewards","author":"C. I. C. H. Watkins","year":"1989","unstructured":"Watkins, C. I. C. H., (1989). Learning from Delayed Rewards PhD thesis, Cambridge University, Cambridge, England."},{"issue":"3\/4","key":"107733_CR19","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1023\/A:1022624705476","volume":"8","author":"C. J. C. H. Watkins","year":"1992","unstructured":"Watkins, C. J. C. H. & Dayan, P. (1992). Q-Learning. Machine Learning, 8(3\/4): 257-277, May 1992.","journal-title":"Machine Learning"},{"issue":"1","key":"107733_CR20","first-page":"7","volume":"17","author":"P.J. Werbos","year":"1987","unstructured":"Werbos, P.J. (1987). Building and understanding adaptive systems: A statistical\/numerical approach to factory automation and brain research IEEE: Transaction on Systems, Man, and Cybernetics, 17(1) 7-20.","journal-title":"IEEE: Transaction on Systems, Man, and Cybernetics"},{"issue":"4","key":"107733_CR21","doi-asserted-by":"crossref","first-page":"339","DOI":"10.1016\/0893-6080(88)90007-X","volume":"1","author":"P.J. Werbos","year":"1988","unstructured":"Werbos, P.J. (1988) Generalization of backpropagation with application to a recurrent gas market model. Neural Networks, 1(4): 339-356, 1988.","journal-title":"Neural Networks"},{"issue":"2","key":"107733_CR22","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1016\/0893-6080(90)90088-3","volume":"3","author":"P.J. Werbos","year":"1990","unstructured":"Werbos, P.J. (1990). Consistency of HDP applied to a simple reinforcement learning problem. Neural Networks. 3(2): 179-190","journal-title":"Neural Networks"},{"key":"107733_CR23","first-page":"493","volume-title":"Handbook of Intelligent Cotrol: Neural, Fuzzy, and Adaptive Approaches","author":"P.J. Werbos","year":"1992","unstructured":"Werbos, P.J. (1992) Approximate dynamic programming for real time control and neural modeling. In D. A. White and D. A. Sofge, editors, Handbook of Intelligent Cotrol: Neural, Fuzzy, and Adaptive Approaches, pages 493-525. Van Nostrand Reinhold, New York."},{"key":"107733_CR24","doi-asserted-by":"crossref","unstructured":"Young, P. (1984) Recursive Estimation and Time-series. Analysis. Springer-Verlag.","DOI":"10.1007\/978-3-642-82336-7"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1018056104778.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1018056104778\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1018056104778.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T11:40:53Z","timestamp":1752147653000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1018056104778"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1996,1]]},"references-count":24,"journal-issue":{"issue":"1-3","published-print":{"date-parts":[[1996,1]]}},"alternative-id":["107733"],"URL":"https:\/\/doi.org\/10.1023\/a:1018056104778","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[1996,1]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}