{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T10:52:31Z","timestamp":1752231151138,"version":"3.41.2"},"reference-count":15,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[1998,7,1]],"date-time":"1998-07-01T00:00:00Z","timestamp":899251200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[1998,7,1]],"date-time":"1998-07-01T00:00:00Z","timestamp":899251200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Learning"],"published-print":{"date-parts":[[1998,7]]},"DOI":"10.1023\/a:1007495401240","type":"journal-article","created":{"date-parts":[[2002,12,22]],"date-time":"2002-12-22T04:48:21Z","timestamp":1040532501000},"page":"5-40","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Analytical Mean Squared Error Curves for Temporal Difference Learning"],"prefix":"10.1007","volume":"32","author":[{"given":"Satinder","family":"Singh","sequence":"first","affiliation":[]},{"given":"Peter","family":"Dayan","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"2","key":"160198_CR1","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1109\/21.229449","volume":"23","author":"E. Barnard","year":"1993","unstructured":"Barnard, E. (1993). Temporal-difference methods and Markov models. IEEE Transactions on Systems, Man, and Cybernetics, 23(2), 357-365.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"160198_CR2","first-page":"687","volume-title":"Advances in Neural Information Processing Systems 6","author":"A. G. Barto","year":"1994","unstructured":"Barto, A. G. & Duff, M. (1994). Monte Carlo matrix inversion and reinforcement learning. In Advances in Neural Information Processing Systems 6, pages 687-694, San Mateo, CA. Morgan Kaufmann."},{"key":"160198_CR3","first-page":"835","volume":"13","author":"A. G. Barto","year":"1983","unstructured":"Barto, A. G., Sutton, R. S., & Anderson, C. W. (1983). Neuronlike elements that can solve difficult learning control problems. IEEE Transactions on Systems, Man, and Cybernetics, 13, 835-846.","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics"},{"key":"160198_CR4","volume-title":"Large Deviation Techniques in Decision, Simulation and Estimation","author":"J. A. Bucklew","year":"1990","unstructured":"Bucklew, J. A. (1990). Large Deviation Techniques in Decision, Simulation and Estimation. New York: Wiley-Interscience."},{"issue":"3\/4","key":"160198_CR5","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1023\/A:1022632907294","volume":"8","author":"P. Dayan","year":"1992","unstructured":"Dayan, P. (1992). The convergence of TD(\u03bb) for general \u03bb. Machine Learning, 8(3\/4), 341-362.","journal-title":"Machine Learning"},{"key":"160198_CR6","first-page":"295","volume":"14","author":"P. Dayan","year":"1994","unstructured":"Dayan, P. & Sejnowski, T. (1994). TD(\u03bb) converges with probability 1. Machine Learning, 14, 295-301.","journal-title":"Machine Learning"},{"key":"160198_CR7","first-page":"76","volume-title":"Proceedings of the 7th Annual ACM Workshop on Computational Learning Theory","author":"D. Haussler","year":"1994","unstructured":"Haussler, D., Kearns, M., Seung, H. S., & Tishby, N. (1994). Rigorous learning curve bounds from statistical mechanics. In Proceedings of the 7th Annual ACM Workshop on Computational Learning Theory, pages 76-87, San Mateo, CA. Morgan Kauffman."},{"issue":"6","key":"160198_CR8","doi-asserted-by":"crossref","first-page":"1185","DOI":"10.1162\/neco.1994.6.6.1185","volume":"6","author":"T. Jaakkola","year":"1994","unstructured":"Jaakkola, T., Jordan, M. I., & Singh, S. (1994). On the convergence of stochastic iterative dynamic programming algorithms. Neural Computation, 6(6), 1185-1201.","journal-title":"Neural Computation"},{"key":"160198_CR9","doi-asserted-by":"crossref","unstructured":"Saul, L. K.& Singh, S. (1996). Learning curves bounds for Markov decision processes with undiscounted rewards. In Proceedings of COLT.","DOI":"10.1145\/238061.238084"},{"key":"160198_CR10","first-page":"123","volume":"22","author":"S. Singh","year":"1996","unstructured":"Singh, S. & Sutton, R. S. (1996). Reinforcement learning with replacing eligibility traces. Machine Learning, Vol. 22, 123-158.","journal-title":"Machine Learning"},{"key":"160198_CR11","first-page":"9","volume":"3","author":"R. S. Sutton","year":"1988","unstructured":"Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. Machine Learning, 3, 9-44.","journal-title":"Machine Learning"},{"issue":"3","key":"160198_CR12","first-page":"185","volume":"16","author":"J. Tsitsiklis","year":"1994","unstructured":"Tsitsiklis, J. (1994). Asynchronous stochastic approximation and Q-learning. Machine Learning, 16(3), 185-202.","journal-title":"Machine Learning"},{"key":"160198_CR13","doi-asserted-by":"crossref","first-page":"78","DOI":"10.2307\/2002546","volume":"6","author":"W. R. Wasow","year":"1952","unstructured":"Wasow, W. R. (1952). A note on the inversion of matrices by random walks. Math. Tables Other Aids Comput., 6, 78-81.","journal-title":"Math. Tables Other Aids Comput."},{"key":"160198_CR14","volume-title":"Learning from Delayed Rewards","author":"C. J. C. H. Watkins","year":"1989","unstructured":"Watkins, C. J. C. H. (1989). Learning from Delayed Rewards. Ph.D Thesis, Cambridge Univ., Cambridge, England."},{"key":"160198_CR15","volume-title":"Adaptive Signal Processing","author":"B. Widrow","year":"1985","unstructured":"Widrow, B. & Stearns, S. D. (1985). Adaptive Signal Processing. Englewood Cliffs, NJ: Prentice-Hall."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1007495401240.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1007495401240\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1007495401240.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T11:32:35Z","timestamp":1752147155000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1007495401240"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1998,7]]},"references-count":15,"journal-issue":{"issue":"1","published-print":{"date-parts":[[1998,7]]}},"alternative-id":["160198"],"URL":"https:\/\/doi.org\/10.1023\/a:1007495401240","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[1998,7]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}