{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T06:12:48Z","timestamp":1773814368410,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,1,4]],"date-time":"2021-01-04T00:00:00Z","timestamp":1609718400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,4]],"date-time":"2021-01-04T00:00:00Z","timestamp":1609718400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s10994-020-05912-5","type":"journal-article","created":{"date-parts":[[2021,1,4]],"date-time":"2021-01-04T21:03:13Z","timestamp":1609794193000},"page":"559-618","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Concentration bounds for temporal difference learning with linear function approximation: the case of batch data and uniform sampling"],"prefix":"10.1007","volume":"110","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0362-6730","authenticated-orcid":false,"given":"L. A.","family":"Prashanth","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nathaniel","family":"Korda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"R\u00e9mi","family":"Munos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,1,4]]},"reference":[{"issue":"1","key":"5912_CR1","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/s10994-007-5038-2","volume":"71","author":"A Antos","year":"2008","unstructured":"Antos, A., Szepesv\u00e1ri, C., & Munos, R. (2008). Learning near-optimal policies with bellman-residual minimization based fitted policy iteration and a single sample path. Machine Learning, 71(1), 89\u2013129.","journal-title":"Machine Learning"},{"key":"5912_CR2","unstructured":"Bach, F., & Moulines, E. (2011). Non-asymptotic analysis of stochastic approximation algorithms for machine learning. In Advances in neural information processing systems (pp. 451\u2013459)."},{"key":"5912_CR3","unstructured":"Bach, F., & Moulines, E. (2013). Non-strongly-convex smooth stochastic approximation with convergence rate o (1\/n). In Advances in neural information processing systems (pp. 773\u2013781)."},{"key":"5912_CR4","unstructured":"Bertsekas, D. P. (2012). Dynamic Programming and Optimal Control, Approximate Dynamic Programming, (4th ed., Vol. II). Belmont: Athena Scientific."},{"key":"5912_CR5","volume-title":"Neuro-Dynamic Programming (Optimization and Neural Computation Series, 3),","author":"DP Bertsekas","year":"1996","unstructured":"Bertsekas, D. P., & Tsitsiklis, J. N. (1996). Neuro-Dynamic Programming (Optimization and Neural Computation Series, 3), (Vol. 7). Belmont: Athena Scientific."},{"key":"5912_CR6","unstructured":"Bhandari, J., Russo, D., & Singal, R. (2018). A finite time analysis of temporal difference learning with linear function approximation. In Conference on learning theory pp. 1691\u20131692."},{"key":"5912_CR7","doi-asserted-by":"publisher","DOI":"10.1007\/978-93-86279-38-5","volume-title":"Stochastic approximation: A dynamical systems viewpoint","author":"V Borkar","year":"2008","unstructured":"Borkar, V. (2008). Stochastic approximation: A dynamical systems viewpoint. Cambridge: Cambridge University Press."},{"issue":"2","key":"5912_CR8","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1137\/S0363012997331639","volume":"38","author":"VS Borkar","year":"2000","unstructured":"Borkar, V. S., & Meyn, S. P. (2000). The ode method for convergence of stochastic approximation and reinforcement learning. SIAM Journal on Control and Optimization, 38(2), 447\u2013469.","journal-title":"SIAM Journal on Control and Optimization"},{"key":"5912_CR9","first-page":"33","volume":"22","author":"S Bradtke","year":"1996","unstructured":"Bradtke, S., & Barto, A. (1996). Linear least-squares algorithms for temporal difference learning. Machine Learning, 22, 33\u201357.","journal-title":"Machine Learning"},{"key":"5912_CR10","doi-asserted-by":"crossref","unstructured":"Dalal, G., Sz\u00f6r\u00e9nyi, B., Thoppe, G., & Mannor, S. (2018). Finite sample analyses for td (0) with function approximation. In Thirty-second AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v32i1.12079"},{"key":"5912_CR11","unstructured":"Dani, V., Hayes, T. P., & Kakade, S. M. (2008). Stochastic linear optimization under bandit feedback. In Proceedings of the 21st annual conference on learning theory (COLT) (pp. 355\u2013366)."},{"key":"5912_CR12","unstructured":"Dieuleveut, A., Flammarion, N., & Bach, F. (2016). Harder, better, faster, stronger convergence rates for least-squares regression. arXiv preprint arXiv:160205419."},{"key":"5912_CR13","doi-asserted-by":"crossref","unstructured":"Fathi, M., & Frikha, N. (2013). Transport-entropy inequalities and deviation estimates for stochastic approximation schemes. arXiv preprint arXiv:13017740.","DOI":"10.1214\/EJP.v18-2586"},{"issue":"47","key":"5912_CR14","first-page":"1","volume":"17","author":"N Frikha","year":"2012","unstructured":"Frikha, N., & Menozzi, S. (2012). Concentration bounds for stochastic approximations. Electronic Communications in Probability, 17(47), 1\u201315.","journal-title":"Electronic Communications in Probability"},{"key":"5912_CR15","doi-asserted-by":"crossref","unstructured":"Geramifard, A., Bowling, M., Zinkevich, M., & Sutton, R. S. (2007). iLSTD: Eligibility traces and convergence analysis. In NIPS (Vol.\u00a019, p. 441).","DOI":"10.7551\/mitpress\/7503.003.0060"},{"key":"5912_CR16","unstructured":"Hazan, E., & Kale, S. (2011). Beyond the regret minimization barrier: an optimal algorithm for stochastic strongly-convex optimization. In COLT (pp. 421\u2013436)."},{"key":"5912_CR17","unstructured":"Konda, V. R. (2002). Actor-critic algorithms. PhD thesis, Department of Electrical Engineering and Computer Science, MIT."},{"key":"5912_CR18","doi-asserted-by":"crossref","unstructured":"Korda, N., Prashanth, L. A., & Munos, R. (2015). Fast Gradient Descent for Drifting Least Squares Regression, with Application to Bandits. In Proceedings of the twenty-ninth AAAI conference on artificial intelligence (pp. 2708\u20132714).","DOI":"10.1609\/aaai.v29i1.9619"},{"key":"5912_CR19","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4684-9352-8","volume-title":"Stochastic approximation methods for constrained and unconstrained systems","author":"H Kushner","year":"1978","unstructured":"Kushner, H., & Clark, D. (1978). Stochastic approximation methods for constrained and unconstrained systems. Berlin: Springer-Verlag."},{"key":"5912_CR20","volume-title":"Stochastic approximation and recursive algorithms and applications,","author":"HJ Kushner","year":"2003","unstructured":"Kushner, H. J., & Yin, G. (2003). Stochastic approximation and recursive algorithms and applications, (Vol. 35). Berlin: Springer Verlag."},{"key":"5912_CR21","first-page":"1107","volume":"4","author":"MG Lagoudakis","year":"2003","unstructured":"Lagoudakis, M. G., & Parr, R. (2003). Least-squares policy iteration. The Journal of Machine Learning Research, 4, 1107\u20131149.","journal-title":"The Journal of Machine Learning Research"},{"key":"5912_CR22","first-page":"1347","volume":"84","author":"C Lakshminarayanan","year":"2018","unstructured":"Lakshminarayanan, C., & Szepesvari, C. (2018). Linear stochastic approximation: How far does constant step-size and iterate averaging go? Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics, 84, 1347\u20131355.","journal-title":"Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics"},{"key":"5912_CR23","first-page":"3041","volume":"13","author":"A Lazaric","year":"2012","unstructured":"Lazaric, A., Ghavamzadeh, M., & Munos, R. (2012). Finite-sample analysis of least-squares policy iteration. Journal of Machine Learning Research, 13, 3041\u20133074.","journal-title":"Journal of Machine Learning Research"},{"key":"5912_CR24","doi-asserted-by":"crossref","unstructured":"Li, L., Chu, W., Langford, J., & Schapire, R. E. (2010). A contextual-bandit approach to personalized news article recommendation. In Proceedings of the 19th international conference on world wide web, ACM (pp. 661\u2013670).","DOI":"10.1145\/1772690.1772758"},{"key":"5912_CR25","doi-asserted-by":"crossref","unstructured":"Li, L., Chu, W., Langford, J., & Wang, X. (2011). Unbiased offline evaluation of contextual-bandit-based news article recommendation algorithms. In Proceedings of the fourth ACM international conference on web search and data mining, ACM (pp. 297\u2013306).","DOI":"10.1145\/1935826.1935878"},{"key":"5912_CR26","unstructured":"Liu, B., Liu, J., Ghavamzadeh, M., Mahadevan, S., & Petrik, M. (2015). Finite-sample analysis of proximal gradient TD algorithms. In: Proceedings of the 31st conference on uncertainty in artificial intelligence, Amsterdam, Netherlands"},{"key":"5912_CR27","unstructured":"Mary, J., Garivier, A., Li, L., Munos, R., Nicol, O., Ortner, R., & Preux, P. (2012). ICML exploration and exploitation 3\u2014new challenges. http:\/\/explochallenge.inria.fr."},{"key":"5912_CR28","unstructured":"Narayanan, C., & Szepesv\u00e1ri, C. (2017). Finite time bounds for temporal difference learning with function approximation: Problems with some \u201cstate-of-the-art\u201d results. Technical report, https:\/\/sites.ualberta.ca\/~szepesva\/papers\/TD-issues17.pdf."},{"key":"5912_CR29","volume-title":"Problem complexity and method efficiency in optimization","author":"A Nemirovsky","year":"1983","unstructured":"Nemirovsky, A., & Yudin, D. (1983). Problem complexity and method efficiency in optimization. NY: Wiley-Interscience."},{"key":"5912_CR30","unstructured":"Pires, BA., & Szepesv\u00e1ri, C. (2012). Statistical linear estimation with penalized estimators: An application to reinforcement learning. arXiv preprint arXiv:12066444."},{"issue":"4","key":"5912_CR31","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1137\/0330046","volume":"30","author":"BT Polyak","year":"1992","unstructured":"Polyak, B. T., & Juditsky, A. B. (1992). Acceleration of stochastic approximation by averaging. SIAM Journal on Control and Optimization, 30(4), 838\u2013855.","journal-title":"SIAM Journal on Control and Optimization"},{"issue":"2","key":"5912_CR32","doi-asserted-by":"publisher","first-page":"412","DOI":"10.1109\/TITS.2010.2091408","volume":"12","author":"LA Prashanth","year":"2011","unstructured":"Prashanth, L. A., & Bhatnagar, S. (2011). Reinforcement learning with function approximation for traffic signal control. IEEE Transactions on Intelligent Transportation Systems, 12(2), 412\u2013421.","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"issue":"9","key":"5912_CR33","doi-asserted-by":"publisher","first-page":"3865","DOI":"10.1109\/TVT.2012.2209904","volume":"61","author":"LA Prashanth","year":"2012","unstructured":"Prashanth, L. A., & Bhatnagar, S. (2012). Threshold tuning using stochastic optimization for graded signal control. IEEE Transactions on Vehicular Technology, 61(9), 3865\u20133880.","journal-title":"IEEE Transactions on Vehicular Technology"},{"key":"5912_CR34","doi-asserted-by":"crossref","unstructured":"Prashanth, L. A., Korda, N., & Munos, R. (2014). Fast LSTD using stochastic approximation: Finite time analysis and application to traffic control. In Joint European conference on machine learning and knowledge discovery in databases (pp. 66\u201381).","DOI":"10.1007\/978-3-662-44851-9_5"},{"key":"5912_CR35","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H Robbins","year":"1951","unstructured":"Robbins, H., & Monro, S. (1951). A stochastic approximation method. The Annals of Mathematical Statistics, 22, 400\u2013407","journal-title":"The Annals of Mathematical Statistics"},{"key":"5912_CR36","unstructured":"Roux, N. L., Schmidt, M., & Bach, F. R. (2012). A stochastic gradient method with an exponential convergence rate for finite training sets. In Advances in neural information processing systems (pp. 2663\u20132671)."},{"key":"5912_CR37","unstructured":"Ruppert, D. (1991). Stochastic approximation. In B. K. Ghosh & P. K. Sen (Eds.), Handbook of sequential analysis (pp. 503\u2013529)."},{"key":"5912_CR38","first-page":"1053","volume":"7","author":"D Silver","year":"2007","unstructured":"Silver, D., Sutton, R. S., & M\u00fcller, M. (2007). Reinforcement learning of local shape in the game of go. IJCAI, 7, 1053\u20131058.","journal-title":"IJCAI"},{"key":"5912_CR39","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R. S., & Barto, A. G. (1998). Reinforcement learning: An introduction. Cambridge: Cambridge University Press."},{"key":"5912_CR40","doi-asserted-by":"crossref","unstructured":"Sutton, R. S., Szepesv\u00e1ri, C., & Maei, H. R. (2009a). A convergent O(n) algorithm for off-policy temporal-difference learning with linear function approximation. In NIPS (pp. 1609\u20131616).","DOI":"10.1145\/1553374.1553501"},{"key":"5912_CR41","doi-asserted-by":"crossref","unstructured":"Sutton, R. S., et\u00a0al.(2009b). Fast gradient-descent methods for temporal-difference learning with linear function approximation. In ICML ACM (pp. 993\u20131000).","DOI":"10.1145\/1553374.1553501"},{"key":"5912_CR42","unstructured":"Tagorti, M., & Scherrer, B. (2015). On the Rate of Convergence and Error Bounds for LSTD($$\\lambda$$). In ICML."},{"key":"5912_CR43","unstructured":"Tarr\u00e8s, P., & Yao, Y. (2011). Online learning as stochastic approximation of regularization paths. arXiv preprint arXiv:11035538."},{"issue":"5","key":"5912_CR44","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/9.580874","volume":"42","author":"JN Tsitsiklis","year":"1997","unstructured":"Tsitsiklis, J. N., & Van Roy, B. (1997). An analysis of temporal-difference learning with function approximation. IEEE Transactions on Automatic Control, 42(5), 674\u2013690.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"5912_CR45","doi-asserted-by":"publisher","DOI":"10.1017\/9781108627771","volume-title":"High-dimensional statistics: A non-asymptotic viewpoint","author":"MJ Wainwright","year":"2019","unstructured":"Wainwright, M. J. (2019). High-dimensional statistics: A non-asymptotic viewpoint (Vol. 48). Cambridge: Cambridge University Press."},{"key":"5912_CR46","unstructured":"Webscope, Y. (2011). Yahoo! Webscope dataset ydata-frontpage-todaymodule-clicks-$$\\text{v}2_0$$. http:\/\/research.yahoo.com\/Academic_Relations."},{"key":"5912_CR47","unstructured":"Yu, H. (2015). On convergence of emphatic temporal-difference learning. In COLT (pp. 1724\u20131751)."},{"issue":"7","key":"5912_CR48","doi-asserted-by":"publisher","first-page":"1515","DOI":"10.1109\/TAC.2009.2022097","volume":"54","author":"H Yu","year":"2009","unstructured":"Yu, H., & Bertsekas, D. P. (2009). Convergence results for some temporal difference methods based on least squares. IEEE Transactions on Automatic Control, 54(7), 1515\u20131531.","journal-title":"IEEE Transactions on Automatic Control"},{"key":"5912_CR49","unstructured":"Zinkevich, M. (2003). Online convex programming and generalized infinitesimal gradient ascent. In ICML (pp. 928\u2013925)."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-020-05912-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-020-05912-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-020-05912-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,16]],"date-time":"2023-10-16T21:03:18Z","timestamp":1697490198000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-020-05912-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,4]]},"references-count":49,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["5912"],"URL":"https:\/\/doi.org\/10.1007\/s10994-020-05912-5","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,1,4]]},"assertion":[{"value":"4 August 2014","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 January 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 September 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 January 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}