{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T22:19:45Z","timestamp":1775341185366,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":25,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783662448502","type":"print"},{"value":"9783662448519","type":"electronic"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-662-44851-9_5","type":"book-chapter","created":{"date-parts":[[2014,9,1]],"date-time":"2014-09-01T13:00:58Z","timestamp":1409576458000},"page":"66-81","source":"Crossref","is-referenced-by-count":11,"title":["Fast LSTD Using Stochastic Approximation: Finite Time Analysis and Application to Traffic Control"],"prefix":"10.1007","author":[{"given":"L. A.","family":"Prashanth","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nathaniel","family":"Korda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"R\u00e9mi","family":"Munos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"5_CR1","unstructured":"Bach, F., Moulines, E.: Non-asymptotic analysis of stochastic approximation algorithms for machine learning. In: NIPS (2011)"},{"key":"5_CR2","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control, 4th edn. Approximate Dynamic Programming, vol.\u00a0II (2012)"},{"key":"5_CR3","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Optimization and Neural Computation Series 3, vol.\u00a07. Athena Scientific (1996)"},{"key":"5_CR4","first-page":"33","volume":"22","author":"S. Bradtke","year":"1996","unstructured":"Bradtke, S., Barto, A.: Linear least-squares algorithms for temporal difference learning. Machine Learning\u00a022, 33\u201357 (1996)","journal-title":"Machine Learning"},{"key":"5_CR5","unstructured":"Dani, V., Hayes, T.P., Kakade, S.M.: Stochastic linear optimization under bandit feedback. In: COLT, pp. 355\u2013366 (2008)"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Fathi, M., Frikha, N.: Transport-entropy inequalities and deviation estimates for stochastic approximation schemes. arXiv preprint arXiv:1301.7740 (2013)","DOI":"10.1214\/EJP.v18-2586"},{"issue":"47","key":"5_CR7","first-page":"1","volume":"17","author":"N. Frikha","year":"2012","unstructured":"Frikha, N., Menozzi, S.: Concentration Bounds for Stochastic Approximations. Electron. Commun. Probab.\u00a017(47), 1\u201315 (2012)","journal-title":"Electron. Commun. Probab."},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Geramifard, A., Bowling, M., Zinkevich, M., Sutton, R.S.: iLSTD: Eligibility traces and convergence analysis. In: NIPS, vol.\u00a019, p. 441 (2007)","DOI":"10.7551\/mitpress\/7503.003.0060"},{"key":"5_CR9","unstructured":"Hazan, E., Kale, S.: Beyond the regret minimization barrier: an optimal algorithm for stochastic strongly-convex optimization, pp. 421\u2013436 (2011)"},{"key":"5_CR10","unstructured":"Kushner, H.J., Yin, G.: Stochastic approximation and recursive algorithms and applications, vol.\u00a035. Springer (2003)"},{"key":"5_CR11","first-page":"1107","volume":"4","author":"M.G. Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R.: Least-squares policy iteration. The Journal of Machine Learning Research\u00a04, 1107\u20131149 (2003)","journal-title":"The Journal of Machine Learning Research"},{"key":"5_CR12","first-page":"3041","volume":"13","author":"A. Lazaric","year":"2012","unstructured":"Lazaric, A., Ghavamzadeh, M., Munos, R.: Finite-sample analysis of least-squares policy iteration. Journal of Machine Learning Research\u00a013, 3041\u20133074 (2012)","journal-title":"Journal of Machine Learning Research"},{"issue":"4","key":"5_CR13","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1137\/0330046","volume":"30","author":"B.T. Polyak","year":"1992","unstructured":"Polyak, B.T., Juditsky, A.B.: Acceleration of stochastic approximation by averaging. SIAM Journal on Control and Optimization\u00a030(4), 838\u2013855 (1992)","journal-title":"SIAM Journal on Control and Optimization"},{"issue":"2","key":"5_CR14","doi-asserted-by":"publisher","first-page":"412","DOI":"10.1109\/TITS.2010.2091408","volume":"12","author":"L. Prashanth","year":"2011","unstructured":"Prashanth, L., Bhatnagar, S.: Reinforcement Learning with Function Approximation for Traffic Signal Control. IEEE Transactions on Intelligent Transportation Systems\u00a012(2), 412\u2013421 (2011)","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"issue":"9","key":"5_CR15","doi-asserted-by":"publisher","first-page":"3865","DOI":"10.1109\/TVT.2012.2209904","volume":"61","author":"L. Prashanth","year":"2012","unstructured":"Prashanth, L., Bhatnagar, S.: Threshold Tuning using Stochastic Optimization for Graded Signal Control. IEEE Transactions on Vehicular Technology\u00a061(9), 3865\u20133880 (2012)","journal-title":"IEEE Transactions on Vehicular Technology"},{"key":"5_CR16","doi-asserted-by":"crossref","unstructured":"Prashanth, L., Korda, N., Munos, R.: Fast LSTD using stochastic approximation: Finite time analysis and application to traffic control. arXiv preprint arXiv:1306.2557v4 (2014)","DOI":"10.1007\/978-3-662-44851-9_5"},{"key":"5_CR17","doi-asserted-by":"crossref","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. In: The Annals of Mathematical Statistics, pp. 400\u2013407 (1951)","DOI":"10.1214\/aoms\/1177729586"},{"key":"5_CR18","unstructured":"Ruppert, D.: Stochastic approximation. In: Handbook of Sequential Analysis, pp. 503\u2013529 (1991)"},{"key":"5_CR19","unstructured":"Silver, D., Sutton, R.S., M\u00fcller, M.: Reinforcement Learning of Local Shape in the Game of Go. In: IJCAI, vol.\u00a07, pp. 1053\u20131058 (2007)"},{"key":"5_CR20","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction, vol.\u00a01. Cambridge Univ. Press (1998)"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Szepesv\u00e1ri, C., Maei, H.R.: A convergent O(n) algorithm for off-policy temporal-difference learning with linear function approximation, pp. 1609\u20131616 (2009)","DOI":"10.1145\/1553374.1553501"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., et al.: Fast gradient-descent methods for temporal-difference learning with linear function approximation. In: ICML, pp. 993\u20131000. ACM (2009)","DOI":"10.1145\/1553374.1553501"},{"issue":"5","key":"5_CR23","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/9.580874","volume":"42","author":"J.N. Tsitsiklis","year":"1997","unstructured":"Tsitsiklis, J.N., Van Roy, B.: An analysis of temporal-difference learning with function approximation. IEEE Transactions on Automatic Control\u00a042(5), 674\u2013690 (1997)","journal-title":"IEEE Transactions on Automatic Control"},{"key":"5_CR24","unstructured":"Webscope, Y.: Yahoo! Webscope dataset ydata-frontpage-todaymodule-clicks-v2_0 (2011), \u201c http:\/\/research.yahoo.com\/Academic_Relations \u201d"},{"key":"5_CR25","unstructured":"Zinkevich, M.: Online convex programming and generalized infinitesimal gradient ascent. In: ICML, pp. 928\u2013925 (2003)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-44851-9_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,16]],"date-time":"2023-07-16T09:50:44Z","timestamp":1689501044000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-662-44851-9_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783662448502","9783662448519"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-44851-9_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014]]}}}