{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T15:21:41Z","timestamp":1772810501770,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":15,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642299452","type":"print"},{"value":"9783642299469","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_13","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T13:01:49Z","timestamp":1337346109000},"page":"102-114","source":"Crossref","is-referenced-by-count":16,"title":["Regularized Least Squares Temporal Difference Learning with Nested \u21132 and \u21131 Penalization"],"prefix":"10.1007","author":[{"given":"Matthew W.","family":"Hoffman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alessandro","family":"Lazaric","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohammad","family":"Ghavamzadeh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"R\u00e9mi","family":"Munos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"13_CR1","doi-asserted-by":"crossref","unstructured":"Antos, A., Szepesv\u00e1ri, C., Munos, R.: Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path. Machine Learning\u00a071(1) (2008)","DOI":"10.1007\/s10994-007-5038-2"},{"key":"13_CR2","first-page":"33","volume":"22","author":"S. Bradtke","year":"1996","unstructured":"Bradtke, S., Barto, A.: Linear least-squares algorithms for temporal difference learning. Machine Learning\u00a022, 33\u201357 (1996)","journal-title":"Machine Learning"},{"key":"13_CR3","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1214\/07-EJS008","volume":"1","author":"F. Bunea","year":"2007","unstructured":"Bunea, F., Tsybakov, A., Wegkamp, M.: Sparsity oracle inequalities for the lasso. Electronic Journal of Statistics\u00a01, 169\u2013194 (2007)","journal-title":"Electronic Journal of Statistics"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Efron, B., Hastie, T., Johnstone, I., Tibshirani, R.: Least angle regression. Annals of Statistics\u00a032(2) (2004)","DOI":"10.1214\/009053604000000067"},{"key":"13_CR5","unstructured":"Farahmand, A., Ghavamzadeh, M., Szepesvari, C., Mannor, S.: Regularized policy iteration. In: Advances in Neural Information Processing Systems\u00a021 (2009)"},{"issue":"2","key":"13_CR6","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1214\/07-AOAS131","volume":"1","author":"J. Friedman","year":"2007","unstructured":"Friedman, J., Hastie, T., H\u00f6fling, H., Tibshirani, R.: Pathwise coordinate optimization. The Annals of Applied Statistics\u00a01(2), 302\u2013332 (2007)","journal-title":"The Annals of Applied Statistics"},{"key":"13_CR7","volume-title":"The elements of statistical learning","author":"J. Friedman","year":"2001","unstructured":"Friedman, J., Hastie, T., Tibshirani, R.: The elements of statistical learning. Springer, Heidelberg (2001)"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Geist, M., Scherrer, B.: \u21131-penalized projected bellman residual. In: European Workshop on Reinforcement Learning (2011)","DOI":"10.1007\/978-3-642-29946-9_12"},{"key":"13_CR9","unstructured":"Ghavamzadeh, M., Lazaric, A., Munos, R., Hoffman, M.: Finite-sample analysis of Lasso-TD. In: Proceedings of the International Conference on Machine Learning (2011)"},{"key":"13_CR10","unstructured":"Johns, J., Painter-Wakefield, C., Parr, R.: Linear complementarity for regularized policy evaluation and improvement. In: Advances in Neural Information Processing Systems\u00a023 (2010)"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Kolter, J.Z., Ng, A.Y.: Regularization and feature selection in least-squares temporal difference learning. In: Proceedings of the International Conference on Machine Learning (2009)","DOI":"10.1145\/1553374.1553442"},{"key":"13_CR12","unstructured":"Lagoudakis, M.G., Parr, R.: Least-squares policy iteration. Journal of Machine Learning Research\u00a04 (2003)"},{"key":"13_CR13","unstructured":"Schmidt, M.: Graphical Model Structure Learning with l1-Regularization. Ph.D. thesis, University of British Columbia (2010)"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction. MIT Press (1998)","DOI":"10.1109\/TNN.1998.712192"},{"issue":"1","key":"13_CR15","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R. Tibshirani","year":"1996","unstructured":"Tibshirani, R.: Regression shrinkage and selection via the lasso. Journal of the Royal Statistical Society. Series B (Methodological)\u00a058(1), 267\u2013288 (1996)","journal-title":"Journal of the Royal Statistical Society. Series B (Methodological)"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_13.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,4]],"date-time":"2021-05-04T07:22:22Z","timestamp":1620112942000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}