{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:53:46Z","timestamp":1725684826137},"publisher-location":"Berlin, Heidelberg","reference-count":10,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642299452"},{"type":"electronic","value":"9783642299469"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29946-9_7","type":"book-chapter","created":{"date-parts":[[2012,5,18]],"date-time":"2012-05-18T17:01:49Z","timestamp":1337360509000},"page":"30-41","source":"Crossref","is-referenced-by-count":0,"title":["Gradient Based Algorithms with Loss Functions and Kernels for Improved On-Policy Control"],"prefix":"10.1007","author":[{"given":"Matthew","family":"Robards","sequence":"first","affiliation":[]},{"given":"Peter","family":"Sunehag","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"7_CR1","unstructured":"Baird, L., Moore, A.: Gradient descent for general reinforcement learning. In: Neural Information Processing Systems, vol.\u00a011, pp. 968\u2013974. MIT Press (1998)"},{"key":"7_CR2","doi-asserted-by":"crossref","unstructured":"Engel, Y., Mannor, S., Meir, R.: Reinforcement learning with Gaussian processes. In: 22nd International Conference on Machine Learning (ICML 2005), Bonn, Germany, pp. 201\u2013208 (2005)","DOI":"10.1145\/1102351.1102377"},{"key":"7_CR3","unstructured":"Engel, Y., Mannor, S., Meir, R.: Bayes meets bellman: The gaussian process approach to temporal difference learning. In: Proc. of the 20th International Conference on Machine Learning, pp. 154\u2013161 (2003)"},{"key":"7_CR4","unstructured":"Maei, H., Szepesv\u00e1ri, C., Bhatnagar, S., Sutton, R.: Toward off-policy learning control with function approximation. 
In: Proceedings of the 27th International Conference on Machine Learning (2010)"},{"key":"7_CR5","doi-asserted-by":"crossref","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"M.L. Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, New York (1994)"},{"key":"7_CR6","unstructured":"Robards, M., Sunehag, P.: Online convex reinforcement learning. In: Submitted to 9th EWRL (2011)"},{"key":"7_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-23808-6_1","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"M. Robards","year":"2011","unstructured":"Robards, M., Sunehag, P., Sanner, S., Marthi, B.: Sparse Kernel-SARSA(\u03bb) with an Eligibility Trace. In: Gunopulos, D., Hofmann, T., Malerba, D., Vazirgiannis, M. (eds.) ECML PKDD 2011. LNCS, vol.\u00a06913, pp. 1\u201317. Springer, Heidelberg (2011)"},{"key":"7_CR8","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning. The MIT Press (1998)"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Sutton, R., Maei, H., Precup, D., Bhatnagar, S., Silver, D., Szepesv\u00e1ri, C., Wiewiora, E.: Fast gradient-descent methods for temporal-difference learning with linear function approximation. In: Proceedings of the 26th International Conference on Machine Learning (2009)","DOI":"10.1145\/1553374.1553501"},{"key":"7_CR10","doi-asserted-by":"crossref","unstructured":"Sutton, R., Szepesv\u00e1ri, C., Maei, H.: A convergent o(n) temporal-difference algorithm for off-policy learning with linear function approximation. In: NIPS, pp. 1609\u20131616. 
MIT Press (2008)","DOI":"10.1145\/1553374.1553501"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in Reinforcement Learning"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29946-9_7.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,22]],"date-time":"2023-06-22T20:53:19Z","timestamp":1687467199000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29946-9_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642299452","9783642299469"],"references-count":10,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29946-9_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}