{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,8,26]],"date-time":"2023-08-26T07:13:07Z","timestamp":1693033987991},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2017,9,12]],"date-time":"2017-09-12T00:00:00Z","timestamp":1505174400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Universities Natural Science Research Project of Jiangsu Province","award":["17KJB520031"],"award-info":[{"award-number":["17KJB520031"]}]},{"name":"Universities Natural Science Research Project of Anhui Province","award":["KJ2016A664"],"award-info":[{"award-number":["KJ2016A664"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cluster Comput"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1007\/s10586-017-1165-0","type":"journal-article","created":{"date-parts":[[2017,9,12]],"date-time":"2017-09-12T14:47:37Z","timestamp":1505227657000},"page":"3475-3487","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["An off-policy least square algorithms with eligibility trace based on importance reweighting"],"prefix":"10.1007","volume":"20","author":[{"given":"Haifei","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Ying","family":"Hong","sequence":"additional","affiliation":[]},{"given":"Jianlin","family":"Qiu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,9,12]]},"reference":[{"key":"1165_CR1","volume-title":"Reinforcement Learning","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, G.A.: Reinforcement Learning. MIT Press, Cambridge (1998)"},{"key":"1165_CR2","unstructured":"Koller, D., Parr, R.: Policy iteration for factored MDPs. In: Proceedings of the 16th Conference on Uncertain in Artificial Intelligence, Stanford, USA (2000)"},{"issue":"1","key":"1165_CR3","doi-asserted-by":"crossref","first-page":"89","DOI":"10.1007\/s10994-007-5038-2","volume":"71","author":"A Andoh","year":"2008","unstructured":"Andoh, A., Kobayashi, T., Kuzuoka, H., Tsujikawa, T., Suzuki, Y.: Learning near-optimal policies with Bellman-residual minimization based fitted policy iteration and a single sample path. Mach. Learn. 71(1), 89\u2013129 (2008)","journal-title":"Mach. Learn."},{"key":"1165_CR4","doi-asserted-by":"crossref","first-page":"674","DOI":"10.1109\/9.580874","volume":"42","author":"JN Tsitsiklis","year":"1997","unstructured":"Tsitsiklis, J.N., Van Roy, B.: An analysis of temporal-difference learning with function approximation. IEEE Trans. Autom. Control 42, 674\u2013690 (1997)","journal-title":"IEEE Trans. Autom. Control"},{"key":"1165_CR5","doi-asserted-by":"crossref","unstructured":"Geist, M., Pietquin, O.: Parametric value function approximation: a unified view. In: Proceedings of the IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning, Piscataway, USA (2011)","DOI":"10.1109\/ADPRL.2011.5967355"},{"key":"1165_CR6","doi-asserted-by":"crossref","first-page":"249","DOI":"10.1007\/3-540-46014-4_23","volume":"2308","author":"M Lagoudakis","year":"2002","unstructured":"Lagoudakis, M., Parr, R., Littman, M.: Least-squares methods in reinforcement learning for control. Methods Appl. Artif. Intell. 2308, 249\u2013260 (2002)","journal-title":"Methods Appl. Artif. Intell."},{"key":"1165_CR7","first-page":"1107","volume":"4","author":"M Lagoudakis","year":"2003","unstructured":"Lagoudakis, M., Parr, R.: Least squares policy iteration. J. Mach. Learn. Res. 4, 1107\u20131149 (2003)","journal-title":"J. Mach. Learn. Res."},{"issue":"2","key":"1165_CR8","doi-asserted-by":"crossref","first-page":"219","DOI":"10.1111\/j.1467-9574.1985.tb01140.x","volume":"39","author":"F Wikipedia","year":"1985","unstructured":"Wikipedia, F., Programming, D., Processes, M.: Markov decision process. Stat. Neerl. 39(2), 219\u2013233 (1985)","journal-title":"Stat. Neerl."},{"key":"1165_CR9","unstructured":"Thiery, C., Scherrer, B.: Least squares policy iteration: Bias-variance trade-off in control problems. In: Proceedings of the 27th International Conference on Machine Learning (ICML-10), Haifa, Israel, pp. 1071\u20131078 (2010)"},{"key":"1165_CR10","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton, R.S.: Learning to predict by the method of temporal differences. Mach. Learn. 3, 9\u201344 (1988)","journal-title":"Mach. Learn."},{"key":"1165_CR11","first-page":"283","volume":"22","author":"J Peng","year":"1996","unstructured":"Peng, J., Williams, R.J.: Incremental multi-step Q-learning. Mach. Learn. 22, 283\u2013290 (1996)","journal-title":"Mach. Learn."},{"key":"1165_CR12","unstructured":"Schoknecht, R.: Optimality of reinforcement learning algorithms with linear function approximation. In: Advances in Neural Information Processing Systems 15 (2002)"},{"key":"1165_CR13","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Szepesvari, Cs., Maei, H.R.: A convergent O(n) algorithm for off-policy temporal-difference learning with linear function approximation. In: Proceedings of the 25th Annual Conference on Neural Information Processing Systems, Granada, Spain (2008)","DOI":"10.1145\/1553374.1553501"},{"issue":"1\u20133","key":"1165_CR14","first-page":"33","volume":"22","author":"SJ Bradtke","year":"1996","unstructured":"Bradtke, S.J., Barto, A.G.: Linear least-squares algorithms for temporal difference learning. Mach. Learn. 22(1\u20133), 33\u201357 (1996)","journal-title":"Mach. Learn."},{"key":"1165_CR15","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1023\/A:1017936530646","volume":"49","author":"J Boyan","year":"2002","unstructured":"Boyan, J.: Technical update: least-squares temporal difference learning. Mach. Learn. 49, 233\u2013246 (2002)","journal-title":"Mach. Learn."},{"key":"1165_CR16","unstructured":"Bertsekas, D.P., Ioffe, S.: Temporal differences-based policy iteration and applications in neuro-dynamic programming. Technical Report, LIDS-P-2349, Massachusetts Institute of Technology, Cambridge, US (1996). http:\/\/web.mit.edu\/dimitrib\/www\/Tempdif.pdf"},{"key":"1165_CR17","first-page":"1107","volume":"4","author":"MG Lagoudakis","year":"2003","unstructured":"Lagoudakis, M.G., Parr, R.: Least-squares policy iteration. J. Mach. Learn. Res. 4, 1107\u20131149 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"1165_CR18","unstructured":"Lagoudakis, M.G., Parr, R.: Model-free least squares policy iteration. In: International Conference on Neural Information Processing Systems: Natural & Synthetic 4(6), 1547\u20131554 (2001)"},{"key":"1165_CR19","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton, R.S.: Learning to predict by the method of temporal differences. Mach. Learn. 3, 9\u201344 (1988)","journal-title":"Mach. Learn."},{"key":"1165_CR20","doi-asserted-by":"crossref","unstructured":"Bu\u015foniu, L., De Schutter, B., Babu\u0161ka, R., Ernst, D.: Using prior knowledge to accelerate online least-squares policy iteration. In: 2010 IEEE International Conference on Automation, Quality and Testing, Robotics (AQTR-2010), Cluj-Napoca, Romania (2010)","DOI":"10.1109\/AQTR.2010.5520917"},{"key":"1165_CR21","doi-asserted-by":"crossref","unstructured":"Bu\u015foniu, L., Ernst, D., De Schutter, B., Babu\u0161ka, R.: Online least-squares policy iteration for reinforcement learning control. In: Proceedings 2010 American Control Conference (ACC-2010), Baltimore, US, pp. 486\u2013491 (2010)","DOI":"10.1109\/ACC.2010.5530856"},{"key":"1165_CR22","doi-asserted-by":"crossref","unstructured":"Jung, T., Polani, D.: Kernelizing LSPE( $$\\lambda )$$ \u03bb ) . In: Proceedings 2007 IEEE Symposium on Approximate Dynamic Programming and Reinforcement Learning (ADPRL-2007), Honolulu, US, pp. 338\u2013345 (2007)","DOI":"10.1109\/ADPRL.2007.368208"},{"key":"1165_CR23","unstructured":"Jung, T., Polani, D.: Learning RoboCup-keepaway with kernels. In: Gaussian Processes in Practice, JMLR Workshop and Conference Proceedings, vol. 1, pp. 33\u201357 (2007)"},{"key":"1165_CR24","unstructured":"Li, L., Littman, M.L., Mansley, C.R.: Online exploration in least-squares policy iteration. In: Proceedings 8th International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS-2009), Budapest, Hungary, vol. 2, pp. 733\u2013739 (2009)"},{"key":"1165_CR25","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, 2nd edn. (in progress, draft). MIT Press, Cambridge (2017)"},{"key":"1165_CR26","unstructured":"Yahyaa, S., Manderick, B.: Knowledge gradient exploration in online kernel-based LSPI. In: Proceedings of the 25th Belgium-Netherlands Artificial Intelligence Conference, Delft, The Netherlands, pp. 263\u2013270 (2013)"}],"container-title":["Cluster Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10586-017-1165-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-017-1165-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-017-1165-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T18:32:06Z","timestamp":1692988326000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10586-017-1165-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,9,12]]},"references-count":26,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["1165"],"URL":"https:\/\/doi.org\/10.1007\/s10586-017-1165-0","relation":{},"ISSN":["1386-7857","1573-7543"],"issn-type":[{"value":"1386-7857","type":"print"},{"value":"1573-7543","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,9,12]]}}}