{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T18:06:13Z","timestamp":1761156373891},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2017,10,10]],"date-time":"2017-10-10T00:00:00Z","timestamp":1507593600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"MSIP(Ministry of Science, ICT and Future Planning), Korea, under the Global IT Talent support program","award":["IITP-2016-R01341610330001002"],"award-info":[{"award-number":["IITP-2016-R01341610330001002"]}]},{"name":"National Research Foundation of Korea (NRF) funded by the Ministry of Education, Science, and Technology","award":["2014R1A1A2057735"],"award-info":[{"award-number":["2014R1A1A2057735"]}]},{"name":"Kyung Hee University in 2016","award":["KHU-20160601"],"award-info":[{"award-number":["KHU-20160601"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"published-print":{"date-parts":[[2019,10]]},"DOI":"10.1007\/s10462-017-9579-x","type":"journal-article","created":{"date-parts":[[2017,10,10]],"date-time":"2017-10-10T10:51:33Z","timestamp":1507632693000},"page":"2039-2059","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Importance sampling policy gradient algorithms in reproducing kernel Hilbert space"],"prefix":"10.1007","volume":"52","author":[{"given":"Tuyen Pham","family":"Le","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vien Anh","family":"Ngo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"P. Marlith","family":"Jaramillo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"TaeChoong","family":"Chung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,10,10]]},"reference":[{"key":"9579_CR1","unstructured":"Bagnell JA, Schneider J (2003) Policy search in kernel Hilbert space. Technical report, Robotics Institute, Carnegie Mellon University"},{"key":"9579_CR2","doi-asserted-by":"publisher","first-page":"351","DOI":"10.1613\/jair.807","volume":"15","author":"J Baxter","year":"2001","unstructured":"Baxter J, Bartlett PL, Weaver L (2001) Experiments with infinite-horizon, policy-gradient estimation. J Artif Intell Res 15:351\u2013381","journal-title":"J Artif Intell Res"},{"key":"9579_CR3","volume-title":"Pattern recognition and machine learning (Information science and statistics)","author":"CM Bishop","year":"2006","unstructured":"Bishop CM (2006) Pattern recognition and machine learning (Information science and statistics). Springer, New York"},{"key":"9579_CR4","unstructured":"Boyan JA (1999) Least-squares temporal difference learning. In: Proceedings of the sixteenth international conference on machine learning, San Francisco, CA, USA. Morgan Kaufmann Publishers Inc., pp 49\u201356"},{"key":"9579_CR5","doi-asserted-by":"crossref","unstructured":"Daniel C, Neumann G, and Peters J (2012) Learning concurrent motor skills in versatile solution spaces. In: 2012 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 3591\u20133597. IEEE","DOI":"10.1109\/IROS.2012.6386047"},{"issue":"08","key":"9579_CR6","first-page":"08","volume":"13","author":"R Nardi De","year":"2013","unstructured":"De Nardi R (2013) The qrsim quadrotors simulator. RN 13(08):08","journal-title":"RN"},{"issue":"1\u20132","key":"9579_CR7","first-page":"1","volume":"2","author":"M Deisenroth","year":"2013","unstructured":"Deisenroth M, Neumann G, Peters J (2013) A survey on policy search for robotics. Found Trends Robot 2(1\u20132):1\u2013142","journal-title":"Found Trends Robot"},{"issue":"10","key":"9579_CR8","doi-asserted-by":"publisher","first-page":"1399","DOI":"10.1016\/j.neunet.2009.01.002","volume":"22","author":"H Hachiya","year":"2009","unstructured":"Hachiya H, Akiyama T, Sugiayma M, Peters J (2009) Adaptive importance sampling for value function approximation in off-policy reinforcement learning. Neural Netw 22(10):1399\u20131410","journal-title":"Neural Netw"},{"key":"9579_CR9","doi-asserted-by":"publisher","first-page":"1171","DOI":"10.1214\/009053607000000677","volume":"3","author":"T Hofmann","year":"2008","unstructured":"Hofmann T, Sch\u00f6lkopf B, Smola AJ (2008) Kernel methods in machine learning. Ann Stat 3:1171\u20131220","journal-title":"Ann Stat"},{"key":"9579_CR10","doi-asserted-by":"crossref","unstructured":"Kober J, Oztop E, Peters J, Walsh T (2011) Reinforcement learning to adjust robot movements to new situations. In: Twenty-second international joint conference on artificial intelligence (IJCAI 2011), pp 2650\u20132655. AAAI Press","DOI":"10.15607\/RSS.2010.VI.005"},{"issue":"1","key":"9579_CR11","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1007\/s10994-010-5223-6","volume":"84","author":"J Kober","year":"2011","unstructured":"Kober J, Peters J (2011) Policy search for motor primitives in robotics. Mach Learn 84(1):171\u2013203","journal-title":"Mach Learn"},{"issue":"4","key":"9579_CR12","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1007\/s10514-012-9290-3","volume":"33","author":"J Kober","year":"2012","unstructured":"Kober J, Wilhelm A, Oztop E, Peters J (2012) Reinforcement learning to adjust parametrized motor primitives to new situations. Auton Robots 33(4):361\u2013379","journal-title":"Auton Robots"},{"key":"9579_CR13","doi-asserted-by":"crossref","unstructured":"Kolter JZ, Ng AY (2009) Regularization and feature selection in least-squares temporal difference learning. In: Proceedings of the 26th annual international conference on machine learning, pp 521\u2013528. ACM","DOI":"10.1145\/1553374.1553442"},{"key":"9579_CR14","unstructured":"Lawrence G, Cowan N, Russell S (2002) Efficient gradient estimation for motor control learning. In: Proceedings of the Nineteenth conference on uncertainty in artificial intelligence, San Francisco, CA, USA. Morgan Kaufmann Publishers Inc., pp 354\u2013361"},{"key":"9579_CR15","unstructured":"Lever G, Stafford R (2015) Modelling policies in mdps in reproducing kernel Hilbert space. In: Proceedings of the eighteenth international conference on artificial intelligence and statistics, pp 590\u2013598"},{"key":"9579_CR16","unstructured":"Levine S, Koltun V (2013a) Guided policy search. In: Proceedings of the 30th international conference on machine learning, pp 1\u20139"},{"key":"9579_CR17","unstructured":"Levine S, Koltun V (2013b) Variational policy search via trajectory optimization. In: Advances in neural information processing systems 26: 27th annual conference on neural information processing systems 2013. Proceedings of a meeting held on 5-8 December 2013, Lake Tahoe, NV, USA, pp 207\u2013215"},{"issue":"1","key":"9579_CR18","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1162\/0899766052530802","volume":"17","author":"CA Micchelli","year":"2005","unstructured":"Micchelli CA, Pontil M (2005) On learning vector-valued functions. Neural Comput 17(1):177\u2013204","journal-title":"Neural Comput"},{"issue":"1","key":"9579_CR19","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1109\/MSP.2011.2179329","volume":"30","author":"P Milanfar","year":"2013","unstructured":"Milanfar P (2013) A tour of modern image filtering: new insights and methods, both practical and theoretical. IEEE Signal Process Mag 30(1):106\u2013128","journal-title":"IEEE Signal Process Mag"},{"key":"9579_CR20","unstructured":"Neumann G (2011) Variational inference for policy search in changing situations. In: Proceedings of the 28th international conference on machine learning (ICML-11), pp 817\u2013824"},{"key":"9579_CR21","doi-asserted-by":"crossref","unstructured":"Peters J, M\u00fclling K, Altun Y (2010) Relative entropy policy search. In: Proceedings of the twenty-fourth AAAI conference on artificial intelligence, AAAI 2010, Atlanta, GA, USA, 11\u201315 July 2010","DOI":"10.1609\/aaai.v24i1.7727"},{"issue":"4","key":"9579_CR22","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J Peters","year":"2008","unstructured":"Peters J, Schaal S (2008a) Reinforcement learning of motor skills with policy gradients. Neural Netw 21(4):682\u2013697","journal-title":"Neural Netw"},{"issue":"4","key":"9579_CR23","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J Peters","year":"2008","unstructured":"Peters J, Schaal S (2008b) Reinforcement learning of motor skills with policy gradients. Neural Netw 21(4):682\u2013697","journal-title":"Neural Netw"},{"key":"9579_CR24","unstructured":"Precup D, Sutton RS, Singh SP (2000) Eligibility traces for off-policy policy evaluation. In: Proceedings of the seventeenth international conference on machine learning, ICML \u201900, San Francisco, CA, USA. Morgan Kaufmann Publishers Inc., pp 759\u2013766"},{"key":"9579_CR25","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/4175.001.0001","volume-title":"Learning with kernels: support vector machines, regularization, optimization, and beyond","author":"B Scholkopf","year":"2001","unstructured":"Scholkopf B, Smola AJ (2001) Learning with kernels: support vector machines, regularization, optimization, and beyond. MIT Press, Cambridge"},{"key":"9579_CR26","unstructured":"Schulman J, Levine S, Abbeel P, Jordan MI, Moritz P (2015) Trust region policy optimization. In: Proceedings of the 32nd international conference on machine learning, ICML 2015, Lille, France, 6\u201311 July 2015, pp 1889\u20131897"},{"key":"9579_CR27","unstructured":"Shelton CR (2001) Policy improvement for POMDPs using normalized importance sampling. In: Proceedings of the seventeenth conference on uncertainty in artificial intelligence, UAI\u201901, San Francisco, CA, USA. Morgan Kaufmann Publishers Inc., pp 496\u2013503"},{"key":"9579_CR28","unstructured":"Sutton RS, McAllester DA, Singh SP, Mansour Y et al (1999) Policy gradient methods for reinforcement learning with function approximation. In: NIPS, vol 99. MIT Press, pp 1057\u20131063"},{"issue":"5","key":"9579_CR29","doi-asserted-by":"publisher","first-page":"674","DOI":"10.1109\/9.580874","volume":"42","author":"JN Tsitsiklis","year":"1997","unstructured":"Tsitsiklis JN, Van Roy B (1997) An analysis of temporal-difference learning with function approximation. IEEE Trans Autom Control 42(5):674\u2013690","journal-title":"IEEE Trans Autom Control"},{"key":"9579_CR30","unstructured":"Vien NA, Englert P, Toussaint M (2016) Policy search in reproducing kernel Hilbert space. In: Proceedings of the 25th international joint conference on artificial intelligence"},{"issue":"1\u20133","key":"9579_CR31","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1023\/A:1013955821559","volume":"48","author":"P Vincent","year":"2002","unstructured":"Vincent P, Bengio Y (2002) Kernel matching pursuit. Mach Learn 48(1\u20133):165\u2013187","journal-title":"Mach Learn"},{"issue":"10","key":"9579_CR32","doi-asserted-by":"publisher","first-page":"1484","DOI":"10.1016\/j.neunet.2009.05.011","volume":"22","author":"P Wawrzy\u0144ski","year":"2009","unstructured":"Wawrzy\u0144ski P (2009) Real-time reinforcement learning by sequential actor-critics and experience replay. Neural Netw 22(10):1484\u20131497","journal-title":"Neural Netw"},{"issue":"3\u20134","key":"9579_CR33","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8(3\u20134):229\u2013256","journal-title":"Mach Learn"},{"issue":"4","key":"9579_CR34","doi-asserted-by":"publisher","first-page":"973","DOI":"10.1109\/TNN.2007.899161","volume":"18","author":"X Xu","year":"2007","unstructured":"Xu X, Hu D, Lu X (2007) Kernel-based least squares policy iteration for reinforcement learning. IEEE Trans Neural Netw 18(4):973\u2013992","journal-title":"IEEE Trans Neural Netw"},{"issue":"6","key":"9579_CR35","doi-asserted-by":"publisher","first-page":"1512","DOI":"10.1162\/NECO_a_00452","volume":"25","author":"T Zhao","year":"2013","unstructured":"Zhao T, Hachiya H, Tangkaratt V, Morimoto J, Sugiyama M (2013) Efficient sample reuse in policy gradients with parameter-based exploration. Neural Comput 25(6):1512\u20131547","journal-title":"Neural Comput"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-017-9579-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10462-017-9579-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-017-9579-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,27]],"date-time":"2024-06-27T16:51:15Z","timestamp":1719507075000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10462-017-9579-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,10,10]]},"references-count":35,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2019,10]]}},"alternative-id":["9579"],"URL":"https:\/\/doi.org\/10.1007\/s10462-017-9579-x","relation":{},"ISSN":["0269-2821","1573-7462"],"issn-type":[{"value":"0269-2821","type":"print"},{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,10,10]]},"assertion":[{"value":"10 October 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Informed consent was obtained from all individual participants included in the study.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}}]}}