{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T06:33:59Z","timestamp":1764570839438,"version":"3.37.3"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"8-9","license":[{"start":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T00:00:00Z","timestamp":1558310400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T00:00:00Z","timestamp":1558310400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["PA 3179\/1-1"],"award-info":[{"award-number":["PA 3179\/1-1"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010663","name":"H2020 European Research Council","doi-asserted-by":"publisher","award":["645582","640554"],"award-info":[{"award-number":["645582","640554"]}],"id":[{"id":"10.13039\/100010663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1007\/s10994-019-05807-0","type":"journal-article","created":{"date-parts":[[2019,5,21]],"date-time":"2019-05-21T17:06:06Z","timestamp":1558458366000},"page":"1443-1466","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Compatible natural gradient policy search"],"prefix":"10.1007","volume":"108","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4469-8191","authenticated-orcid":false,"given":"Joni","family":"Pajarinen","sequence":"first","affiliation":[]},{"given":"Hong Linh","family":"Thai","sequence":"additional","affiliation":[]},{"given":"Riad","family":"Akrour","sequence":"additional","affiliation":[]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[]},{"given":"Gerhard","family":"Neumann","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,5,20]]},"reference":[{"key":"5807_CR1","unstructured":"Abdolmaleki, A., Lioutikov, R., Peters, J., Lau, N., Reis, L., & Neumann, G. (2015). Model-based relative entropy stochastic search. In Advances in Neural Information Processing Systems (NIPS), MIT Press."},{"key":"5807_CR2","unstructured":"Abdolmaleki, A., Springenberg, J. T., Tassa, Y., Munos, R., Heess, N., & Riedmiller, M. (2018). Maximum a posteriori policy optimisation. In Proceedings of the international conference on learning representations (ICLR)."},{"key":"5807_CR3","unstructured":"Akrour, R., Abdolmaleki, A., Abdulsamad, H., & Neumann, G. (2016). Model-free trajectory optimization for reinforcement learning. In Proceedings of the international conference on machine learning (ICML)."},{"issue":"14","key":"5807_CR4","first-page":"1","volume":"19","author":"R Akrour","year":"2018","unstructured":"Akrour, R., Abdolmaleki, A., Abdulsamad, H., Peters, J., & Neumann, G. (2018). Model-free trajectory-based policy optimization with monotonic improvement. Journal of Machine Learning Research, 19(14), 1\u201325.","journal-title":"Journal of Machine Learning Research"},{"issue":"2","key":"5807_CR5","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1162\/089976698300017746","volume":"10","author":"S Amari","year":"1998","unstructured":"Amari, S. (1998). Natural gradient works efficiently in learning. Neural Computation, 10(2), 251\u2013276.","journal-title":"Neural Computation"},{"key":"5807_CR6","unstructured":"Bagnell, J. A., & Schneider, J. (2003). Covariant policy search. IJCAI."},{"key":"5807_CR7","unstructured":"Bernacchia, A., Lengyel, M., & Hennequin, G. (2018). Exact natural gradient in deep linear networks and its application to the nonlinear case. In Advances in Neural Information Processing Systems (NIPS), Curran Associates, Inc., pp 5945\u20135954."},{"key":"5807_CR8","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441","volume-title":"Convex optimization","author":"S Boyd","year":"2004","unstructured":"Boyd, S., & Vandenberghe, L. (2004). Convex optimization. Cambridge: Cambridge University Press."},{"issue":"93","key":"5807_CR9","first-page":"1","volume":"17","author":"C Daniel","year":"2016","unstructured":"Daniel, C., Neumann, G., Kroemer, O., & Peters, J. (2016). Hierarchical relative entropy policy search. Journal of Machine Learning Research (JMLR), 17(93), 1\u201350.","journal-title":"Journal of Machine Learning Research (JMLR)"},{"key":"5807_CR10","unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., Lopez, A., & Koltun, V. (2017). CARLA: An open urban driving simulator. In Conference on robot learning, pp. 1\u201316."},{"key":"5807_CR11","unstructured":"Duan, Y., Chen, X., Houthooft, R., Schulman, J., & Abbeel, P. (2016). Benchmarking deep reinforcement learning for continuous control. In Proceedings of the 33nd international conference on machine learning, ICML 2016, New York City, NY, USA, June 19\u201324, 2016, pp 1329\u20131338. http:\/\/jmlr.org\/proceedings\/papers\/v48\/duan16.html ."},{"key":"5807_CR12","doi-asserted-by":"crossref","unstructured":"Geist, M., & Pietquin, O. (2010). Revisiting natural actor-critics with value function approximation. In International conference on modeling decisions for artificial intelligence, Springer, pp. 207\u2013218.","DOI":"10.1007\/978-3-642-16292-3_21"},{"issue":"2","key":"5807_CR13","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1162\/106365601750190398","volume":"9","author":"N Hansen","year":"2001","unstructured":"Hansen, N., & Ostermeier, A. (2001). Completely derandomized self-adaptation in evolution strategies. Evolutionary Computation, 9(2), 159\u2013195.","journal-title":"Evolutionary Computation"},{"key":"5807_CR14","first-page":"1531","volume-title":"Advances in neural information processing systems 14 (NIPS 2001)","author":"S Kakade","year":"2001","unstructured":"Kakade, S. (2001). A natural policy gradient. In T. G. Dietterich, S. Becker, & Z. Ghahramani (Eds.), Advances in neural information processing systems 14 (NIPS 2001) (pp. 1531\u20131538). Cambridge: MIT Press."},{"key":"5807_CR15","first-page":"849","volume-title":"Advances in neural information processing systems 21","author":"J Kober","year":"2009","unstructured":"Kober, J., & Peters, J. R. (2009). Policy search for motor primitives in robotics. In D. Koller, D. Schuurmans, Y. Bengio, & L. Bottou (Eds.), Advances in neural information processing systems 21 (pp. 849\u2013856). Red Hook: Curran Associates, Inc."},{"key":"5807_CR16","unstructured":"Lillicrap, T. P., Hunt, J. J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., & Wierstra, D. (2015). Continuous control with deep reinforcement learning. arXiv:1509.02971 ."},{"issue":"7540","key":"5807_CR17","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Veness, J., Bellemare, M. G., et al. (2015). Human-level control through deep reinforcement learning. Nature, 518(7540), 529\u2013533.","journal-title":"Nature"},{"key":"5807_CR18","unstructured":"Mnih, V., Badia, A. P., Mirza, M., Graves, A., Lillicrap, T., Harley, T., Silver, D., & Kavukcuoglu, K. (2016) Asynchronous methods for deep reinforcement learning. In International conference on machine learning, pp. 1928\u20131937."},{"key":"5807_CR19","unstructured":"O\u2019Donoghue, B., Munos, R., Kavukcuoglu, K., & Mnih, V. (2016). PGQ: Combining policy gradient and q-learning. arXiv:1611.01626 ."},{"issue":"7\u20139","key":"5807_CR20","doi-asserted-by":"publisher","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J Peters","year":"2008","unstructured":"Peters, J., & Schaal, S. (2008). Natural actor-critic. Neurocomputing, 71(7\u20139), 1180\u20131190.","journal-title":"Neurocomputing"},{"key":"5807_CR21","doi-asserted-by":"crossref","unstructured":"Peters, J., M\u00fclling, K., & Altun, Y. (2010). Relative entropy policy search. In AAAI Atlanta, pp. 1607\u20131612.","DOI":"10.1609\/aaai.v24i1.7727"},{"key":"5807_CR22","doi-asserted-by":"publisher","first-page":"663","DOI":"10.1613\/jair.2567","volume":"32","author":"S Ross","year":"2008","unstructured":"Ross, S., Pineau, J., Paquet, S., & Chaib-Draa, B. (2008). Online planning algorithms for POMDPs. Journal of Artificial Intelligence Research, 32, 663\u2013704.","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"2","key":"5807_CR23","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1023\/A:1010091220143","volume":"1","author":"R Rubinstein","year":"1999","unstructured":"Rubinstein, R. (1999). The cross-entropy method for combinatorial and continuous optimization. Methodology and Computing in Applied Probability, 1(2), 127\u2013190.","journal-title":"Methodology and Computing in Applied Probability"},{"key":"5807_CR24","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., & Moritz, P. (2015). Trust region policy optimization. In Proceedings of the 32nd International Conference on Machine Learning (ICML-15), pp. 1889\u20131897."},{"key":"5807_CR25","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. arXiv:1707.06347 ."},{"key":"5807_CR26","unstructured":"Silver, D., Lever, G, Heess, N., Degris, T., Wierstra, D., & Riedmiller, M. (2014). Deterministic policy gradient algorithms. In ICML."},{"key":"5807_CR27","unstructured":"Sutton, R. S., McAllester, D., Singh, S., & Mansour, Y. (1999). Policy gradient methods for reinforcement learning with function approximation. In Proceedings of the 12th international conference on neural information processing systems, MIT Press, Cambridge, MA, USA, NIPS\u201999, pp. 1057\u20131063."},{"key":"5807_CR28","unstructured":"Tangkaratt, V., Abdolmaleki, A., & Sugiyama, M. (2018). Guide Actor-Critic for Continuous Control. In Proceedings of the international conference on learning representations (ICLR)."},{"key":"5807_CR29","doi-asserted-by":"crossref","unstructured":"Wierstra, D., Schaul, T., Peters, J., & Schmidhuber, J. (2008). Natural evolution strategies. In IEEE congress on evolutionary computation, IEEE, pp 3381\u20133387.","DOI":"10.1109\/CEC.2008.4631255"},{"issue":"3\u20134","key":"5807_CR30","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R. J. (1992). Simple statistical gradient-following algorithms for connectionist reinforcement learning. Machine Learning, 8(3\u20134), 229\u2013256.","journal-title":"Machine Learning"},{"key":"5807_CR31","unstructured":"Wu, Y., Mansimov, E., Grosse, R. B., Liao, S., & Ba, J. (2017). Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation. In Advances in neural information processing systems (NIPS), pp. 5279\u20135288."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-019-05807-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-019-05807-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-019-05807-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,18]],"date-time":"2022-09-18T10:35:31Z","timestamp":1663497331000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-019-05807-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,20]]},"references-count":31,"journal-issue":{"issue":"8-9","published-print":{"date-parts":[[2019,9]]}},"alternative-id":["5807"],"URL":"https:\/\/doi.org\/10.1007\/s10994-019-05807-0","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2019,5,20]]},"assertion":[{"value":"21 January 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 May 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 May 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}