{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T17:02:02Z","timestamp":1779382922931,"version":"3.53.1"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T00:00:00Z","timestamp":1598918400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9]]},"DOI":"10.1109\/mlsp49062.2020.9231618","type":"proceedings-article","created":{"date-parts":[[2020,10,21]],"date-time":"2020-10-21T13:53:19Z","timestamp":1603288399000},"page":"1-6","source":"Crossref","is-referenced-by-count":26,"title":["PPO-CMA: Proximal Policy Optimization with Covariance Matrix Adaptation"],"prefix":"10.1109","author":[{"given":"Perttu","family":"Hamalainen","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Amin","family":"Babadi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaoxiao","family":"Ma","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jaakko","family":"Lehtinen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref10","author":"chua","year":"2018","journal-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models"},{"key":"ref11","author":"schulman","year":"2015","journal-title":"High-dimensional continuous control using generalized advantage estimation"},{"key":"ref12","article-title":"Advantage-weighted regression: Simple and scalable off-policy reinforcement learning","volume":"abs 1910 177","author":"peng","year":"2019","journal-title":"CoRR"},{"key":"ref13","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-32494-1_4"},{"key":"ref15","author":"loshchilov","year":"2017","journal-title":"Limited-memory matrix adaptation for large scale black-box optimization"},{"key":"ref16","author":"hansen","year":"2016","journal-title":"The CMA evolution strategy A tutorial"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-005-5724-z"},{"key":"ref18","volume":"2","author":"larra\u00f1aga","year":"2001","journal-title":"Estimation of Distribution Algorithms A New Tool for Evolutionary Computation"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1830761.1830788"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368199"},{"key":"ref3","author":"schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref6","first-page":"1","article-title":"Guided policy search","author":"levine","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref5","first-page":"1","article-title":"Intelli-gent middle-level game control","author":"babadi","year":"0","journal-title":"2018 IEEE Conference on Computational Intelligence and Games (CIG)"},{"key":"ref8","author":"abdolmaleki","year":"2018","journal-title":"Max-imum a posteriori policy optimisation"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2018.8430799"},{"key":"ref2","author":"peng","year":"2018","journal-title":"Deepmimic Example-guided deep reinforcement learning of physics-based character skills"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3355089.3356536"},{"key":"ref9","author":"abdolmaleki","year":"2018","journal-title":"Relative entropy regularized policy iteration"},{"key":"ref20","year":"2017","journal-title":"Roboschool"},{"key":"ref22","article-title":"Implementation matters in deep rl: A case study on ppo and trpo","author":"engstrom","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref21","author":"brockman","year":"2016","journal-title":"OpenAI Gym"}],"event":{"name":"2020 IEEE 30th International Workshop on Machine Learning for Signal Processing (MLSP)","location":"Espoo, Finland","start":{"date-parts":[[2020,9,21]]},"end":{"date-parts":[[2020,9,24]]}},"container-title":["2020 IEEE 30th International Workshop on Machine Learning for Signal Processing (MLSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9217888\/9231523\/09231618.pdf?arnumber=9231618","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T17:56:24Z","timestamp":1656438984000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9231618\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/mlsp49062.2020.9231618","relation":{},"subject":[],"published":{"date-parts":[[2020,9]]}}}