{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T21:54:20Z","timestamp":1774302860379,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":7,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,7,7]],"date-time":"2021-07-07T00:00:00Z","timestamp":1625616000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001665","name":"Agence Nationale de la Recherche","doi-asserted-by":"publisher","award":["ANR-18-CE33-0006"],"award-info":[{"award-number":["ANR-18-CE33-0006"]}],"id":[{"id":"10.13039\/501100001665","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,7,7]]},"DOI":"10.1145\/3449726.3459462","type":"proceedings-article","created":{"date-parts":[[2021,7,8]],"date-time":"2021-07-08T14:50:12Z","timestamp":1625755812000},"page":"97-98","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Reinforcement learning with rare significant events"],"prefix":"10.1145","author":[{"given":"Paul","family":"Ecoffet","sequence":"first","affiliation":[{"name":"Sorbonne Universit\u00e9, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nicolas","family":"Fontbonne","sequence":"additional","affiliation":[{"name":"Sorbonne Universit\u00e9, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jean-Baptiste","family":"Andr\u00e9","sequence":"additional","affiliation":[{"name":"\u00c9cole Normale Sup\u00e9rieure, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nicolas","family":"Bredeche","sequence":"additional","affiliation":[{"name":"Sorbonne Universit\u00e9, Paris, France"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,7,8]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"1937","article-title":"A simulation-based algorithm for ergodic control of Markov chains conditioned on rare events","author":"Bhatnagar Shalabh","year":"2006","unstructured":"Shalabh Bhatnagar , Vivek S Borkar , and Madhukar Akarapu . 2006 . A simulation-based algorithm for ergodic control of Markov chains conditioned on rare events . Journal of Machine Learning Research 7 , Oct (2006), 1937 -- 1962 . Shalabh Bhatnagar, Vivek S Borkar, and Madhukar Akarapu. 2006. A simulation-based algorithm for ergodic control of Markov chains conditioned on rare events. Journal of Machine Learning Research 7, Oct (2006), 1937--1962.","journal-title":"Journal of Machine Learning Research 7"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10810"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning, Jennifer Dy and Andreas Krause (Eds.)","volume":"80","author":"Du Simon","year":"2018","unstructured":"Simon Du and Jason Lee . 2018 . On the Power of Over-parametrization in Neural Networks with Quadratic Activation . In Proceedings of the 35th International Conference on Machine Learning, Jennifer Dy and Andreas Krause (Eds.) , Vol. 80 . PMLR, 1329--1338. Simon Du and Jason Lee. 2018. On the Power of Over-parametrization in Neural Networks with Quadratic Activation. In Proceedings of the 35th International Conference on Machine Learning, Jennifer Dy and Andreas Krause (Eds.), Vol. 80. PMLR, 1329--1338."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Paul Ecoffet Nicolas Fontbonne Jean-Baptiste Andr\u00e9 and Nicolas Bredeche. 2021. Policy Search with Rare Significant Events: Choosing the Right Partner to Cooperate with. (2021). arXiv:cs.LG\/2103.06846  Paul Ecoffet Nicolas Fontbonne Jean-Baptiste Andr\u00e9 and Nicolas Bredeche. 2021. Policy Search with Rare Significant Events: Choosing the Right Partner to Cooperate with. (2021). arXiv:cs.LG\/2103.06846","DOI":"10.1145\/3449726.3459462"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390199"},{"key":"e_1_3_2_1_6_1","volume-title":"Completely derandomized self-adaptation in evolution strategies. Evolutionary computation 9, 2","author":"Hansen Nikolaus","year":"2001","unstructured":"Nikolaus Hansen and Andreas Ostermeier . 2001. Completely derandomized self-adaptation in evolution strategies. Evolutionary computation 9, 2 ( 2001 ), 159--195. Nikolaus Hansen and Andreas Ostermeier. 2001. Completely derandomized self-adaptation in evolution strategies. Evolutionary computation 9, 2 (2001), 159--195."},{"key":"e_1_3_2_1_7_1","volume-title":"Proximal Policy Optimization Algorithms. CoRR abs\/1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman , Filip Wolski , Prafulla Dhariwal , Alec Radford , and Oleg Klimov . 2017. Proximal Policy Optimization Algorithms. CoRR abs\/1707.06347 ( 2017 ). arXiv:1707.06347 John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. CoRR abs\/1707.06347 (2017). arXiv:1707.06347"}],"event":{"name":"GECCO '21: Genetic and Evolutionary Computation Conference","location":"Lille France","acronym":"GECCO '21","sponsor":["SIGEVO ACM Special Interest Group on Genetic and Evolutionary Computation"]},"container-title":["Proceedings of the Genetic and Evolutionary Computation Conference Companion"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3449726.3459462","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3449726.3459462","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:17:37Z","timestamp":1750191457000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3449726.3459462"}},"subtitle":["direct policy search vs. gradient policy search"],"short-title":[],"issued":{"date-parts":[[2021,7,7]]},"references-count":7,"alternative-id":["10.1145\/3449726.3459462","10.1145\/3449726"],"URL":"https:\/\/doi.org\/10.1145\/3449726.3459462","relation":{},"subject":[],"published":{"date-parts":[[2021,7,7]]},"assertion":[{"value":"2021-07-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}