{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T11:45:37Z","timestamp":1767008737166,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,14]]},"DOI":"10.1145\/3712256.3726335","type":"proceedings-article","created":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T12:26:58Z","timestamp":1751977618000},"page":"1282-1290","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Scaling Policy Gradient Quality-Diversity with Massive Parallelization via Behavioral Variations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-1363-9777","authenticated-orcid":false,"given":"Konstantinos","family":"Mitsides","sequence":"first","affiliation":[{"name":"Imperial College London, London, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4743-9494","authenticated-orcid":false,"given":"Maxence","family":"Faldor","sequence":"additional","affiliation":[{"name":"Imperial College London, London, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3190-7073","authenticated-orcid":false,"given":"Antoine","family":"Cully","sequence":"additional","affiliation":[{"name":"Imperial College London, London, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2020.3046133"},{"key":"e_1_3_2_2_2_1","unstructured":"Sumeet Batra Bryon Tjanaka Matthew C. Fontaine Aleksei Petrenko Stefanos Nikolaidis and Gaurav Sukhatme. 2024. Proximal Policy Gradient Arborescence for Quality Diversity Reinforcement Learning. arXiv:2305.13795 [cs.LG] https:\/\/arxiv.org\/abs\/2305.13795"},{"key":"e_1_3_2_2_3_1","unstructured":"Felix Chalumeau Bryan Lim Raphael Boige Maxime Allard Luca Grillotti Manon Flageat Valentin Mac\u00e9 Arthur Flajolet Thomas Pierrot and Antoine Cully. 2023. QDax: A Library for Quality-Diversity and Population-based Algorithms with Hardware Acceleration. arXiv:2308.03665 [cs.AI] https:\/\/arxiv.org\/abs\/2308.03665"},{"key":"e_1_3_2_2_4_1","unstructured":"Felix Chalumeau Thomas Pierrot Valentin Mac\u00e9 Arthur Flajolet Karim Beguir Antoine Cully and Nicolas Perrin-Gilbert. 2023. Assessing Quality-Diversity Neuro-Evolution Algorithms Performance in Hard Exploration Problems. arXiv:2211.13742 [cs.NE] https:\/\/arxiv.org\/abs\/2211.13742"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Konstantinos Chatzilygeroudis Antoine Cully Vassilis Vassiliades and Jean-Baptiste Mouret. 2020. Quality-Diversity Optimization: a novel branch of stochastic optimization. arXiv:2012.04322 [cs.NE] https:\/\/arxiv.org\/abs\/2012.04322","DOI":"10.1007\/978-3-030-66515-9_4"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377930.3390217"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14422"},{"key":"e_1_3_2_2_8_1","unstructured":"Antoine Cully and Yiannis Demiris. 2017. Quality and Diversity Optimization: A Unifying Modular Framework. arXiv:1708.09251 [cs.NE] https:\/\/arxiv.org\/abs\/1708.09251"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03157-9"},{"key":"e_1_3_2_2_10_1","unstructured":"Aaron Grattafiori et al. 2024. The Llama 3 Herd of Models. arXiv:2407.21783 [cs.AI] https:\/\/arxiv.org\/abs\/2407.21783"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"Maxence Faldor F\u00e9lix Chalumeau Manon Flageat and Antoine Cully. 2023. MAP-Elites with Descriptor-Conditioned Gradients and Archive Distillation into a Single Policy. arXiv:2303.03832 [cs.NE] https:\/\/arxiv.org\/abs\/2303.03832","DOI":"10.1145\/3583131.3590503"},{"key":"e_1_3_2_2_12_1","unstructured":"Maxence Faldor F\u00e9lix Chalumeau Manon Flageat and Antoine Cully. 2024. Synergizing Quality-Diversity with Descriptor-Conditioned Reinforcement Learning. arXiv:2401.08632 [cs.NE] https:\/\/arxiv.org\/abs\/2401.08632"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3638529.3654089"},{"key":"e_1_3_2_2_14_1","unstructured":"Manon Flageat Bryan Lim Luca Grillotti Maxime Allard Sim\u00f3n C. Smith and Antoine Cully. 2022. Benchmarking Quality-Diversity Algorithms on Neuroevolution for Reinforcement Learning. arXiv:2211.02193 [cs.NE] https:\/\/arxiv.org\/abs\/2211.02193"},{"key":"e_1_3_2_2_15_1","volume-title":"Fontaine and Stefanos Nikolaidis","author":"Matthew","year":"2021","unstructured":"Matthew C. Fontaine and Stefanos Nikolaidis. 2021. Differentiable Quality Diversity. CoRR abs\/2106.03894 (2021). arXiv:2106.03894 https:\/\/arxiv.org\/abs\/2106.03894"},{"key":"e_1_3_2_2_16_1","volume-title":"Fontaine and Stefanos Nikolaidis","author":"Matthew","year":"2023","unstructured":"Matthew C. Fontaine and Stefanos Nikolaidis. 2023. Covariance Matrix Adaptation MAP-Annealing. arXiv:2205.10752 [cs.LG] https:\/\/arxiv.org\/abs\/2205.10752"},{"key":"e_1_3_2_2_17_1","volume-title":"Hoover","author":"Fontaine Matthew C.","year":"2019","unstructured":"Matthew C. Fontaine, Julian Togelius, Stefanos Nikolaidis, and Amy K. Hoover. 2019. Covariance Matrix Adaptation for the Rapid Illumination of Behavior Space. CoRR abs\/1912.02400 (2019). arXiv:1912.02400 http:\/\/arxiv.org\/abs\/1912.02400"},{"key":"e_1_3_2_2_18_1","unstructured":"C. Daniel Freeman Erik Frey Anton Raichuk Sertan Girgin Igor Mordatch and Olivier Bachem. 2021. Brax - A Differentiable Physics Engine for Large Scale Rigid Body Simulation. http:\/\/github.com\/google\/brax"},{"key":"e_1_3_2_2_19_1","volume-title":"Addressing Function Approximation Error in Actor-Critic Methods. CoRR abs\/1802.09477","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke van Hoof, and David Meger. 2018. Addressing Function Approximation Error in Actor-Critic Methods. CoRR abs\/1802.09477 (2018). arXiv:1802.09477 http:\/\/arxiv.org\/abs\/1802.09477"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","unstructured":"Adam Gaier Alexander Asteroth and Jean-Baptiste Mouret. 2017. Aerodynamic Design Exploration through Surrogate-Assisted Illumination. 10.2514\/6.2017-3330","DOI":"10.2514\/6.2017-3330"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1162\/evco_a_00231"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3319619.3321897"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2019.8848053"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-32494-1_4"},{"key":"e_1_3_2_2_25_1","volume-title":"Advances in Neural Information Processing Systems","author":"Konda Vijay","year":"1999","unstructured":"Vijay Konda and John Tsitsiklis. 1999. Actor-Critic Algorithms. In Advances in Neural Information Processing Systems, S. Solla, T. Leen, and K. M\u00fcller (Eds.), Vol. 12. MIT Press. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/1999\/file\/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf"},{"key":"e_1_3_2_2_26_1","volume-title":"Accelerated Quality-Diversity for Robotics through Massive Parallelism. CoRR abs\/2202.01258","author":"Lim Bryan","year":"2022","unstructured":"Bryan Lim, Maxime Allard, Luca Grillotti, and Antoine Cully. 2022. Accelerated Quality-Diversity for Robotics through Massive Parallelism. CoRR abs\/2202.01258 (2022). arXiv:2202.01258 https:\/\/arxiv.org\/abs\/2202.01258"},{"key":"e_1_3_2_2_27_1","volume-title":"Isaac Gym: High Performance GPU-Based Physics Simulation For Robot Learning. CoRR abs\/2108.10470","author":"Makoviychuk Viktor","year":"2021","unstructured":"Viktor Makoviychuk, Lukasz Wawrzyniak, Yunrong Guo, Michelle Lu, Kier Storey, Miles Macklin, David Hoeller, Nikita Rudin, Arthur Allshire, Ankur Handa, and Gavriel State. 2021. Isaac Gym: High Performance GPU-Based Physics Simulation For Robot Learning. CoRR abs\/2108.10470 (2021). arXiv:2108.10470 https:\/\/arxiv.org\/abs\/2108.10470"},{"key":"e_1_3_2_2_28_1","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Alex Graves Ioannis Antonoglou Daan Wierstra and Martin Riedmiller. 2013. Playing Atari with Deep Reinforcement Learning. http:\/\/arxiv.org\/abs\/1312.5602 arXiv:1312.5602 [cs]."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_2_30_1","unstructured":"Jean-Baptiste Mouret and Jeff Clune. 2015. Illuminating search spaces by mapping elites. arXiv:1504.04909 [cs.AI] https:\/\/arxiv.org\/abs\/1504.04909"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3449639.3459304"},{"key":"e_1_3_2_2_32_1","volume-title":"DIVERSITY POLICY GRADIENT FOR SAMPLE EFFI-CIENT QUALITY-DIVERSITY OPTIMIZATION. In Workshop on Agent Learning in Open-Endedness (ALOE) at ICLR","author":"Pierrot Thomas","year":"2022","unstructured":"Thomas Pierrot, Valentin Mac\u00e9, Felix Chalumeau, Arthur Flajolet, Geoffrey Cideron, Karim Beguir, Antoine Cully, Olivier Sigaud, and Nicolas Perrin-Gilbert. 2022. DIVERSITY POLICY GRADIENT FOR SAMPLE EFFI-CIENT QUALITY-DIVERSITY OPTIMIZATION. In Workshop on Agent Learning in Open-Endedness (ALOE) at ICLR 2022. virtual, Vatican City. https:\/\/hal.science\/hal-03753541"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2016.00040"},{"key":"e_1_3_2_2_34_1","unstructured":"Tim Salimans Jonathan Ho Xi Chen Szymon Sidor and Ilya Sutskever. 2017. Evolution Strategies as a Scalable Alternative to Reinforcement Learning. arXiv:1703.03864 [stat.ML] https:\/\/arxiv.org\/abs\/1703.03864"},{"key":"e_1_3_2_2_35_1","volume-title":"Andrei Lupu, Eric Hambro, Aram H. Markosyan, Manish Bhatt, Yuning Mao, Minqi Jiang, Jack Parker-Holder, Jakob Foerster, Tim Rockt\u00e4schel, and Roberta Raileanu.","author":"Samvelyan Mikayel","year":"2024","unstructured":"Mikayel Samvelyan, Sharath Chandra Raparthy, Andrei Lupu, Eric Hambro, Aram H. Markosyan, Manish Bhatt, Yuning Mao, Minqi Jiang, Jack Parker-Holder, Jakob Foerster, Tim Rockt\u00e4schel, and Roberta Raileanu. 2024. Rainbow Teaming: Open-Ended Generation of Diverse Adversarial Prompts. arXiv:2402.16822 [cs.CL] https:\/\/arxiv.org\/abs\/2402.16822"},{"key":"e_1_3_2_2_36_1","volume-title":"Proximal Policy Optimization Algorithms. CoRR abs\/1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. CoRR abs\/1707.06347 (2017). arXiv:1707.06347 http:\/\/arxiv.org\/abs\/1707.06347"},{"key":"e_1_3_2_2_37_1","volume-title":"Advances in Neural Information Processing Systems","author":"Sutton Richard S","year":"1999","unstructured":"Richard S Sutton, David McAllester, Satinder Singh, and Yishay Mansour. 1999. Policy Gradient Methods for Reinforcement Learning with Function Approximation. In Advances in Neural Information Processing Systems, S. Solla, T. Leen, and K. M\u00fcller (Eds.), Vol. 12. MIT Press. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/1999\/file\/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"crossref","unstructured":"Bryon Tjanaka Matthew C. Fontaine David H. Lee Aniruddha Kalkar and Stefanos Nikolaidis. 2023. Training Diverse High-Dimensional Controllers by Scaling Covariance Matrix Adaptation MAP-Annealing. arXiv:2210.02622 [cs.RO] https:\/\/arxiv.org\/abs\/2210.02622","DOI":"10.1109\/LRA.2023.3313012"},{"key":"e_1_3_2_2_39_1","volume-title":"Approximating Gradients for Differentiable Quality Diversity in Reinforcement Learning. CoRR abs\/2202.03666","author":"Tjanaka Bryon","year":"2022","unstructured":"Bryon Tjanaka, Matthew C. Fontaine, Julian Togelius, and Stefanos Nikolaidis. 2022. Approximating Gradients for Differentiable Quality Diversity in Reinforcement Learning. CoRR abs\/2202.03666 (2022). arXiv:2202.03666 https:\/\/arxiv.org\/abs\/2202.03666"},{"key":"e_1_3_2_2_40_1","volume-title":"Tristan Deleu, Manuel Goul\u00e3o, Andreas Kallinteris, Markus Krimmel, Arjun KG, et al.","author":"Towers Mark","year":"2024","unstructured":"Mark Towers, Ariel Kwiatkowski, Jordan Terry, John U Balis, Gianluca De Cola, Tristan Deleu, Manuel Goul\u00e3o, Andreas Kallinteris, Markus Krimmel, Arjun KG, et al. 2024. Gymnasium: A Standard Interface for Reinforcement Learning Environments. arXiv preprint arXiv:2407.17032 (2024)."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3205455.3205602"},{"key":"e_1_3_2_2_42_1","unstructured":"Daan Wierstra Tom Schaul Tobias Glasmachers Yi Sun and J\u00fcrgen Schmidhuber. 2011. Natural Evolution Strategies. arXiv:1106.4487 [stat.ML] https:\/\/arxiv.org\/abs\/1106.4487"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"}],"event":{"name":"GECCO '25: Genetic and Evolutionary Computation Conference","sponsor":["SIGEVO ACM Special Interest Group on Genetic and Evolutionary Computation"],"location":"NH Malaga Hotel Malaga Spain","acronym":"GECCO '25"},"container-title":["Proceedings of the Genetic and Evolutionary Computation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712256.3726335","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T20:41:38Z","timestamp":1759869698000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712256.3726335"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":43,"alternative-id":["10.1145\/3712256.3726335","10.1145\/3712256"],"URL":"https:\/\/doi.org\/10.1145\/3712256.3726335","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}