{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:13:41Z","timestamp":1763190821603,"version":"3.45.0"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11228248","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Quality-Diversity Driven Action Swarm Evolution in Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Liyao","family":"Sun","sequence":"first","affiliation":[{"name":"Fudan University,Academy for Engineering and Technology,Shanghai,China"}]},{"given":"Kang","family":"Xu","sequence":"additional","affiliation":[{"name":"Fudan University,Academy for Engineering and Technology,Shanghai,China"}]},{"given":"Yan","family":"Ma","sequence":"additional","affiliation":[{"name":"Fudan University,Academy for Engineering and Technology,Shanghai,China"}]},{"given":"Wei","family":"Li","sequence":"additional","affiliation":[{"name":"Fudan University,Academy for Engineering and Technology,Shanghai,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.12794\/metadc1505267"},{"key":"ref2","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International Conference on Machine Learning","author":"Fujimoto"},{"article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","year":"2018","author":"Haarnoja","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126628"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/tevc.2024.3443913"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3390\/math13050833"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i18.30079"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i12.29289"},{"article-title":"Value-evolutionary-based reinforcement learning","year":"2024","author":"Li","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3233\/faia230551"},{"article-title":"ERL-Re2: Efficient evolutionary reinforcement learning with shared state representation and individual policy representation","volume-title":"International Conference on Learning Representations","author":"HAO","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICNN.1995.488968"},{"key":"ref13","first-page":"487","article-title":"Interactions between learning and evolution","volume":"10","author":"Ackley","year":"1991","journal-title":"Artificial life II"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1613\/jair.613"},{"key":"ref15","first-page":"877","article-title":"Evolutionary function approximation for reinforcement learning","volume":"7","author":"Whiteson","year":"2006","journal-title":"Journal of Machine Learning Research"},{"article-title":"Evolutionary algorithms for reinforcement learning","year":"2011","author":"Grefenstette","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107817"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2024.120185"},{"article-title":"Genetic soft updates for policy evolution in deep reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Marchesini","key":"ref19"},{"key":"ref20","first-page":"3341","article-title":"Collaborative evolutionary reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Khadka"},{"article-title":"CEM-RL: Combining evolutionary and gradient-based methods for policy search","volume-title":"International Conference on Learning Representations","author":"Pourchot","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10479-005-5724-z"},{"key":"ref23","first-page":"267","article-title":"GRAC: Self-guided and self-regularized actor-critic","volume-title":"Conference on Robot Learning","author":"Shao"},{"key":"ref24","article-title":"Taco: Temporal latent action-driven contrastive loss for visual reinforcement learning","volume":"36","author":"Zheng","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"Soft actor-critic with cross-entropy policy optimization","year":"2021","author":"Shi","key":"ref25"},{"key":"ref26","first-page":"2095","article-title":"Residual skill policies: Learning an adaptable skill-based action space for reinforcement learning for robotics","volume-title":"Conference on Robot Learning","author":"Rana"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2016.2525458"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.swevo.2021.100974"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-30111-7_49"},{"article-title":"QD-RL: Efficient mixing of quality and diversity in reinforcement learning","year":"2020","author":"Cideron","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3512290.3528845"},{"article-title":"Quality-similar diversity via population based reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Wu","key":"ref32"},{"article-title":"Proximal policy gradient arborescence for quality diversity reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Batra","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2019.8848046"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.119011"},{"article-title":"Never give up: Learning directed exploration strategies","volume-title":"International Conference on Learning Representations","author":"Badia","key":"ref37"},{"key":"ref38","first-page":"6131","article-title":"Sunrise: A simple unified framework for ensemble learning in deep reinforcement learning","volume-title":"International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"139","author":"Lee"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2025,6,30]]},"location":"Rome, Italy","end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11228248.pdf?arnumber=11228248","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:11:51Z","timestamp":1763190711000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11228248\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11228248","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}