{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T18:30:34Z","timestamp":1773858634717,"version":"3.50.1"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icra46639.2022.9812341","type":"proceedings-article","created":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T19:36:40Z","timestamp":1657654600000},"page":"5525-5531","source":"Crossref","is-referenced-by-count":21,"title":["Enhancing Deep Reinforcement Learning Approaches for Multi-Robot Navigation via Single-Robot Evolutionary Policy Search"],"prefix":"10.1109","author":[{"given":"Enrico","family":"Marchesini","sequence":"first","affiliation":[{"name":"University of Verona,Department of Computer Science,Verona,Italy,37135"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alessandro","family":"Farinelli","sequence":"additional","affiliation":[{"name":"University of Verona,Department of Computer Science,Verona,Italy,37135"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","article-title":"Formal verification of neural networks for safety-critical tasks in deep reinforcement learning","author":"corsi","year":"2021","journal-title":"UAI"},{"key":"ref32","article-title":"Unity: A platform for intelligent 
agents","author":"juliani","year":"2018","journal-title":"CoRR"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11796","article-title":"Rainbow: Combining improvements in drl","author":"hessel","year":"2018","journal-title":"AAAI"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197512"},{"key":"ref34","article-title":"Formal verification for safe deep reinforcement learning in trajectory generation","author":"corsi","year":"2020","journal-title":"IRC"},{"key":"ref10","article-title":"Weighted qmix: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning","author":"rashid","year":"2020","journal-title":"NeurIPS"},{"key":"ref11","article-title":"Centralizing state values in dueling architectures for multiagent reinforcement learning navigation","author":"marchesini","year":"2021","journal-title":"IROS"},{"key":"ref12","article-title":"Discrete deep reinforcement learning for mapless navigation","year":"2020","journal-title":"ICRA"},{"key":"ref13","article-title":"Evolution-guided policy gradient in reinforcement learning","author":"khadka","year":"2018","journal-title":"NeurIPS"},{"key":"ref14","article-title":"CEM-RL: Combining evolutionary and gradient-based methods for policy search","author":"pourchot","year":"2019","journal-title":"ICLR"},{"key":"ref15","article-title":"Genetic deep reinforcement learning for mapless navigation","author":"marchesini","year":"2020","journal-title":"AAMAS"},{"key":"ref16","article-title":"Genetic soft updates for policy evolution in deep reinforcement learning","author":"marchesini","year":"2021","journal-title":"ICLR"},{"key":"ref17","article-title":"Exploring safer behaviors for deep reinforcement learning","year":"2022","journal-title":"AAAI"},{"key":"ref18","article-title":"Learning for a robot: Deep reinforcement learning, imitation learning, transfer 
learning","year":"2021","journal-title":"SENSORS"},{"key":"ref19","article-title":"Emergence of grounded compositional language in multi-agent populations","author":"mordatch","year":"2017","journal-title":"ArXiv"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3205455.3205473"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aimag.v33i3.2426"},{"key":"ref27","article-title":"Toward a new philosophy of machine intelligence","author":"fogel","year":"2006","journal-title":"Evolutionary Computation"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-73424-6_2"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"ref5","article-title":"Multiagent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"2017","journal-title":"NIPS"},{"key":"ref8","article-title":"QMIX: monotonic value function factorisation for deep multi-agent reinforcement learning","author":"rashid","year":"2018","journal-title":"ICML"},{"key":"ref7","article-title":"Value-decomposition networks for cooperative multi-agent learning","author":"sunehag","year":"2018","journal-title":"AAMAS"},{"key":"ref2","article-title":"Benchmarking aquatic navigation using deep reinforcement learning and formal verification","author":"marchesini","year":"2021","journal-title":"IROS"},{"key":"ref9","article-title":"QTRAN: learning to factorize with transformation for cooperative multi-agent reinforcement learning","author":"son","year":"2019","journal-title":"ICML"},{"key":"ref1","article-title":"Virtual-to-real drl: Continuous control of mobile robots for mapless navigation","author":"tai","year":"2017","journal-title":"IROS"},{"key":"ref20","article-title":"A concise introduction to decentralized pomdps","author":"oliehoek","year":"2016","journal-title":"Intell Syst 
Springer"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11798"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206049"},{"key":"ref24","article-title":"Reinforcement learning in large discrete action spaces","author":"dulac-arnold","year":"2015","journal-title":"CoRR"},{"key":"ref23","article-title":"Q-learning in enormous action spaces via amortized approximate maximization","author":"wiele","year":"2018","journal-title":"NeurIPS workshop"},{"key":"ref26","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"2016","journal-title":"ICML"},{"key":"ref25","article-title":"MAVEN: multi-agent variational exploration","author":"mahajan","year":"2019","journal-title":"NeurIPS"}],"event":{"name":"2022 IEEE International Conference on Robotics and Automation (ICRA)","location":"Philadelphia, PA, USA","start":{"date-parts":[[2022,5,23]]},"end":{"date-parts":[[2022,5,27]]}},"container-title":["2022 International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9811522\/9811357\/09812341.pdf?arnumber=9812341","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T23:07:51Z","timestamp":1667516871000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9812341\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/icra46639.2022.9812341","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}