{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T08:00:06Z","timestamp":1780732806246,"version":"3.54.1"},"reference-count":36,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&amp;D Program of China","award":["2021ZD0112703"],"award-info":[{"award-number":["2021ZD0112703"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1109\/lra.2025.3579647","type":"journal-article","created":{"date-parts":[[2025,6,12]],"date-time":"2025-06-12T17:44:26Z","timestamp":1749750266000},"page":"7843-7850","source":"Crossref","is-referenced-by-count":5,"title":["Reinforcement Learning for Multi-Agent Path Finding in Large-Scale Warehouses via Distributed Policy Evolution"],"prefix":"10.1109","volume":"10","author":[{"given":"Qinru","family":"Shi","sequence":"first","affiliation":[{"name":"National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0693-6574","authenticated-orcid":false,"given":"Meiqin","family":"Liu","sequence":"additional","affiliation":[{"name":"National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5117-3110","authenticated-orcid":false,"given":"Senlin","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Electrical Engineering, Zhejiang University, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3422-944X","authenticated-orcid":false,"given":"Xuguang","family":"Lan","sequence":"additional","affiliation":[{"name":"National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/socs.v10i1.18510"},{"key":"ref2","first-page":"1711","article-title":"Research challenges and opportunities in multi-agent path finding and multi-agent pickup and delivery problems","volume-title":"Proc. AAMAS","volume":"20","author":"Salzman"},{"key":"ref3","first-page":"2077","article-title":"Learning to cooperate: Application of deep reinforcement learning for online AGV path finding","volume-title":"Proc. 19th Int. Conf. Auton. Agents Multiagent Syst.","author":"Zhang","year":"2020"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1002\/aaai.12069"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v27i1.8541"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v24i1.7564"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2014.11.001"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636683"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2014.11.006"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CAC.2018.8623483"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3292004"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2903261"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3062803"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340876"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3264540"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560748"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3139145"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342305"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812341"},{"key":"ref20","first-page":"1196","article-title":"Evolution-guided policy gradient in reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Khadka","year":"2018"},{"key":"ref21","first-page":"6542","article-title":"CEM-RL: Combining evolutionary and gradient-based methods for policy search","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Pourchot","year":"2019"},{"key":"ref22","first-page":"3341","article-title":"Collaborative evolutionary reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Khadka","year":"2019"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5728"},{"key":"ref24","article-title":"Genetic soft updates for policy evolution in deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Marchesini","year":"2021"},{"key":"ref25","article-title":"ERL-Re$^{2}$: Efficient evolutionary reinforcement learning with shared state representation and individual policy representation","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Jianye","year":"2022"},{"key":"ref26","article-title":"An image is worth 16  16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"key":"ref27","first-page":"3666","article-title":"Graph convolutional reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Jiang","year":"2020"},{"key":"ref28","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang","year":"2016"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref31","first-page":"3864","article-title":"Distributed prioritized experience replay","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Horgan","year":"2018"},{"key":"ref32","article-title":"Evolution strategies as a scalable alternative to reinforcement learning","author":"Salimans","year":"2017"},{"key":"ref33","first-page":"1237","article-title":"Off-policy evolutionary reinforcement learning with maximum mutations","volume-title":"Proc. 21st Int. Conf. Auton. Agents Multiagent Syst.","author":"Suri","year":"2022"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6221"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-13844-7_47"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6631119"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11045364\/11034721.pdf?arnumber=11034721","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T07:08:32Z","timestamp":1750748912000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11034721\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8]]},"references-count":36,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/lra.2025.3579647","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8]]}}}