{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T06:36:43Z","timestamp":1768459003283,"version":"3.49.0"},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62503356"],"award-info":[{"award-number":["62503356"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Tianjin Postdoctoral Innovation Post Funding Project","award":["2024072042"],"award-info":[{"award-number":["2024072042"]}]},{"name":"China Higher Education Institution Industry-University-Research Innovation Fund","award":["2024ZY009"],"award-info":[{"award-number":["2024ZY009"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Contr. Syst. Technol."],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1109\/tcst.2025.3619986","type":"journal-article","created":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T18:01:41Z","timestamp":1761242501000},"page":"304-317","source":"Crossref","is-referenced-by-count":1,"title":["Efficient Exploration in Large State-Action Space Through Structured Action Space for Learning Multirobots Motion Planning"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1055-9513","authenticated-orcid":false,"given":"Chaoxu","family":"Mu","sequence":"first","affiliation":[{"name":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8558-222X","authenticated-orcid":false,"given":"Zewu","family":"Jiang","sequence":"additional","affiliation":[{"name":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1042-5421","authenticated-orcid":false,"given":"Junnan","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8306-1663","authenticated-orcid":false,"given":"Ke","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3238-745X","authenticated-orcid":false,"given":"Xin","family":"Xu","sequence":"additional","affiliation":[{"name":"College of Intelligence Science and Technology, National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9154-6620","authenticated-orcid":false,"given":"Jun","family":"Yi","sequence":"additional","affiliation":[{"name":"College of Electronic and Information Engineering, Chongqing University of Science and Technology, Chongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s12599-014-0334-4"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1002\/rcs.408"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s11214-018-0520-7"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2005.165"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2007.11.009"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2009.5354448"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919890396"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/robot.2000.844730"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/70.508439"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2012.6232212"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2012.6225063"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2014.6957887"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1177\/027836498600500106"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref15","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. 4th Int. Conf. Learn. Represent. (ICLR)","author":"Lillicrap"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1126\/science.aat8414"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3390\/math9212752"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793889"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.3006716"},{"key":"ref20","first-page":"103","article-title":"Learning a decentralized multi-arm motion planner","volume-title":"Proc. 4th Conf. Robot Learn. (CoRL)","volume":"155","author":"Ha"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3236361"},{"key":"ref22","first-page":"1","article-title":"Deep reinforcement learning in parameterized action space","volume-title":"Proc. 4th Int. Conf. Learn. Represent. (ICLR)","author":"Hausknecht"},{"key":"ref23","first-page":"941","article-title":"Learning action representations for reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn. (ICML).","author":"Chandak"},{"key":"ref24","first-page":"1","article-title":"HyAR: Addressing discrete-continuous action reinforcement learning via hybrid action representation","volume-title":"Proc. 10th Int. Conf. Learn. Represent. (ICLR)","author":"Li"},{"key":"ref25","first-page":"3040","article-title":"Growing action spaces","volume-title":"Proc. 37th Int. Conf. Mach. Learn. (ICML).","volume":"1","author":"Farquhar"},{"key":"ref26","volume-title":"Parametrized Deep Q-Networks Learning: Reinforcement Learning With Discrete-Continuous Hybrid Action Space","author":"Xiong et al","year":"2018"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/323"},{"key":"ref28","first-page":"943","article-title":"A Bayesian framework for reinforcement learning","volume-title":"Proc. 17th Int. Conf. Mach. Learn. (ICML)","author":"Strens"},{"key":"ref29","first-page":"8626","article-title":"Randomized prior functions for deep reinforcement learning","volume-title":"Proc. 31st Adv. Neural Inf. Process. Syst. (NeurIPS).","volume":"31","author":"Osband"},{"key":"ref30","first-page":"1","article-title":"Exploration by random network distillation","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Burda"},{"key":"ref31","first-page":"1","article-title":"Contingency-aware exploration in reinforcement learning","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Choi"},{"key":"ref32","first-page":"1","article-title":"Never give up: Learning directed exploration strategies","volume-title":"Proc. 8th Int. Conf. Learn. Representations (ICLR).","author":"Badia"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CoG47356.2020.9231562"},{"key":"ref34","first-page":"3836","article-title":"The uncertainty Bellman equation and exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"O\u2019Donoghue"},{"key":"ref35","article-title":"Go-explore: A new approach for hard-exploration problems","author":"Ecoffet","year":"2019","journal-title":"arXiv:1901.10995"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03157-9"},{"key":"ref37","first-page":"1","article-title":"Recurrent experience replay in distributed reinforcement learning","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Kapturowski"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9635857"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126620"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.05.006"}],"container-title":["IEEE Transactions on Control Systems Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/87\/11339250\/11215873.pdf?arnumber=11215873","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T22:04:52Z","timestamp":1768255492000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11215873\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":40,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tcst.2025.3619986","relation":{},"ISSN":["1063-6536","1558-0865","2374-0159"],"issn-type":[{"value":"1063-6536","type":"print"},{"value":"1558-0865","type":"electronic"},{"value":"2374-0159","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]}}}