{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T22:29:21Z","timestamp":1777501761134,"version":"3.51.4"},"reference-count":37,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001321","name":"Guangdong Basic and Applied Basic Research Foundation","doi-asserted-by":"publisher","award":["2023A1515140071"],"award-info":[{"award-number":["2023A1515140071"]}],"id":[{"id":"10.13039\/501100001321","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62101029"],"award-info":[{"award-number":["62101029"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","award":["202006465043"],"award-info":[{"award-number":["202006465043"]}],"id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","award":["202306460078"],"award-info":[{"award-number":["202306460078"]}],"id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Syst. Man Cybern, Syst."],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1109\/tsmc.2025.3646451","type":"journal-article","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T22:02:09Z","timestamp":1768255329000},"page":"1203-1215","source":"Crossref","is-referenced-by-count":1,"title":["Subgoal-Based Hierarchical Reinforcement Learning for Multiagent Collaboration"],"prefix":"10.1109","volume":"56","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1624-5494","authenticated-orcid":false,"given":"Cheng","family":"Xu","sequence":"first","affiliation":[{"name":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuchen","family":"Shi","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changtian","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9530-8838","authenticated-orcid":false,"given":"Ran","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8162-4269","authenticated-orcid":false,"given":"Shihong","family":"Duan","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yadong","family":"Wan","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7600-7231","authenticated-orcid":false,"given":"Xiaotong","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3236361"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3318070"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161227"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.120495"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106369"},{"key":"ref6","first-page":"10199","article-title":"Weighted QMIX: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Rashid"},{"key":"ref7","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son"},{"key":"ref8","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative, multi-agent games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref9","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Lillicrap"},{"key":"ref10","article-title":"Density-based curriculum for multi-goal reinforcement learning with sparse rewards","author":"Yang","year":"2021","journal-title":"arXiv:2109.08903"},{"key":"ref11","first-page":"3757","article-title":"Episodic multi-agent reinforcement learning with curiosity-driven exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zheng"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref13","first-page":"5048","article-title":"Hindsight experience replay","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","volume":"30","author":"Andrychowicz"},{"key":"ref14","first-page":"1312","article-title":"Universal value function approximators","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schaul"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i10.26386"},{"key":"ref16","first-page":"1","article-title":"Exploration by random network distillation","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Burda"},{"key":"ref17","first-page":"1258","article-title":"Never give up: Learning directed exploration strategies","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Badia"},{"key":"ref18","first-page":"53146","article-title":"Pomdp planning for object search in partially unknown environment","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chen"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s12065-022-00703-4"},{"issue":"178","key":"ref20","first-page":"1","article-title":"F2A2: Flexible fully-decentralized approximate actor-critic for cooperative multi-agent reinforcement learning","volume":"24","author":"Li","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.3390\/drones7030193"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref23","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Lowe"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.65109\/JSRC7365"},{"issue":"178","key":"ref25","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref26","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"Schulman","year":"2015","journal-title":"arXiv:1506.02438"},{"key":"ref27","article-title":"Probabilistic subgoal representations for hierarchical reinforcement learning","author":"Huiling Wang","year":"2024","journal-title":"arXiv:2406.16707"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s11063-024-11632-x"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2437"},{"key":"ref30","article-title":"Incorporating spatial information into goal-conditioned hierarchical reinforcement learning via graph representations","author":"Zhang","year":"2025","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref31","first-page":"3307","article-title":"Data-efficient hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Nachum"},{"key":"ref32","first-page":"5729","article-title":"Active hierarchical exploration with stable subgoal representation learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Li"},{"key":"ref33","first-page":"21579","article-title":"Generating adjacency-constrained subgoals in hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhang"},{"issue":"42","key":"ref34","first-page":"1239","article-title":"Incorporating functional knowledge in neural networks","volume":"10","author":"Dugas","year":"2009","journal-title":"J. Mach. Learn. Res."},{"key":"ref35","first-page":"10494","article-title":"Hierarchies of reward machines","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Furelos-Blanco"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.65109\/LVZZ5205"},{"key":"ref37","first-page":"15032","article-title":"PettingZoo: Gym for multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Terry"}],"container-title":["IEEE Transactions on Systems, Man, and Cybernetics: Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6221021\/11372522\/11333884.pdf?arnumber=11333884","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T21:20:45Z","timestamp":1771449645000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11333884\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":37,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tsmc.2025.3646451","relation":{},"ISSN":["2168-2216","2168-2232"],"issn-type":[{"value":"2168-2216","type":"print"},{"value":"2168-2232","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2]]}}}