{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:52:12Z","timestamp":1764960732291,"version":"3.46.0"},"reference-count":57,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62103451","61973327","62373386"],"award-info":[{"award-number":["62103451","61973327","62373386"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen Science and Technology Program","award":["JCYJ20220530145209021"],"award-info":[{"award-number":["JCYJ20220530145209021"]}]},{"name":"Industry-University-Research Fund Project through the Ministry of Education of China","award":["2021ZYA02017"],"award-info":[{"award-number":["2021ZYA02017"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1109\/tnnls.2023.3322591","type":"journal-article","created":{"date-parts":[[2023,11,7]],"date-time":"2023-11-07T14:12:02Z","timestamp":1699366322000},"page":"1272-1286","source":"Crossref","is-referenced-by-count":1,"title":["EASpace: Enhanced Action Space for Policy Transfer"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6674-6495","authenticated-orcid":false,"given":"Zheng","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Aeronautics and Astronautics, Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1733-159X","authenticated-orcid":false,"given":"Qingrui","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Aeronautics and Astronautics, Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9817-3855","authenticated-orcid":false,"given":"Bo","family":"Zhu","sequence":"additional","affiliation":[{"name":"School of Aeronautics and Astronautics, Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8493-8773","authenticated-orcid":false,"given":"Xiaohan","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Aeronautics and Astronautics, Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0587-6752","authenticated-orcid":false,"given":"Tianjiang","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Aeronautics and Astronautics, Sun Yat-sen University, Shenzhen, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.3004555"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2023.3292075"},{"key":"ref3","first-page":"468","article-title":"Measuring the distance between finite Markov decision processes","volume-title":"Proc. Int. Conf. Auto. Agents Multiagent Syst.","author":"Song"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10933"},{"key":"ref5","first-page":"1026","article-title":"Object-oriented curriculum generation for reinforcement learning","volume-title":"Proc. Int. Conf. Auto. Agents MultiAgent Syst.","author":"Silva"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1160633.1160762"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11718"},{"key":"ref8","article-title":"Policy distillation","author":"Rusu","year":"2015","journal-title":"arXiv:1511.06295"},{"key":"ref9","first-page":"4499","article-title":"Distral: Robust multitask reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Teh"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2806087"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref12","first-page":"989","article-title":"Context-aware policy reuse","volume-title":"Proc. Int. Conf. Auto. Agents Multiagent Syst.","author":"Li"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/428"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"ref16","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Vezhnevets"},{"key":"ref17","first-page":"3307","article-title":"Data-efficient hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Nachum"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3086033"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10733"},{"key":"ref20","article-title":"Actor-mimic: Deep multitask and transfer reinforcement learning","author":"Parisotto","year":"2015","journal-title":"arXiv:1511.06342"},{"key":"ref21","first-page":"4058","article-title":"Successor features for transfer in reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Barreto"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s11768-011-1005-3"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44851-9_35"},{"key":"ref24","first-page":"3380","article-title":"Direct policy iteration with demonstrations","volume-title":"Proc. Int. Joint Conf. Artif. Intell.","author":"Chemali"},{"key":"ref25","first-page":"3352","article-title":"Reinforcement learning from demonstration through shaping","volume-title":"Proc. Int. Joint Conf. Artif. Intell.","author":"Brys"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3004893"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-019-01073-3"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2021.3094207"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3128666"},{"key":"ref30","first-page":"2012","article-title":"DAC: The double actor-critic architecture for learning options","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Zhang"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3059912"},{"key":"ref32","first-page":"271","article-title":"Feudal reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"5","author":"Dayan"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2891792"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.105152"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2018.11.072"},{"key":"ref36","first-page":"3682","article-title":"Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Kulkarni"},{"key":"ref37","first-page":"968","article-title":"Hierarchical reinforcement learning with hindsight","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Levy"},{"key":"ref38","article-title":"Hierarchical deep multiagent reinforcement learning with temporal abstraction","author":"Tang","year":"2018","journal-title":"arXiv:1809.09332"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3087733"},{"key":"ref40","first-page":"1312","article-title":"Universal value function approximators","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schaul"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref42","first-page":"393","article-title":"Reinforcement learning methods for continuous-time Markov decision problems","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"7","author":"Bradtke"},{"key":"ref43","first-page":"792","article-title":"Principled methods for advising reinforcement learning agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wiewiora"},{"key":"ref44","first-page":"15","article-title":"Roles of macro-actions in accelerating reinforcement learning","volume":"1317","author":"McGovern","year":"1997","journal-title":"Grace Hopper Celebration of Women in Computing"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.21236\/ada276517"},{"key":"ref46","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Proc. ICML","volume":"99","author":"Ng"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1088\/1367-2630\/aa69e7"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812083"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.1991.131810"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1023\/A:1020564024509"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-71682-4_5"},{"key":"ref52","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3091680"},{"issue":"1","key":"ref55","first-page":"7382","article-title":"Curriculum learning for reinforcement learning domains: A framework and survey","volume":"21","author":"Narvekar","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1080\/0952813X.2015.1020517"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1002\/oca.2156"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10832116\/10310287.pdf?arnumber=10310287","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:38:56Z","timestamp":1764959936000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10310287\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1]]},"references-count":57,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2023.3322591","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"type":"print","value":"2162-237X"},{"type":"electronic","value":"2162-2388"}],"subject":[],"published":{"date-parts":[[2025,1]]}}}