{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T03:50:05Z","timestamp":1768276205734,"version":"3.49.0"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62322316"],"award-info":[{"award-number":["62322316"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005090","name":"Beijing Nova Program","doi-asserted-by":"publisher","award":["20220484077"],"award-info":[{"award-number":["20220484077"]}],"id":[{"id":"10.13039\/501100005090","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005090","name":"Beijing Nova Program","doi-asserted-by":"publisher","award":["20230484435"],"award-info":[{"award-number":["20230484435"]}],"id":[{"id":"10.13039\/501100005090","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cogn. Dev. Syst."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tcds.2025.3543694","type":"journal-article","created":{"date-parts":[[2025,2,19]],"date-time":"2025-02-19T14:03:33Z","timestamp":1739973813000},"page":"1110-1124","source":"Crossref","is-referenced-by-count":4,"title":["Efficient Multitask Reinforcement Learning via Task-Specific Action Correction"],"prefix":"10.1109","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-8048-1112","authenticated-orcid":false,"given":"Jinyuan","family":"Feng","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1938-2603","authenticated-orcid":false,"given":"Min","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4841-4048","authenticated-orcid":false,"given":"Zhiqiang","family":"Pu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0312-5728","authenticated-orcid":false,"given":"Tenghai","family":"Qiu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3268-9482","authenticated-orcid":false,"given":"Jianqiang","family":"Yi","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5348-7671","authenticated-orcid":false,"given":"Jie","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Bath, Bath, U.K."}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2018.2840971"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2020.3030571"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref5","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref7","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989249"},{"key":"ref10","first-page":"15371","article-title":"Robust subtask learning for compositional generalization","volume-title":"Int. Conf. Mach. Learn.","author":"Jothimurugan","year":"2023"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3139667"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2023.3246107"},{"key":"ref13","article-title":"An overview of multi-task learning in deep neural networks","author":"Ruder","year":"2017"},{"key":"ref14","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume":"33","author":"Yu","year":"2020","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2023.3345735"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2024.3404061"},{"key":"ref17","first-page":"4344","article-title":"Learning by playing solving sparse reward tasks from scratch","volume-title":"Int. Conf. Mach. Learn.","author":"Riedmiller","year":"2018"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2021.10.011"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/RCAR58764.2023.10249999"},{"key":"ref20","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"Vecerik","year":"2017"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.cedpsych.2019.101832"},{"key":"ref22","first-page":"2608","article-title":"Towards safe reinforcement learning with a safety editor policy","volume":"35","author":"Yu","year":"2022","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref23","first-page":"15338","article-title":"First order constrained optimization in policy space","volume":"33","author":"Zhang","year":"2020","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref24","first-page":"8682","article-title":"Density constrained reinforcement learning","volume-title":"Int. Conf. Mach. Learn..","author":"Qin","year":"2021"},{"key":"ref25","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref26","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Yu","year":"2020"},{"key":"ref27","article-title":"Policy distillation","author":"Rusu","year":"2015"},{"key":"ref28","article-title":"Distral: Robust multitask reinforcement learning","volume":"30","author":"Teh","year":"2017","journal-title":"Adv. Inf. Process. Syst."},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3223872"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1057\/9781137319739.0006"},{"key":"ref31","first-page":"27503","article-title":"Efficiently identifying task groupings for multi-task learning","volume":"34","author":"Fifty","year":"2021","journal-title":"Advances in Neural Inf. Process. Syst."},{"key":"ref32","first-page":"11501","article-title":"Conservative data sharing for multi-task offline reinforcement learning","volume":"34","author":"Yu","year":"2021","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref33","first-page":"25611","article-title":"How to leverage unlabeled data in offline reinforcement learning","volume-title":"Int. Conf. Mach. Learn.","author":"Yu","year":"2022"},{"key":"ref34","first-page":"21495","article-title":"Paco: Parameter-compositional multi-task reinforcement learning","volume":"35","author":"Sun","year":"2022","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref35","first-page":"4767","article-title":"Multi-task reinforcement learning with soft modularization","volume":"33","author":"Yang","year":"2020","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i11.29129"},{"key":"ref37","first-page":"8728","article-title":"Adashare: Learning what to share for efficient deep multi-task learning","volume":"33","author":"Sun","year":"2020","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref38","first-page":"3854","article-title":"Learning to branch for multi-task learning","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Guo","year":"2020"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-46133-1_9"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3271445"},{"key":"ref41","article-title":"Multi-task reinforcement learning with mixture of orthogonal experts","author":"Hendawy","year":"2023"},{"key":"ref42","first-page":"9767","article-title":"Multi-task reinforcement learning with context-based representations","volume-title":"Int. Conf. Mach. Learn.","author":"Sodhani","year":"2021"},{"key":"ref43","first-page":"18878","article-title":"Conflict-averse gradient descent for multi-task learning","volume":"34","author":"Liu","year":"2021","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref44","article-title":"Mt-opt: Continuous multi-task robotic reinforcement learning at scale","author":"Kalashnikov","year":"2021"},{"key":"ref45","first-page":"557","article-title":"Scaling up multi-task robotic reinforcement learning","volume-title":"Conf. Robot Learn.","author":"Kalashnikov","year":"2022"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01712"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2023.3338241"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2022.3221805"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2022.3187186"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2023.3315513"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/springerreference_5781"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/203330.203343"},{"key":"ref53","article-title":"The starcraft multi-agent challenge","author":"Samvelyan","year":"2019"},{"key":"ref54","first-page":"78514","article-title":"Decompose a task into generalizable subtasks in multi-agent reinforcement learning","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Tian","year":"2023"},{"key":"ref55","article-title":"Updet: Universal multi-agent reinforcement learning via policy decoupling with transformers","author":"Hu","year":"2021"},{"key":"ref56","article-title":"Action semantics network: Considering the effects of actions in multiagent systems","volume-title":"Proc. 8th Int. Conf. Learn. Represent. ICLR 2020, Addis Ababa","author":"Wang","year":"2020"}],"container-title":["IEEE Transactions on Cognitive and Developmental Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7274989\/11197738\/10892354.pdf?arnumber=10892354","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T18:33:05Z","timestamp":1765909985000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10892354\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":56,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tcds.2025.3543694","relation":{},"ISSN":["2379-8920","2379-8939"],"issn-type":[{"value":"2379-8920","type":"print"},{"value":"2379-8939","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}