{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T13:11:30Z","timestamp":1775913090383,"version":"3.50.1"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Science and Technology Innovation 2030","award":["2021ZD0201405"],"award-info":[{"award-number":["2021ZD0201405"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,6,1]]},"DOI":"10.1109\/tpami.2022.3223872","type":"journal-article","created":{"date-parts":[[2022,11,24]],"date-time":"2022-11-24T19:57:43Z","timestamp":1669319863000},"page":"7258-7269","source":"Crossref","is-referenced-by-count":21,"title":["Curriculum-Based Asymmetric Multi-Task Reinforcement Learning"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5500-3311","authenticated-orcid":false,"given":"Hanchi","family":"Huang","sequence":"first","affiliation":[{"name":"Nanyang Technological University, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1754-1837","authenticated-orcid":false,"given":"Deheng","family":"Ye","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5659-3464","authenticated-orcid":false,"given":"Li","family":"Shen","sequence":"additional","affiliation":[{"name":"JD.com Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3865-8145","authenticated-orcid":false,"given":"Wei","family":"Liu","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}]}],"member":"263","reference":[{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/BF00117447"},{"key":"ref3","article-title":"Emergent complexity via multi-agent competition","author":"Bansal","year":"2017"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref6","article-title":"Sample complexity of multi-task reinforcement learning","author":"Brunskill","year":"2013"},{"key":"ref7","first-page":"794","article-title":"GradNorm: Gradient normalization for adaptive loss balancing in deep multitask networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Chen"},{"key":"ref8","article-title":"BabyAI: A platform to study the sample efficiency of grounded language learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chevalier-Boisvert"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989250"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/tetci.2023.3268707"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/0010-0277(93)90058-4"},{"key":"ref12","first-page":"1407","article-title":"IMPALA: Scalable distributed deep-RL with importance weighted actor-learner architectures","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Espeholt"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-014-0841-6"},{"key":"ref14","first-page":"37","article-title":"A general iterative shrinkage and thresholding algorithm for non-convex regularized optimization problems","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Gong"},{"key":"ref15","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref16","first-page":"7482","article-title":"Multi-task learning using uncertainty to weigh losses for scene geometry and semantics","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit.","author":"Kendall"},{"key":"ref17","first-page":"1008","article-title":"Actor-critic algorithms","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Konda"},{"key":"ref18","article-title":"Convergence rate of Frank-Wolfe for non-convex objectives","author":"Lacoste-Julien","year":"2016"},{"key":"ref19","first-page":"230","article-title":"Asymmetric multi-task learning based on task relatedness and loss","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee"},{"key":"ref20","first-page":"2956","article-title":"Deep asymmetric multi-task feature learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1057\/9781137319739.0006"},{"key":"ref22","article-title":"Towards deep learning models resistant to adversarial attacks","author":"Madry","year":"2017"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-017-1188-6"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299188"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/671"},{"key":"ref26","article-title":"Atari reinforcement learning leaderboard","author":"Shenton","year":"2018"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2019.8851784"},{"key":"ref28","first-page":"4496","article-title":"Distral: Robust multitask reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Teh"},{"key":"ref29","article-title":"You only learn one representation: Unified network for multiple tasks","author":"Wang","year":"2021"},{"key":"ref30","article-title":"Mastering rate based curriculum learning","author":"Willems","year":"2020"},{"key":"ref31","article-title":"Training agent for first-person shooter game with actor-critic curriculum learning","volume-title":"Proc. 5th Int. Conf. Learn. Representations","author":"Wu"},{"key":"ref32","article-title":"Multi-task reinforcement learning with soft modularization","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Yang"},{"key":"ref33","first-page":"621","article-title":"Towards playing full moba games with deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ye"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6144"},{"key":"ref35","article-title":"Gradient surgery for multi-task learning","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref36","first-page":"1","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Yu"},{"key":"ref37","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Yu"},{"key":"ref38","article-title":"Transporter networks: Rearranging the visual world for robotic manipulation","author":"Zeng","year":"2020"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/tkde.2021.3070203"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2538028"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10120646\/09960813.pdf?arnumber=9960813","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T09:31:27Z","timestamp":1726047087000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9960813\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,1]]},"references-count":39,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2022.3223872","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6,1]]}}}