{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T15:58:12Z","timestamp":1774627092054,"version":"3.50.1"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001659","name":"German Research Foundation DFG under Project CML","doi-asserted-by":"publisher","award":["TRR 169"],"award-info":[{"award-number":["TRR 169"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cogn. Dev. Syst."],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1109\/tcds.2023.3315513","type":"journal-article","created":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T18:01:03Z","timestamp":1694714463000},"page":"947-960","source":"Crossref","is-referenced-by-count":16,"title":["Continual Robot Learning Using Self-Supervised Task Inference"],"prefix":"10.1109","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1670-8962","authenticated-orcid":false,"given":"Muhammad Burhan","family":"Hafez","sequence":"first","affiliation":[{"name":"Department of Informatics, Knowledge Technology Group, University of Hamburg, Hamburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1343-4775","authenticated-orcid":false,"given":"Stefan","family":"Wermter","sequence":"additional","affiliation":[{"name":"Department of Informatics, Knowledge Technology Group, University of Hamburg, Hamburg, Germany"}]}],"member":"263","reference":[{"issue":"1","key":"ref1","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref2","article-title":"Solving rubik\u2019s cube with a robot hand","author":"Akkaya","year":"2019","journal-title":"arXiv:1910.07113"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.011"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref5","first-page":"1","article-title":"Policy distillation","volume-title":"Proc. ICLR","author":"Rusu"},{"key":"ref6","first-page":"4499","article-title":"Distral: Robust multitask reinforcement learning","volume-title":"Proc. Conf. Neural Inf. Process. Syst. (NeurIPS)","author":"Teh"},{"key":"ref7","first-page":"6920","article-title":"Teachable reinforcement learning via advice distillation","volume-title":"Proc. Conf. Neural Inf. Process. Syst. (NeurIPS)","author":"Watkins"},{"key":"ref8","first-page":"9653","article-title":"Guided meta-policy search","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Mendonca"},{"key":"ref9","first-page":"1","article-title":"Watch, try, learn: Meta-learning from demonstrations and reward","volume-title":"Proc. Int. Conf. Learning Represent. (ICLR)","author":"Zhou"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.047"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3139667"},{"key":"ref12","first-page":"5331","article-title":"Efficient off-policy meta-reinforcement learning via probabilistic context variables","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Rakelly"},{"key":"ref13","first-page":"12600","article-title":"MetaCURE: Meta reinforcement learning with empowerment-driven exploration","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Zhang"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1037\/0012-1649.42.4.723"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1038\/nrn2805"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636297"},{"key":"ref17","first-page":"991","article-title":"BC-Z: Zero-shot task generalization with robotic imitation learning","volume-title":"Proc. Conf. Robot Learn. (CoRL)","author":"Jang"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461076"},{"key":"ref19","first-page":"1113","article-title":"Learning latent plans from play","volume-title":"Proc. Conf. Robot Learn. (CoRL)","author":"Lynch"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8462901"},{"key":"ref21","article-title":"MT-Opt: Continuous multi-task robotic reinforcement learning at scale","author":"Kalashnikov","year":"2021","journal-title":"arXiv:2104.08212"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1037\/0012-1649.31.5.838"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1017\/S0140525X05000129"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1037\/0012-1649.38.5.840"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.actpsy.2006.09.006"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuropsychologia.2017.10.026"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1080\/15250000902840003"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1093\/scan\/nss065"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.neulet.2012.10.002"},{"key":"ref30","first-page":"8583","article-title":"Planning to explore via self-supervised world models","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Sekar"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2020.XVI.053"},{"key":"ref32","first-page":"729","article-title":"Demonstration-guided reinforcement learning with learned skills","volume-title":"Proc. Conf. Robot Learn. (CoRL)","author":"Pertsch"},{"key":"ref33","first-page":"9767","article-title":"Multi-task reinforcement learning with context-based representations","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Sodhani"},{"key":"ref34","first-page":"4528","article-title":"Progress & compress: A scalable framework for continual learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Schwarz"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013796"},{"key":"ref36","first-page":"2661","article-title":"Zero-shot task generalization with multi-task deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Oh"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2008852117"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(02)00078-3"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525087"},{"key":"ref41","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Lillicrap"},{"key":"ref42","first-page":"13","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. ICLR","author":"Kingma"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.5555\/3026877.3026899"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2017.8172289"},{"key":"ref45","first-page":"1321","article-title":"Coppeliasim (formerly V-REP): A versatile and scalable robot simulation framework","volume-title":"Proc. IEEE\/RSJ Int. Conf. Intell. Robots Syst. (IROS)","author":"Rohmer"},{"key":"ref46","article-title":"World models","author":"Ha","year":"2018","journal-title":"arXiv:1803.10122"},{"key":"ref47","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Hafner"},{"key":"ref48","first-page":"1","article-title":"Auto-encoding variational bayes","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Kingma"}],"container-title":["IEEE Transactions on Cognitive and Developmental Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7274989\/10552653\/10251441.pdf?arnumber=10251441","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,25]],"date-time":"2024-06-25T21:10:10Z","timestamp":1719349810000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10251441\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":48,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tcds.2023.3315513","relation":{},"ISSN":["2379-8920","2379-8939"],"issn-type":[{"value":"2379-8920","type":"print"},{"value":"2379-8939","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6]]}}}