{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T18:45:58Z","timestamp":1765910758972,"version":"3.48.0"},"reference-count":42,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001691","name":"JSPS KAKENHI","doi-asserted-by":"publisher","award":["JP19J22987"],"award-info":[{"award-number":["JP19J22987"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001691","name":"JSPS KAKENHI","doi-asserted-by":"publisher","award":["JP22H04998"],"award-info":[{"award-number":["JP22H04998"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001691","name":"JSPS KAKENHI","doi-asserted-by":"publisher","award":["JP23K24925"],"award-info":[{"award-number":["JP23K24925"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001863","name":"NEDO project","doi-asserted-by":"publisher","award":["JPNP20006"],"award-info":[{"award-number":["JPNP20006"]}],"id":[{"id":"10.13039\/501100001863","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002241","name":"JST Moonshot R&D program","doi-asserted-by":"publisher","award":["JPMJMS223B-3"],"award-info":[{"award-number":["JPMJMS223B-3"]}],"id":[{"id":"10.13039\/501100002241","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100015641","name":"Tateisi Science and Technology 
Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100015641","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cogn. Dev. Syst."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tcds.2025.3543350","type":"journal-article","created":{"date-parts":[[2025,2,19]],"date-time":"2025-02-19T14:03:33Z","timestamp":1739973813000},"page":"1260-1271","source":"Crossref","is-referenced-by-count":2,"title":["Foundational Policy Acquisition via Multitask Learning for Motor Skill Generation"],"prefix":"10.1109","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-0712-268X","authenticated-orcid":false,"given":"Satoshi","family":"Yamamori","sequence":"first","affiliation":[{"name":"Learning Machines Group, Graduate School of Informatics, Kyoto University, Kyoto, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4115-1919","authenticated-orcid":false,"given":"Jun","family":"Morimoto","sequence":"additional","affiliation":[{"name":"Learning Machines Group, Graduate School of Informatics, Kyoto University, Kyoto, Japan"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Learning to adapt in dynamic, real-world environments through meta-reinforcement learning","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Nagabandi","year":"2019"},{"key":"ref2","first-page":"5331","article-title":"Efficient off-policy meta-reinforcement learning via probabilistic context variables","volume-title":"Proc. 36th Int. Conf. Mach. 
Learn.","volume":"97","author":"Rakelly","year":"2019"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2020.3045574"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2023.3338241"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1002\/cphy.c170043"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.1101\/2021.03.14.435278","article-title":"A synthesis of the many errors and learning processes of visuomotor adaptation","author":"Morehead","year":"2021"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.7554\/eLife.76639"},{"key":"ref8","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","volume":"70","author":"Finn","year":"2017"},{"key":"ref9","first-page":"5048","article-title":"Hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Andrychowicz","year":"2017"},{"key":"ref10","first-page":"1312","article-title":"Universal value function approximators","volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","volume":"37","author":"Schaul","year":"2015"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/770"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3184-5_3"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-200-7.50072-6"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2010.2065430"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2021.3137262"},{"key":"ref16","first-page":"530","article-title":"Contextual imagined goals for self-supervised robotic learning","volume-title":"Proc. 3rd Annu. Conf. 
Robot Learn.","volume":"100","author":"Nair","year":"2019"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017944732463"},{"key":"ref18","first-page":"7248","article-title":"RL Unplugged: A suite of benchmarks for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Gulcehre","year":"2020"},{"volume-title":"Applied Nonlinear Control","year":"1991","author":"Slotine","key":"ref19"},{"key":"ref20","first-page":"1563","article-title":"Minimax differential dynamic programming: An application to robust biped walking","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"15","author":"Morimoto","year":"2002"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-009-9133-z"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.048"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968053"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.064"},{"issue":"27","key":"ref25","first-page":"949","article-title":"Natural evolution strategies","volume":"15","author":"Wierstra","year":"2014","journal-title":"J. Mach. Learn. Res."},{"article-title":"Evolution strategies as a scalable alternative to reinforcement learning","year":"2017","author":"Salimans","key":"ref26"},{"key":"ref27","first-page":"2546","article-title":"Algorithms for hyper-parameter optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"24","author":"Bergstra","year":"2011"},{"issue":"1","key":"ref28","first-page":"115","article-title":"Making a science of model search: Hyperparameter optimization in hundreds of dimensions for vision architectures","volume-title":"Proc. 30th Int. Conf. Mach. 
Learn.","volume":"28","author":"Bergstra","year":"2013"},{"key":"ref29","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","volume":"80","author":"Haarnoja","year":"2018"},{"article-title":"Reinforcement learning and control as probabilistic inference: Tutorial and review","year":"2018","author":"Levine","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330701"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref33","article-title":"Benchmarks for deep off-policy evaluation","volume-title":"Proc. 9th Int. Conf. Learn. Represent., Virtual Event","author":"Fu","year":"2021"},{"article-title":"RL${}^{2}$: Fast reinforcement learning via slow reinforcement learning","year":"2016","author":"Duan","key":"ref34"},{"key":"ref35","article-title":"ProMP: Proximal meta-policy search","volume-title":"Proc. 7th Int. Conf. Learn. Represent.","author":"Rothfuss","year":"2019"},{"article-title":"PEARL: Efficient off-policy meta-learning via probabilistic context variables,\u201d 2019","year":"2024","author":"Rakelly","key":"ref36"},{"article-title":"MT-Opt: Continuous multi-task robotic reinforcement learning at scale","year":"2021","author":"Kalashnikov","key":"ref37"},{"key":"ref38","article-title":"Open X-Embodiment: Robotic learning datasets and RT-X models","volume-title":"Proc. Towards Generalist Robots: Learn. Paradigms Scalable Skill Acquisition @ CoRL","author":"Vuong","year":"2023"},{"key":"ref39","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. 35th Int. Conf. Mach. 
Learn.","volume":"80","author":"Fujimoto","year":"2018"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref41","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. 4th Int. Conf. Learn. Represent.","author":"Lillicrap","year":"2016"},{"article-title":"Soft actor-critic algorithms and applications","year":"2018","author":"Haarnoja","key":"ref42"}],"container-title":["IEEE Transactions on Cognitive and Developmental Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7274989\/11197738\/10892349.pdf?arnumber=10892349","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T18:33:19Z","timestamp":1765909999000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10892349\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":42,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tcds.2025.3543350","relation":{},"ISSN":["2379-8920","2379-8939"],"issn-type":[{"type":"print","value":"2379-8920"},{"type":"electronic","value":"2379-8939"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}