{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T15:51:18Z","timestamp":1778169078857,"version":"3.51.4"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T00:00:00Z","timestamp":1670803200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T00:00:00Z","timestamp":1670803200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s11704-022-2037-1","type":"journal-article","created":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T09:03:21Z","timestamp":1670835801000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["MAML2: meta reinforcement learning via meta-learning for task categories"],"prefix":"10.1007","volume":"17","author":[{"given":"Qiming","family":"Fu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhechao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nengwei","family":"Fang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Xing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianping","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,12,12]]},"reference":[{"key":"2037_CR1","unstructured":"Santoro A, Bartunov S, Botvinick M, Wierstra D, Lillicrap T. Meta-learning with memory-augmented neural networks. In: Proceedings of the 33rd International Conference on Machine Learning. 2016, 1842\u20131850"},{"key":"2037_CR2","doi-asserted-by":"crossref","unstructured":"Hochreiter S, Younger A S, Conwell P R. Learning to learn using gradient descent. In: Proceedings of the International Conference on Artificial Neural Networks. 2001, 87\u201394","DOI":"10.1007\/3-540-44668-0_13"},{"issue":"1","key":"2037_CR3","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1162\/neco.1992.4.1.131","volume":"4","author":"J Schmidhuber","year":"1992","unstructured":"Schmidhuber J. Learning to control fast-weight memories: an alternative to dynamic recurrent networks. Neural Computation, 1992, 4(1): 131\u2013139","journal-title":"Neural Computation"},{"key":"2037_CR4","unstructured":"Fakoor R, Chaudhari P, Soatto S, Smola A J. Meta-Q-learning. In: Proceedings of the 8th International Conference on Learning Representations. 2020"},{"key":"2037_CR5","unstructured":"Wang J X, Kurth-Nelson Z, Tirumala D, Soyer H, Leibo J Z, Munos R, Blundell C, Kumaran D, Botvinick M. Learning to reinforcement learn. 2016, arXiv preprint arXiv: 1611.05763"},{"key":"2037_CR6","unstructured":"Finn C, Abbeel P, Levine S. Model-agnostic meta-learning for fast adaptation of deep networks. In: Proceedings of the 34th International Conference on Machine Learning. 2017, 1126\u20131135"},{"key":"2037_CR7","unstructured":"Deleu T, Bengio Y. The effects of negative adaptation in Model-Agnostic Meta-Learning. In: Proceedings of the 2nd Workshop on Meta-Learning. 2018"},{"key":"2037_CR8","doi-asserted-by":"crossref","unstructured":"Lecarpentier E, Abel D, Asadi K, Jinnai Y, Rachelson E, Littman M L. Lipschitz lifelong reinforcement learning. In: Proceedings of the 35th AAAI Conference on Artificial Intelligence. 2021, 8270\u20138278","DOI":"10.1609\/aaai.v35i9.17006"},{"key":"2037_CR9","unstructured":"Finn C, Rajeswaran A, Kakade S, Levine S. Online meta-learning. In: Proceedings of the 36th International Conference on Machine Learning. 2019, 1920\u20131930"},{"key":"2037_CR10","doi-asserted-by":"crossref","unstructured":"Nguyen T, Luu T, Pham T, Rakhimkul S, Yoo C D. Robust MAML: prioritization task buffer with adaptive learning process for model-agnostic meta-learning. In: Proceedings of 2021 IEEE International Conference on Acoustics, Speech and Signal Processing. 2021, 3460\u20133464","DOI":"10.1109\/ICASSP39728.2021.9413446"},{"key":"2037_CR11","unstructured":"Clavera I, Rothfuss J, Schulman J, Fujita Y, Asfour T, Abbeel P. Model-based reinforcement learning via meta-policy optimization. In: Proceedings of the 2nd Conference on Robot Learning. 2018, 617\u2013629"},{"key":"2037_CR12","unstructured":"Seo Y, Lee K, Clavera I, Kurutach T, Shin J, Abbeel P. Trajectory-wise multiple choice learning for dynamics generalization in reinforcement learning. In: Proceedings of the 34th International Conference on Neural Information Processing Systems. 2020, 12968\u201312979"},{"key":"2037_CR13","unstructured":"Kirsch L, van Steenkiste S, Schmidhuber J. Improving generalization in meta reinforcement learning using learned objectives. In: Proceedings of the 8th International Conference on Learning Representations. 2020"},{"key":"2037_CR14","unstructured":"Sohn S, Woo H, Choi J, Lee H. Meta reinforcement learning with autonomous inference of subtask dependencies. In: Proceedings of the 8th International Conference on Learning Representations. 2020"},{"key":"2037_CR15","unstructured":"Guzman-Rivera A, Kohli P, Batra D, Rutenbar R A. Efficiently enforcing diversity in multi-output structured prediction. In: Proceedings of the 17th International Conference on Artificial Intelligence and Statistics. 2014, 284\u2013292"},{"key":"2037_CR16","unstructured":"Guzm\u00e1n-Rivera A, Batra D, Kohli P. Multiple choice learning: learning to produce multiple structured outputs. In: Proceedings of the 25th International Conference on Neural Information Processing Systems. 2012, 1799\u20131807"},{"key":"2037_CR17","unstructured":"Silver D L, Yang Q, Li L. Lifelong machine learning systems: beyond learning algorithms. In: Proceedings of 2013 AAAI Spring Symposium Series. 2013, 49\u201355"},{"key":"2037_CR18","unstructured":"Brunskill E, Li L. PAC-inspired option discovery in lifelong reinforcement learning. In: Proceedings of the 31st International Conference on Machine Learning. 2014, 316\u2013324"},{"key":"2037_CR19","unstructured":"Brafman R I, Tennenholtz M. R-max \u2014 a general polynomial time algorithm for near-optimal reinforcement learning. In: Proceedings of the 17th International Joint Conference on Artificial Intelligence. 2001, 953\u2013958"},{"key":"2037_CR20","unstructured":"Abel D, Jinnai Y, Guo S Y, Konidaris G, Littman M L. Policy and value transfer in lifelong reinforcement learning. In: Proceedings of the 35th International Conference on Machine Learning. 2018, 20\u201329"},{"key":"2037_CR21","unstructured":"Nichol A, Achiam J, Schulman J. On first-order meta-learning algorithms. 2018, arXiv preprint arXiv: 1803.02999"},{"key":"2037_CR22","doi-asserted-by":"crossref","unstructured":"Wang H, Dong S, Shao L. Measuring structural similarities in finite MDPs. In: Proceedings of the 28th International Joint Conference on Artificial Intelligence. 2019, 3684\u20133690","DOI":"10.24963\/ijcai.2019\/511"},{"key":"2037_CR23","unstructured":"Song J, Gao Y, Wang H, An B. Measuring the distance between finite Markov decision processes. In: Proceedings of 2016 International Conference on Autonomous Agents and Multiagent Systems. 2016, 468\u2013476"},{"key":"2037_CR24","unstructured":"Hu Y, Gao Y, An B. Learning in multi-agent systems with sparse interactions by knowledge transfer and game abstraction. In: Proceedings of 2015 International Conference on Autonomous Agents and Multiagent Systems. 2015, 753\u2013761"},{"issue":"11","key":"2037_CR25","first-page":"2157","volume":"42","author":"Q M Fu","year":"2014","unstructured":"Fu Q M, Liu Q, You S H, Huang W, Zhang X F. A novel fast Sarsa algorithm based on value function transfer. Acta Electronica Sinica, 2014, 42(11): 2157\u20132161","journal-title":"Acta Electronica Sinica"},{"key":"2037_CR26","volume-title":"Reinforcement Learning: An Introduction","author":"R S Sutton","year":"2018","unstructured":"Sutton R S, Barto A G. Reinforcement Learning: An Introduction. 2nd ed. Cambridge: MIT Press, 2018","edition":"2nd ed."},{"key":"2037_CR27","unstructured":"Duan Y, Schulman J, Chen X, Bartlett P L, Sutskever I, Abbeel P. RL2: fast reinforcement learning via slow reinforcement learning. 2016, arXiv preprint arXiv: 1611.02779"},{"key":"2037_CR28","unstructured":"Rakelly K, Zhou A, Finn C, Levine S, Quillen D. Efficient off-policy meta-reinforcement learning via probabilistic context variables. In: Proceedings of the 36th International Conference on Machine Learning. 2019, 5331\u20135340"},{"key":"2037_CR29","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S. Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Proceedings of the 35th International Conference on Machine Learning. 2018, 1861\u20131870"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-022-2037-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-022-2037-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-022-2037-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T20:26:26Z","timestamp":1726691186000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-022-2037-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,12]]},"references-count":29,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["2037"],"URL":"https:\/\/doi.org\/10.1007\/s11704-022-2037-1","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,12]]},"assertion":[{"value":"20 January 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 June 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 December 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"174325"}}