{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:21:12Z","timestamp":1740100872001,"version":"3.37.3"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T00:00:00Z","timestamp":1658016000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T00:00:00Z","timestamp":1658016000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,17]]},"DOI":"10.1109\/rcar54675.2022.9872291","type":"proceedings-article","created":{"date-parts":[[2022,9,5]],"date-time":"2022-09-05T20:31:54Z","timestamp":1662409914000},"page":"499-504","source":"Crossref","is-referenced-by-count":0,"title":["Celebrating Robustness in Efficient Off-Policy Meta-Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Ziyi","family":"Liu","sequence":"first","affiliation":[{"name":"NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350"}]},{"given":"Zongyuan","family":"Li","sequence":"additional","affiliation":[{"name":"NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350"}]},{"given":"Qianqian","family":"Cao","sequence":"additional","affiliation":[{"name":"NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350"}]},{"given":"Yuan","family":"Wan","sequence":"additional","affiliation":[{"name":"Wuhan University of Technology,Department of Mathematical,WuHan,China,430070"}]},{"given":"Xian","family":"Guo","sequence":"additional","affiliation":[{"name":"NanKai University,Institute of Robotics and Automatic Information System and the Tianjin Key Laboratory of Intelligent Robotics, College of Artificial Intelligence,Tianjin,China,300350"}]}],"member":"263","reference":[{"key":"ref32","article-title":"Deep variational information bottleneck","author":"alemi","year":"2016","journal-title":"arXiv preprint arXiv 1612 00410"},{"journal-title":"Decoupling adaptation from modeling with meta-optimizers for meta learning","year":"2019","author":"arnold","key":"ref31"},{"key":"ref30","article-title":"Rapid learning or feature reuse? towards understanding the effectiveness of maml","author":"raghu","year":"2019","journal-title":"arXiv preprint arXiv 1909 09150"},{"key":"ref10","article-title":"Learning to reinforcement learn","author":"wang","year":"2016","journal-title":"arXiv preprint arXiv 1611 05763"},{"key":"ref11","first-page":"5331","article-title":"Efficient off-policy meta-reinforcement learning via probabilistic context variables","author":"rakelly","year":"2019","journal-title":"International Conference on Machine Learning"},{"key":"ref12","article-title":"Meld: Meta-reinforcement learning from images via latent state models","author":"zhao","year":"2020","journal-title":"arXiv preprint arXiv 2010 13957"},{"key":"ref13","article-title":"Meta-q-learning","author":"fakoor","year":"2019","journal-title":"arXiv preprint arXiv 1910 07470"},{"key":"ref14","article-title":"Meta-learning: from few-shot learning to rapid reinforcement learning","author":"finn","year":"2019","journal-title":"ICML"},{"key":"ref15","article-title":"Promp: Proximal meta-policy search","author":"rothfuss","year":"2018","journal-title":"arXiv preprint arXiv 1810 06008"},{"key":"ref16","article-title":"A simple neural attentive meta-learner","author":"mishra","year":"2017","journal-title":"arXiv preprint arXiv 1707 03374"},{"key":"ref17","article-title":"Meta-reinforcement learning robust to distributional shift via model identification and experience relabeling","author":"mendonca","year":"2020","journal-title":"arXiv preprint arXiv 2006 04989"},{"key":"ref18","article-title":"Meta-learning and universality: Deep representations and gradient descent can approximate any learning algorithm","author":"finn","year":"2017","journal-title":"arXiv preprint arXiv 1710 11622"},{"key":"ref19","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018","journal-title":"arXiv preprint arXiv 1812 09111"},{"key":"ref28","first-page":"818","article-title":"Visualizing and understanding convolutional networks","author":"zeiler","year":"2014","journal-title":"European Conference on Computer Vision"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref3","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv preprint arXiv 1509 02971"},{"key":"ref6","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","author":"finn","year":"2017","journal-title":"International Conference on Machine Learning"},{"key":"ref29","first-page":"4700","article-title":"Densely connected convolutional networks","author":"huang","year":"2017","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref5","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref8","first-page":"23","article-title":"Meta-learning with temporal convolutions","volume":"2","author":"mishra","year":"2017","journal-title":"arXiv preprint arXiv 1707 03374"},{"key":"ref7","article-title":"Rl&#x00B2;: Fast reinforcement learning via slow reinforcement learning","author":"duan","year":"2016","journal-title":"arXiv preprint arXiv 1611 02779"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref9","article-title":"Meta-reinforcement learning of structured exploration strategies","volume":"31","author":"gupta","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref1","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref20","article-title":"Matching networks for one shot learning","volume":"29","author":"vinyals","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref22","article-title":"Auto-encoding variational bayes","author":"kingma","year":"2013","journal-title":"arXiv preprint arXiv 1312 6114"},{"key":"ref21","article-title":"Embed to control: A locally linear latent dynamics model for control from raw images","volume":"28","author":"watter","year":"2015","journal-title":"Advances in neural information processing systems"},{"journal-title":"beta-vae Learning basic visual concepts with a constrained variational framework","year":"2016","author":"higgins","key":"ref24"},{"key":"ref23","article-title":"Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion","volume":"11","author":"vincent","year":"2010","journal-title":"Journal of Machine Learning Research"},{"key":"ref26","article-title":"Stochastic neural networks for hierarchical reinforcement learning","author":"florensa","year":"2017","journal-title":"arXiv preprint arXiv 1704 03012"},{"key":"ref25","article-title":"Learning an embedding space for transferable robot skills","author":"hausman","year":"2018","journal-title":"International Conference on Learning Representations"}],"event":{"name":"2022 IEEE International Conference on Real-time Computing and Robotics (RCAR)","start":{"date-parts":[[2022,7,17]]},"location":"Guiyang, China","end":{"date-parts":[[2022,7,22]]}},"container-title":["2022 IEEE International Conference on Real-time Computing and Robotics (RCAR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9872150\/9872152\/09872291.pdf?arnumber=9872291","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,26]],"date-time":"2022-09-26T21:11:41Z","timestamp":1664226701000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9872291\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,17]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/rcar54675.2022.9872291","relation":{},"subject":[],"published":{"date-parts":[[2022,7,17]]}}}