{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T19:03:08Z","timestamp":1772823788059,"version":"3.50.1"},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Science and Technology Innovation 2030 Major Project of the Ministry of Science and Technology of China","award":["2018AAA0101604"],"award-info":[{"award-number":["2018AAA0101604"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61936009"],"award-info":[{"award-number":["61936009"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906106"],"award-info":[{"award-number":["61906106"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62022048"],"award-info":[{"award-number":["62022048"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1109\/tnnls.2021.3121432","type":"journal-article","created":{"date-parts":[[2021,11,5]],"date-time":"2021-11-05T19:17:49Z","timestamp":1636139869000},"page":"4033-4046","source":"Crossref","is-referenced-by-count":8,"title":["Exploration With Task Information for Meta Reinforcement Learning"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8133-5010","authenticated-orcid":false,"given":"Peng","family":"Jiang","sequence":"first","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7361-9283","authenticated-orcid":false,"given":"Shiji","family":"Song","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7251-0988","authenticated-orcid":false,"given":"Gao","family":"Huang","sequence":"additional","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2018.11.004"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794127"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aiide.v14i1.13033"},{"key":"ref6","first-page":"1440","article-title":"Learning context-aware task reasoning for efficient meta reinforcement learning","volume-title":"Proc. Int. Joint Conf. Auton. Agents Multiagent Syst. (AAMAS)","author":"Wang"},{"key":"ref7","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","volume":"70","author":"Finn"},{"key":"ref8","first-page":"1","article-title":"Promp: Proximal meta-policy search","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Rothfuss"},{"key":"ref9","first-page":"1","article-title":"RL2: Fast reinforcement learning via slow reinforcement learning","volume":"abs\/1611.02779","author":"Duan","year":"2016","journal-title":"CoRR"},{"key":"ref10","first-page":"787","article-title":"Long short-term memory and learning-to-learn in networks of spiking neurons","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NeuralIps)","volume":"31","author":"Bellec"},{"key":"ref11","first-page":"5331","article-title":"Efficient off-policy meta-reinforcement learning via probabilistic context variables","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Rakelly"},{"key":"ref12","first-page":"1","article-title":"Meta-Q-learning","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Fakoor"},{"key":"ref13","article-title":"On first-order meta-learning algorithms","author":"Nichol","year":"2018","journal-title":"arXiv:1803.02999"},{"key":"ref14","first-page":"1","article-title":"ES-MAML: Simple Hessian-free meta learning","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Song"},{"key":"ref15","first-page":"1","article-title":"DiCE: The infinitely differentiable monte-carlo estimator","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Foerster"},{"key":"ref16","first-page":"4061","article-title":"Taming MAML: Efficient unbiased meta-reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Liu"},{"key":"ref17","article-title":"Learning to reinforcement learn","author":"Wang","year":"2016","journal-title":"arXiv:1611.05763"},{"key":"ref18","first-page":"4351","article-title":"Been there, done that: Meta-learning with episodic recall","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","volume":"80","author":"Ritter"},{"key":"ref19","first-page":"1","article-title":"A simple neural attentive meta-learner","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Mishra"},{"key":"ref20","article-title":"Meta-SGD: Learning to learn quickly for few-shot learning","author":"Li","year":"2017","journal-title":"arXiv:1707.09835"},{"key":"ref21","article-title":"Meta reinforcement learning as task inference","author":"Humplik","year":"2019","journal-title":"arXiv:1905.06424"},{"key":"ref22","first-page":"1","article-title":"Some considerations on learning to explore via meta-reinforcement learning","volume":"abs\/1803.01118","author":"Stadie","year":"2018","journal-title":"CoRR"},{"key":"ref23","first-page":"5302","article-title":"Meta-reinforcement learning of structured exploration strategies","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NeuralIps)","author":"Gupta"},{"key":"ref24","first-page":"910","article-title":"MAME: Model-agnostic meta-exploration","volume-title":"Proc. Conf. Rob Learn. (CoRL)","author":"Gurumurthy"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17297"},{"key":"ref26","article-title":"Learning adaptive exploration strategies in dynamic environments through informed policy regularization","author":"Kamienny","year":"2020","journal-title":"arXiv:2005.02934"},{"key":"ref27","article-title":"Decoupling exploration and exploitation for meta-reinforcement learning without sacrifices","author":"Zheran Liu","year":"2020","journal-title":"arXiv:2008.02790"},{"key":"ref28","first-page":"1109","article-title":"VIME: Variational information maximizing exploration","volume-title":"Proc. Adv. Neural Inf. Proces. Syst.","author":"Houthooft"},{"key":"ref29","first-page":"10249","article-title":"Learning to share and hide intentions using information regularization","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NeuralIps)","author":"Strouse"},{"key":"ref30","first-page":"1","article-title":"InfoBot: Transfer and exploration via the information bottleneck","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Goyal"},{"key":"ref31","article-title":"Implicit generative modeling for efficient exploration","author":"Ratzlaff","year":"2019","journal-title":"arXiv:1911.08017"},{"key":"ref32","first-page":"4496","article-title":"Distral: Robust multitask reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NeuralIps)","author":"Teh"},{"key":"ref33","article-title":"Information asymmetry in KL-regularized RL","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Galashov"},{"key":"ref34","first-page":"1","article-title":"Dynamics-aware unsupervised discovery of skills","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Sharma"},{"key":"ref35","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","volume":"80","author":"Haarnoja"},{"issue":"320","key":"ref36","first-page":"201","article-title":"The IM algorithm: A variational approach to information maximization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"16","author":"Agakov"},{"key":"ref37","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Adv. Neural Inf. Proces. Syst. (NeuralIps)","author":"Sutton"},{"key":"ref38","first-page":"1582","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"80","author":"Fujimoto"},{"key":"ref39","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Proc. Conf. Rob Learn. (CoRL)","author":"Yu"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/475"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10208115\/09604770.pdf?arnumber=9604770","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:43:00Z","timestamp":1705020180000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9604770\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8]]},"references-count":40,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2021.3121432","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8]]}}}