{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T23:14:22Z","timestamp":1740179662400,"version":"3.37.3"},"reference-count":46,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62102387"],"award-info":[{"award-number":["62102387"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"University Synergy Innovation Program of Anhui Province","award":["GXXT-2022-041"],"award-info":[{"award-number":["GXXT-2022-041"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Games"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1109\/tg.2023.3304315","type":"journal-article","created":{"date-parts":[[2023,8,11]],"date-time":"2023-08-11T17:32:37Z","timestamp":1691775157000},"page":"538-548","source":"Crossref","is-referenced-by-count":0,"title":["Multigoal Reinforcement Learning via Exploring Entropy-Regularized Successor Matching"],"prefix":"10.1109","volume":"15","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4131-0625","authenticated-orcid":false,"given":"Xiaoyun","family":"Feng","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7466-4593","authenticated-orcid":false,"given":"Yun","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Anhui University, Hefei, China"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/CoG51982.2022.9893637"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1038\/nature14236"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"ref4","first-page":"1","article-title":"Model based reinforcement learning for Atari","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kaiser","year":"2020"},{"key":"ref5","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lillicrap","year":"2016"},{"year":"2017","author":"Schulman","article-title":"Proximal policy optimization algorithms","key":"ref6"},{"key":"ref7","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref8","first-page":"1094","article-title":"Learning to achieve goals","volume-title":"Proc. Int. Joint Conf. Artif. 
Intell.","author":"Kaelbling","year":"1993"},{"key":"ref9","first-page":"1312","article-title":"Universal value function approximators","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schaul","year":"2015"},{"year":"2018","author":"Plappert","article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","key":"ref10"},{"key":"ref11","first-page":"1","article-title":"Hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Andrychowicz","year":"2017"},{"key":"ref12","first-page":"1","article-title":"Temporal difference models: Model-free deep RL for model-based control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Pong","year":"2018"},{"key":"ref13","first-page":"2377","article-title":"Generalization and exploration via randomized value functions","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Osband","year":"2016"},{"key":"ref14","first-page":"1","article-title":"Visual reinforcement learning with imagined goals","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Nair","year":"2018"},{"key":"ref15","first-page":"1515","article-title":"Automatic goal generation for reinforcement learning agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Florensa","year":"2018"},{"key":"ref16","first-page":"1","article-title":"Exploration via hindsight goal generation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ren","year":"2019"},{"key":"ref17","first-page":"1","article-title":"Unsupervised control through non-parametric discriminative rewards","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Warde-Farley","year":"2019"},{"key":"ref18","first-page":"1","article-title":"Skew-fit: State-covering self-supervised reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pong","year":"2020"},{"key":"ref19","first-page":"7750","article-title":"Maximum entropy gain exploration for long horizon multi-goal reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pitis","year":"2020"},{"key":"ref20","first-page":"1","article-title":"Curious: Intrinsically motivated multi-task, multi-goal reinforcement learning","author":"Colas","year":"2018"},{"key":"ref21","first-page":"2681","article-title":"Provably efficient maximum entropy exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hazan","year":"2019"},{"key":"ref22","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2017"},{"key":"ref23","first-page":"1","article-title":"Diversity is all you need: Learning skills without a reward function","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Eysenbach","year":"2019"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1080\/09540099108946587"},{"key":"ref25","first-page":"1","article-title":"Training agent for first-person shooter game with actor-critic curriculum learning","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Wu","year":"2017"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1109\/TKDE.2009.191"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1162\/neco.1993.5.4.613"},{"year":"2016","author":"Kulkarni","article-title":"Deep successor reinforcement learning","key":"ref28"},{"key":"ref29","first-page":"1","article-title":"Successor features for transfer in reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Barreto","year":"2017"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1109\/IROS.2017.8206049"},{"key":"ref31","first-page":"501","article-title":"Transfer in deep reinforcement learning using successor features and generalised policy improvement","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Barreto","year":"2018"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1109\/ICCV.2017.60"},{"key":"ref33","first-page":"1","article-title":"Eigenoption discovery through the deep successor representation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Machado","year":"2018"},{"key":"ref34","first-page":"26963","article-title":"Successor feature landmarks for long-horizon goal-conditioned reinforcement learning","volume":"13","author":"Hoang","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref35","first-page":"1","article-title":"Data-efficient hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Nachum","year":"2018"},{"key":"ref36","first-page":"1025","article-title":"Relay policy learning: Solving long-horizon tasks via imitation and reinforcement learning","volume-title":"Proc. IEEE Conf. Robot Learn.","author":"Gupta","year":"2019"},{"key":"ref37","first-page":"1","article-title":"Hierarchical foresight: Self-supervised learning of long-horizon tasks via visual subgoal generation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Nair","year":"2020"},{"key":"ref38","first-page":"1430","article-title":"Goal-conditioned reinforcement learning with imagined subgoals","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Chane-Sane","year":"2021"},{"key":"ref39","first-page":"151","article-title":"Understanding the impact of entropy on policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"97","author":"Ahmed","year":"2019"},{"key":"ref40","first-page":"1","article-title":"Maximum entropy RL (provably) solves some robust RL problems","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Eysenbach","year":"2022"},{"key":"ref41","first-page":"741","article-title":"Stochastic latent actor-critic: Deep reinforcement learning with a latent variable model","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Lee","year":"2020"},{"key":"ref42","first-page":"7553","article-title":"Maximum entropy-regularized multi-goal reinforcement learning","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Zhao","year":"2019"},{"year":"2019","author":"Islam","article-title":"Entropy regularization with discounted future state distribution in policy gradient methods","key":"ref43"},{"issue":"3","key":"ref44","first-page":"599","article-title":"Dijkstras algorithm revisited: The dynamic programming connexion","volume":"35","author":"Sniedovich","year":"2006","journal-title":"Control Cybern."},{"doi-asserted-by":"publisher","key":"ref45","DOI":"10.1109\/ICRA.2018.8463162"},{"year":"2016","author":"Brockman","article-title":"OpenAI gym","key":"ref46"}],"container-title":["IEEE Transactions on Games"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7782673\/10361571\/10214633.pdf?arnumber=10214633","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:26:11Z","timestamp":1705019171000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10214633\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":46,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tg.2023.3304315","relation":{},"ISSN":["2475-1502","2475-1510"],"issn-type":[{"type":"print","value":"2475-1502"},{"type":"electronic","value":"2475-1510"}],"subject":[],"published":{"date-parts":[[2023,12]]}}}