{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T11:53:04Z","timestamp":1774353184471,"version":"3.50.1"},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Ministry of Culture and Innovation of Hungary from the National Research, Development, and Innovation Fund"},{"name":"KDP-2021"},{"name":"ISF","award":["909\/23"],"award-info":[{"award-number":["909\/23"]}]},{"name":"MOST","award":["1001706842"],"award-info":[{"award-number":["1001706842"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1109\/lra.2024.3387134","type":"journal-article","created":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T18:26:27Z","timestamp":1712773587000},"page":"5174-5181","source":"Crossref","is-referenced-by-count":4,"title":["Adaptive Curriculum Learning With Successor Features for Imbalanced Compositional Reward Functions"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9926-4054","authenticated-orcid":false,"given":"Laszlo","family":"Szoke","sequence":"first","affiliation":[{"name":"Budapest University of Technology and Economics, Budapest, Hungary"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7683-3031","authenticated-orcid":false,"given":"Shahaf S.","family":"Shperberg","sequence":"additional","affiliation":[{"name":"Ben-Gurion University, Beer Sheva, Israel"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7917-9864","authenticated-orcid":false,"given":"Jarrett","family":"Holtz","sequence":"additional","affiliation":[{"name":"Robert Bosch LLC., Budapest, Hungary"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5793-7679","authenticated-orcid":false,"given":"Alessandro","family":"Allievi","sequence":"additional","affiliation":[{"name":"Robert Bosch LLC., Budapest, Hungary"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"On a formal model of safe and scalable self-driving cars","author":"Shalev-Shwartz","year":"2017"},{"key":"ref2","first-page":"656","article-title":"Q-decomposition for reinforcement learning agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Russell","year":"2003"},{"key":"ref3","first-page":"5392","article-title":"Hybrid reward architecture for reinforcement learning","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Seijen","year":"2017"},{"key":"ref4","article-title":"Explainable reinforcement learning via reward decomposition","volume-title":"Proc. IJCAI\/ECAI Workshop Explainable Artif. Intell.","author":"Juozapaitis","year":"2019"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015410"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2017.8329799"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2018.8761044"},{"key":"ref8","article-title":"Successor features for transfer in reinforcement learning","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"30","author":"Barreto","year":"2017"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1993.5.4.613"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1907370117"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3150493"},{"key":"ref12","article-title":"Fast task inference with variational intrinsic successor features","author":"Hansen","year":"2019"},{"key":"ref13","article-title":"Successor feature representations","author":"Reinke","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i13.17399"},{"key":"ref15","first-page":"8025","article-title":"Policy caches with successor features","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"139","author":"Nemecek","year":"2021"},{"key":"ref16","first-page":"196:1","article-title":"Successor features combine elements of model-free and model-based reinforcement learning","volume":"21","author":"Lehnert","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref17","first-page":"1724","article-title":"Gamma-models: Generative temporal difference learning for infinite-horizon prediction","volume-title":"Proc. Neural Inf. Process. Syst.","volume":"33","author":"Janner","year":"2020"},{"key":"ref18","article-title":"Advantages and limitations of using successor features for transfer in reinforcement learning","author":"Lehnert","year":"2017"},{"key":"ref19","first-page":"1","article-title":"A new representation of successor features for transfer across dissimilar environments","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Abdolshah","year":"2021"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016054"},{"key":"ref21","first-page":"751","article-title":"Disentangling successor features for coordination in multi-agent reinforcement learning","volume-title":"Proc. 21st Int. Conf. Auton. Agents Multiagent Syst.","author":"Kim","year":"2022"},{"key":"ref22","first-page":"3305","article-title":"Psiphi-learning: Reinforcement learning with demonstrations using successor features and inverse temporal difference learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Filos","year":"2021"},{"key":"ref23","first-page":"17298","article-title":"Risk-aware transfer in reinforcement learning using successor features","volume":"34","author":"Gimelfarb","year":"2021","journal-title":"Neural Inf. Process. Syst."},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01611-x"},{"key":"ref26","first-page":"1","article-title":"Curriculum learning for reinforcement learning domains","volume":"21","author":"Narvekar","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/BF00117447"},{"key":"ref28","article-title":"Prioritized experience replay","author":"Schaul","year":"2015"},{"key":"ref29","article-title":"Automated curriculum generation through setter-solver interactions","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Racanire","year":"2020"},{"key":"ref30","first-page":"1515","article-title":"Automatic goal generation for reinforcement learning agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Florensa","year":"2018"},{"key":"ref31","article-title":"Training agent for first-person shooter game with actor-critic curriculum learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wu","year":"2017"},{"key":"ref32","article-title":"Playing atari with deep reinforcement learning","author":"Mnih","year":"2013"},{"key":"ref33","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref34","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref35","first-page":"3053","article-title":"RLlib: Abstractions for distributed reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liang","year":"2018"},{"key":"ref36","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Achiam","year":"2017"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/520"},{"key":"ref38","article-title":"Omnisafe: An infrastructure for accelerating safe reinforcement learning research","author":"Ji","year":"2023"},{"key":"ref39","article-title":"Social attention for autonomous decision-making in dense traffic","author":"Leurent","year":"2019"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569938"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/10504377\/10496160.pdf?arnumber=10496160","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,23]],"date-time":"2024-04-23T04:52:22Z","timestamp":1713847942000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10496160\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":40,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/lra.2024.3387134","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6]]}}}