{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T07:52:24Z","timestamp":1773820344350,"version":"3.50.1"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,8,21]],"date-time":"2022-08-21T00:00:00Z","timestamp":1661040000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,8,21]],"date-time":"2022-08-21T00:00:00Z","timestamp":1661040000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,8,21]]},"DOI":"10.1109\/cog51982.2022.9893550","type":"proceedings-article","created":{"date-parts":[[2022,9,20]],"date-time":"2022-09-20T19:33:31Z","timestamp":1663702411000},"page":"353-360","source":"Crossref","is-referenced-by-count":1,"title":["Task Relabelling for Multi-task Transfer using Successor Features"],"prefix":"10.1109","author":[{"given":"Martin","family":"Balla","sequence":"first","affiliation":[{"name":"Queen Mary University of London"}]},{"given":"Diego","family":"Perez-Liebana","sequence":"additional","affiliation":[{"name":"Queen Mary University of London"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1993.5.4.613"},{"key":"ref11","article-title":"Deep successor reinforcement learning","author":"kulkarni","year":"2016"},{"key":"ref12","first-page":"13052","article-title":"The option keyboard combining skills in reinforcement learning","author":"barreto","year":"2019","journal-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems"},{"key":"ref13","article-title":"Fast task inference with variational intrinsic successor features","author":"hansen","year":"2020"},{"key":"ref14","article-title":"Psiphi-learning: Reinforcement learning with demonstrations using successor features and inverse temporal difference learning","author":"filos","year":"2021","journal-title":"arXiv preprint arXiv 2102 12041"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5955"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1038\/s41562-017-0180-8"},{"key":"ref17","first-page":"1312","article-title":"Universal value function approximators","author":"schaul","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref18","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref19","article-title":"Universal successor features approximators","author":"borsa","year":"2018","journal-title":"arXiv 1812 07626"},{"key":"ref4","article-title":"A survey of generalisation in deep reinforcement learning","author":"kirk","year":"2021","journal-title":"arXiv preprint arXiv 2111 02269"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grand-master level in starcraft ii using multi-agent reinforcement learning","volume":"575","author":"vinyals","year":"2019","journal-title":"Nature"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"600","DOI":"10.1109\/CoG47356.2020.9231552","article-title":"Augmenting automated game testing with deep reinforcement learning","author":"bergdahl","year":"2020","journal-title":"2020 IEEE Conference on Games (CoG2020)"},{"key":"ref5","article-title":"Successor features for transfer in reinforcement learning","author":"barreto","year":"2016","journal-title":"arXiv preprint arXiv 1606 05312"},{"key":"ref8","first-page":"4246","article-title":"The malmo platform for artificial intelligence experimentation","author":"johnson","year":"2016","journal-title":"IJCAI"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2018.8490417"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref9","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1907370117"},{"key":"ref22","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref21","article-title":"Benchmarking the spectrum of agent capabilities","author":"hafner","year":"2021","journal-title":"Deep RL Workshop NeurIPS 2021"}],"event":{"name":"2022 IEEE Conference on Games (CoG)","location":"Beijing, China","start":{"date-parts":[[2022,8,21]]},"end":{"date-parts":[[2022,8,24]]}},"container-title":["2022 IEEE Conference on Games (CoG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9893561\/9893544\/09893550.pdf?arnumber=9893550","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T20:25:30Z","timestamp":1665433530000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9893550\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,21]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/cog51982.2022.9893550","relation":{},"subject":[],"published":{"date-parts":[[2022,8,21]]}}}