{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T18:28:18Z","timestamp":1779906498457,"version":"3.53.1"},"reference-count":35,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001804","name":"Canada Research Chairs","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001804","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1109\/lra.2023.3236882","type":"journal-article","created":{"date-parts":[[2023,1,13]],"date-time":"2023-01-13T21:40:58Z","timestamp":1673646058000},"page":"1263-1270","source":"Crossref","is-referenced-by-count":8,"title":["Learning From Guided Play: Improving Exploration for Adversarial Imitation Learning With Simple Auxiliary Tasks"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5904-8402","authenticated-orcid":false,"given":"Trevor","family":"Ablett","sequence":"first","affiliation":[{"name":"Space and Terrestrial Autonomous Robotic Systems (STARS) Laboratory, University of Toronto Institute for Aerospace Studies (UTIAS), Toronto, Ontario, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6278-9575","authenticated-orcid":false,"given":"Bryan","family":"Chan","sequence":"additional","affiliation":[{"name":"Department of Computing Science, University of Alberta, Edmonton, Alberta, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5528-6136","authenticated-orcid":false,"given":"Jonathan","family":"Kelly","sequence":"additional","affiliation":[{"name":"Space and Terrestrial Autonomous Robotic Systems (STARS) Laboratory, University of Toronto Institute for Aerospace Studies (UTIAS), Toronto, Ontario, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref2","first-page":"1479","article-title":"Unifying count-based exploration and intrinsic motivation","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","volume":"29","author":"Bellemare","year":"2016"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref4","article-title":"Shaping and policy search in reinforcement learning","author":"Ng","year":"2003"},{"key":"ref5","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ng","year":"2000"},{"key":"ref6","first-page":"4565","article-title":"Generative adversarial imitation learning","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Ho","year":"2016"},{"key":"ref7","article-title":"Discriminator-actor-Critic: Addressing sample inefficiency and reward bias in adversarial imitation learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kostrikov","year":"2019"},{"key":"ref8","article-title":"Learning robust rewards with adverserial inverse reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Fu","year":"2018"},{"key":"ref9","first-page":"14656","article-title":"What matters for adversarial imitation learning ?","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","author":"Orsini","year":"2021"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03157-9"},{"key":"ref11","article-title":"Learning from guided play: A scheduled hierarchical approach for improving exploration in adversarial imitation learning","volume-title":"Proc. Neural Inf. Process. Syst. Deep Reinforcement Learn. Workshop","author":"Ablett","year":"2021"},{"key":"ref12","first-page":"4344","article-title":"Learning by playing solving sparse reward tasks from scratch","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Riedmiller","year":"2018"},{"key":"ref13","first-page":"1113","article-title":"Learning latent plans from play","volume-title":"Proc. Conf. Robot Learn.","author":"Lynch","year":"2019"},{"key":"ref14","article-title":"Relay policy learning: Solving long horizon tasks via imitation and reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Gupta","year":"2019"},{"key":"ref15","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref16","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"Vecerik","year":"2018"},{"key":"ref17","article-title":"QT-Opt: Scalable deep reinforcement learning for vision-based robotic manipulation","author":"Kalashnikov","year":"2018"},{"key":"ref18","article-title":"What matters in learning from offline human demonstrations for robot manipulation","volume-title":"Proc. Conf. Robot Learn.","author":"Mandlekar","year":"2021"},{"key":"ref19","first-page":"4511","article-title":"Hyperparameter selection for imitation learning","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Hussenot","year":"2021"},{"key":"ref20","first-page":"8547","article-title":"Variational inverse control with events: A general framework for data-driven reward definition","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst.","author":"Fu","year":"2018"},{"key":"ref21","first-page":"207","article-title":"The intentional unintentional agent: Learning to solve many continuous control tasks simultaneously","volume-title":"Proc. Conf. Robot Learn.","author":"Cabi","year":"2017"},{"key":"ref22","first-page":"247","article-title":"Task-relevant adversarial imitation learning","volume-title":"Proc. Conf. Robot Learn.","author":"Zolna","year":"2021"},{"key":"ref23","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist.","author":"Ross","year":"2011"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636440"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref26","article-title":"Fighting failures with FIRE: Failure identification to reduce expert burden in intervention-based learning","author":"Ablett","year":"2020"},{"key":"ref27","first-page":"1235","article-title":"Multi-modal imitation learning from unstructured demonstrations using generative adversarial nets","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Hausman","year":"2017"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref29","article-title":"Why does hierarchy (Sometimes) work so well in reinforcement learning ?","volume-title":"Proc. Neural Inf. Process. Syst. Deep Reinforcement Learn. Workshop","author":"Nachum","year":"2019"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11775"},{"key":"ref31","article-title":"Directed-info GAIL: Learning hierarchical policies from unsegmented demonstrations using directed information","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Sharma","year":"2019"},{"key":"ref32","first-page":"5097","article-title":"Adversarial option-aware hierarchical imitation learning","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Jing","year":"2021"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3453160"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561384"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/10024862\/10016714.pdf?arnumber=10016714","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T07:08:12Z","timestamp":1707808092000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10016714\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3]]},"references-count":35,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/lra.2023.3236882","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,3]]}}}