{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T16:39:15Z","timestamp":1757608755778,"version":"3.44.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128275","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"2333-2339","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Imitation Without Demonstrations via Value-Penalized Auxiliary Control from Examples"],"prefix":"10.1109","author":[{"given":"Trevor","family":"Ablett","sequence":"first","affiliation":[{"name":"University of Toronto Institute for Aerospace Studies (UTIAS),Space &#x0026; Terrestrial Autonomous Robotic Systems (STARS) Laboratory,Toronto,Ontario,Canada,M3H 5T6"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bryan","family":"Chan","sequence":"additional","affiliation":[{"name":"University of Alberta,Department of Computing Science,Edmonton,Alberta,Canada,T6G 2E8"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jayce Haoran","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Toronto Institute for Aerospace Studies (UTIAS),Space &#x0026; Terrestrial Autonomous Robotic Systems (STARS) Laboratory,Toronto,Ontario,Canada,M3H 5T6"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jonathan","family":"Kelly","sequence":"additional","affiliation":[{"name":"University of Toronto Institute for Aerospace Studies (UTIAS),Space &#x0026; Terrestrial Autonomous Robotic Systems (STARS) Laboratory,Toronto,Ontario,Canada,M3H 5T6"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Replacing Rewards with Examples: Example-Based Policy Search via Recursive Classification","author":"Eysenbach","year":"2021","journal-title":"Advances in Neural Information Processing Systems (NeurIPS\u201921), Virtual"},{"key":"ref2","article-title":"Why Does Hierarchy (Sometimes) Work So Well in Reinforcement Learning?","volume-title":"Proceedings of the Neural Information Processing Systems (NeurIPS\u201919) Deep Reinforcement Learning Workshop","author":"Nachum","year":"2019"},{"key":"ref3","first-page":"4344","article-title":"Learning by Playing Solving Sparse Reward Tasks from Scratch","volume-title":"Proceedings of the 35th International Conference on Machine Learning (ICML\u201918)","author":"Riedmiller"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3236882"},{"key":"ref5","article-title":"Deep Reinforcement Learning at the Edge of the Statistical Precipice","volume":"34","author":"Agarwal","year":"2021","journal-title":"Advances in Neural Information Processing Systems (Neurips\u201921)"},{"key":"ref6","article-title":"Unpacking Reward Shaping: Understanding the Benefits of Reward Engineering on Sample Complexity","volume-title":"Advances in Neural Information Processing Systems (Neurips\u201922)","author":"Gupta","year":"2022"},{"key":"ref7","first-page":"278","article-title":"Policy Invariance Under Reward Transformations: Theory and Application to Reward Shaping","volume-title":"Proceedings of the Sixteenth International Conference on Machine Learning (ICML\u201999), ser. ICML \u201999","author":"Ng"},{"journal-title":"Data-efficient Deep Reinforcement Learning for Dexterous Manipulation","year":"2017","author":"Popov","key":"ref8"},{"key":"ref9","first-page":"1094","article-title":"Meta-World: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning","volume-title":"Conference on Robot Learning (CoRL\u201919), ser. Proceedings of Machine Learning Research","volume":"100","author":"Yu"},{"key":"ref10","article-title":"Hindsight Experience Replay","author":"Andrychowicz","year":"2017","journal-title":"Advances in Neural Information Processing Systems (NIPS\u201917)"},{"key":"ref11","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"International Conference on Machine Learning (ICML\u201900)","author":"Ng","year":"2000"},{"article-title":"Discriminator-Actor-Critic: Addressing Sample Inefficiency and Reward Bias in Adversarial Imitation Learning","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR\u201919)","author":"Kostrikov","key":"ref12"},{"key":"ref13","article-title":"SQIL: Imitation Learning Via Reinforcement Learning with Sparse Rewards","volume-title":"International Conference on Learning Representations (ICLR\u201920)","author":"Reddy","year":"2020"},{"key":"ref14","article-title":"Generative Adversarial Imitation Learning","volume-title":"Advances in Neural Information Processing Systems (NIPS\u201916)","author":"Ho","year":"2016"},{"article-title":"Learning Robust Rewards with Adverserial inverse Reinforcement Learning","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR\u201918)","author":"Fu","key":"ref15"},{"key":"ref16","article-title":"Variational Inverse Control with Events: A General Framework for Data-Driven Reward Definition","volume-title":"Advances in Neural Information Processing Systems (NeurIPS\u201918)","author":"Fu","year":"2018"},{"key":"ref17","first-page":"1861","article-title":"Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor","volume-title":"Proceedings of the 35th International Conference on Machine Learning (ICML\u201918)","author":"Haarnoja"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2019.xv.073"},{"key":"ref19","article-title":"Conservative Q-Learning for Offline Reinforcement Learning","author":"Kumar","year":"2020","journal-title":"Advances in Neural Information Processing Systems (Neurips\u201920)"},{"key":"ref20","first-page":"155","article-title":"Contrastive Example-Based Control","volume-title":"Learning for Dynamics and Control (L4DC\u201923), ser. Proceedings of Machine Learning Research","volume":"211","author":"Hatch"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref22","article-title":"Sample Complexity of Goal-Conditioned Hierarchical Reinforcement Learning","volume-title":"Advances in Neural Information Processing Systems (NeurIPS\u201923)","author":"Robert","year":"2023"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref24","article-title":"Learning from Guided Play: A Scheduled Hierarchical Approach for Improving Exploration in Adversarial Imitation Learning","volume-title":"Proceedings of the Neural Information Processing Systems (NeurIPS\u201921) Deep Reinforcement Learning Workshop","author":"Ablett","year":"2021"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3366023"},{"volume-title":"Example-Driven Model-Based Reinforcement Learning for Solving Long-Horizon Visuomotor Tasks","year":"2021","author":"Wu","key":"ref26"},{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref27"},{"issue":"1","key":"ref28","article-title":"Boosting Soft Q-Learning by Bounding","volume":"1","author":"Adamczyk","year":"2024","journal-title":"Reinforcement Learning Journal"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01337"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.049"},{"article-title":"Exploration by Random Network Distillation","volume-title":"International Conference on Learning Representations (ICLR\u201919)","author":"Burda","key":"ref31"},{"volume-title":"Is Exploration All You Need? Effective Exploration Characteristics for Transfer in Reinforcement Learning","year":"2024","author":"Balloch","key":"ref32"},{"volume-title":"RL sandbox","year":"2020","author":"Chan","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2014.01.005"},{"key":"ref35","article-title":"Defining and characterizing reward gaming","volume-title":"Advances in Neural Information Processing Systems (NeurIPS\u201922)","author":"Skalse","year":"2022"},{"article-title":"QT-Opt: Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation","volume-title":"Conference on Robot Learning (CoRL\u201918)","author":"Kalashnikov","key":"ref36"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2025,5,19]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128275.pdf?arnumber=11128275","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:18:23Z","timestamp":1756880303000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128275\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128275","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}