{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:38:12Z","timestamp":1740101892023,"version":"3.37.3"},"reference-count":75,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003006","name":"ETH","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003006","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10161415","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"10189-10196","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Learning of High Level Plans from Play"],"prefix":"10.1109","author":[{"given":"N\u00faria Armengol","family":"Urp\u00ed","sequence":"first","affiliation":[{"name":"ETH Zurich,Department of Computer Science,Switzerland"}]},{"given":"Marco","family":"Bagatella","sequence":"additional","affiliation":[{"name":"ETH Zurich,Department of Computer Science,Switzerland"}]},{"given":"Otmar","family":"Hilliges","sequence":"additional","affiliation":[{"name":"ETH Zurich,Department of Computer Science,Switzerland"}]},{"given":"Georg","family":"Martius","sequence":"additional","affiliation":[{"name":"Max Planck Institute for Intelligent Systems,T&#x00FC;bingen,Germany"}]},{"given":"Stelian","family":"Coros","sequence":"additional","affiliation":[{"name":"ETH Zurich,Department of Computer Science,Switzerland"}]}],"member":"263","reference":[{"volume-title":"Optimization-based motion planning for legged robots","year":"2018","author":"Winkler","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2798285"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2014.7041375"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1177\/02783649221102473"},{"key":"ref5","article-title":"Broadly-exploring, local-policy trees for long-horizon task planning","volume-title":"5th Annual Conference on Robot Learning","author":"Sermanet","year":"2021"},{"key":"ref6","article-title":"Search on the replay buffer: Bridging planning and reinforcement learning","volume":"32","author":"Eysenbach","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN47096.2020.9223504"},{"key":"ref8","article-title":"Mt-opt: Continuous multi-task robotic reinforcement learning at scale","author":"Kalashnkov","year":"2021","journal-title":"arXiv preprint"},{"key":"ref9","article-title":"Solving rubiks cube with a robot hand","author":"Akkaya","year":"2019","journal-title":"arXiv preprint"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref11","article-title":"Qt-opt: Scalable deep reinforcement learning for vision-basedrobotic manipulation (2018)","author":"Kalashnikov","year":"2018","journal-title":"arXiv preprint"},{"key":"ref12","article-title":"Data-efficient hi-erarchical reinforcement learning","volume":"31","author":"Nachum","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref13","article-title":"Hierarchical reinforcement learning with hindsight","author":"Levy","year":"2018","journal-title":"arXiv preprint"},{"journal-title":"Pybullet, a python module for physics simulation for games, robotics and machine learning","first-page":"2016","author":"Coumans","key":"ref14"},{"key":"ref15","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv preprint"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/70.508439"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1177\/02783640122067453"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1177\/0278364911406761"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2009.5152817"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1177\/0278364914528132"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913484072"},{"key":"ref22","article-title":"Logic-geometric programming: An optimization-based approach to combined task and motion planning","volume-title":"Twenty-Fourth International Joint Conference on Artificial Intelligence","author":"Toussaint","year":"2015"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989464"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-091420-084139"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981797"},{"key":"ref26","article-title":"One-shot imitation learning","volume":"30","author":"Duan","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022140919877"},{"key":"ref28","article-title":"Feudal reinforcement learning","volume":"5","author":"Dayan","year":"1992","journal-title":"Advances in neural information processing systems"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1177\/105971239700600202"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1613\/jair.639"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812140"},{"key":"ref33","first-page":"1094","article-title":"Learning to achieve goals","volume-title":"IN PROC. OF IJCAI-93","author":"Kaelbling","year":"1993"},{"key":"ref34","article-title":"Hind-sight experience replay","volume":"30","author":"Andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref35","article-title":"Relay policy learning: Solving long horizon tasks via imitation and rein-forcement learning","volume-title":"Conference on Robot Learning (CoRL)","author":"Gupta","year":"2019"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3060403"},{"article-title":"Deep reinforcement learning in parameterized action space","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Hausknecht","key":"ref37"},{"key":"ref38","first-page":"21 847","article-title":"Accelerating robotic reinforcement learning via parameterized action primitives","volume":"34","author":"Dalal","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10226"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561315"},{"key":"ref41","first-page":"589","article-title":"Motion planner augmented reinforcement learning for robot manipulation in obstructed environments","volume-title":"Proceedings of the 2020 Conference on Robot Learning","volume":"155","author":"Yamada","year":"2021"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2972794"},{"key":"ref43","article-title":"A new challenge for reinforcement learning","volume":"5","author":"Vinyals","year":"2017","journal-title":"arXivpreprint"},{"key":"ref44","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019","journal-title":"arXiv preprint"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6144"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/650"},{"key":"ref47","article-title":"Learn what not to learn: Action elimination with deep reinforcement learning","volume":"31","author":"Zahavy","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref48","first-page":"162","article-title":"Action elimination and stopping conditions for reinforcement learning","volume-title":"Proceedings of the 20th International Conference on Machine Learning (ICML-03)","author":"Even-Dar","year":"2003"},{"issue":"2","key":"ref49","first-page":"67","volume-title":"The theory of affordances","volume":"1","author":"Gibson","year":"1977"},{"key":"ref50","article-title":"What can i do here? a theory of affordances in reinforcement learning","author":"Khetarpal","year":"2020","journal-title":"ICML"},{"key":"ref51","article-title":"Possibility before utility: Learning and using hierarchical affordances","volume-title":"International Conference on Learning Representations","author":"Costales","year":"2022"},{"key":"ref52","article-title":"Accelerating reinforcement learning with learned skill priors","volume-title":"Conference on Robot Learning (CoRL)","author":"Pertsch","year":"2020"},{"key":"ref53","article-title":"Demonstration-guided reinforcement learning with learned skills","volume-title":"5th Conference on Robot Learning","author":"Pertsch","year":"2021"},{"key":"ref54","article-title":"Parrot: Data-driven behavioral priors for reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Singh","year":"2021"},{"issue":"221","key":"ref55","first-page":"1","article-title":"Behavior priors for efficient reinforcement learning","volume":"23","author":"Tirumala","year":"2022","journal-title":"Journal of MachineLearning Research"},{"key":"ref56","article-title":"Learning latent plans from play","volume-title":"Conference on Robot Learning (CoRL)","author":"Lynch","year":"2019"},{"volume-title":"Spot\u00ae | Boston Dynamics","year":"2022","key":"ref57"},{"key":"ref58","article-title":"Do as i can and not as i say: Grounding language in robotic affordances","author":"Ahn","year":"2022","journal-title":"arXiv preprint"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3146589"},{"key":"ref60","article-title":"Constructing skill trees for reinforcement learning agents from demonstration trajectories","volume":"23","author":"Konidaris","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139383"},{"key":"ref62","first-page":"1312","article-title":"Universal value function approximators","volume-title":"International conference on machine learning","author":"Schaul","year":"2015"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1002\/SERIES1345"},{"volume-title":"On the sample complexity of reinforcement learning","year":"2003","author":"Kakade","key":"ref64"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-34106-9_26"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143955"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/cdc.1995.478953"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.5555\/3016100.3016191"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref71","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International conference on machine learning","author":"Fujimoto","year":"2018"},{"volume-title":"Reinforcement learning for robots using neural networks","year":"1992","author":"Lin","key":"ref72"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981093"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561835"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2023,5,29]]},"location":"London, United Kingdom","end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10161415.pdf?arnumber=10161415","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T11:46:36Z","timestamp":1709293596000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10161415\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":75,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10161415","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}