{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T03:24:02Z","timestamp":1769916242568,"version":"3.49.0"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10161493","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"5938-5945","source":"Crossref","is-referenced-by-count":14,"title":["Dexterous Manipulation from Images: Autonomous Real-World RL via Substep Guidance"],"prefix":"10.1109","author":[{"given":"Kelvin","family":"Xu","sequence":"first","affiliation":[{"name":"UC Berkeley"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zheyuan","family":"Hu","sequence":"additional","affiliation":[{"name":"UC Berkeley"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ria","family":"Doshi","sequence":"additional","affiliation":[{"name":"UC Berkeley"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aaron","family":"Rovinsky","sequence":"additional","affiliation":[{"name":"UC Berkeley"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vikash","family":"Kumar","sequence":"additional","affiliation":[{"name":"Meta AI Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abhishek","family":"Gupta","sequence":"additional","affiliation":[{"name":"University of Washington"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[{"name":"UC Berkeley"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913506757"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.mechatronics.2018.02.009"},{"key":"ref12","article-title":"Policy search for motor primitives in robotics","volume":"21","author":"kober","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/2157689.2157815"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/SIMPAR.2018.8376268"},{"key":"ref37","author":"zhu","year":"2020","journal-title":"The ingredients of real-world robotic reinforcement learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.049"},{"key":"ref36","article-title":"Variational inverse control with events: A general framework for data-driven reward definition","author":"fu","year":"2018","journal-title":"ar Xiv preprint ar Xiv"},{"key":"ref31","first-page":"330","article-title":"Better-than-demonstrator imitation learning via automatically-ranked demonstrations","author":"brown","year":"2020","journal-title":"Conference on Robot Learning"},{"key":"ref30","first-page":"342","article-title":"Learning multimodal rewards from rankings","author":"myers","year":"2022","journal-title":"Conference on Robot Learning"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907864"},{"key":"ref33","article-title":"Hierarchical actor-critic","volume":"abs 1712 948","author":"levy","year":"2017","journal-title":"CoRR"},{"key":"ref10","article-title":"Contact-invariant optimization for hand manipulation","author":"mordatch","year":"2012","journal-title":"Proceedings of the ACM SIG-GRAPH\/Eurographics symposium on computer animation Euro-graphics Association"},{"key":"ref32","article-title":"Data-efficient hierarchical reinforcement learning","volume":"31","author":"nachum","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/HUMANOIDS.2015.7363524"},{"key":"ref39","first-page":"1437","article-title":"A comprehensive survey on safe rein-forcement learning","volume":"16","author":"garcia","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref16","author":"allshire","year":"2021","journal-title":"Transferring dexterous manipulation from gpu simulation to a remote real-world trifinger"},{"key":"ref38","article-title":"Exploration by random network distillation","author":"burda","year":"2019","journal-title":"7th International Conference on Learning Representations ICLR 2019"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794033"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794102"},{"key":"ref24","article-title":"Maximum entropy inverse reinforcement learning","author":"ziebart","year":"2008","journal-title":"AAAI"},{"key":"ref46","author":"eysenbach","year":"2017","journal-title":"Leave no trace Learning to reset for safe and autonomous reinforcement learning"},{"key":"ref23","author":"reddy","year":"2019","journal-title":"Sqil Imitation learning via reinforcement learning with sparse rewards"},{"key":"ref45","author":"singh","year":"2019","journal-title":"End-to-End Robotic Reinforcement Learning without Reward Engineering"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143936"},{"key":"ref25","author":"wulfmeier","year":"2015","journal-title":"Maximum entropy deep inverse reinforcement learning"},{"key":"ref20","author":"mandikal","year":"2020","journal-title":"Dexterous robotic grasping with object-centric visual affordances"},{"key":"ref42","author":"hiraoka","year":"2021","journal-title":"Dropout q-functions for doubly efficient reinforcement learning"},{"key":"ref41","first-page":"6346","article-title":"Mural: Meta-learning uncertainty-aware rewards for outcome-driven reinforcement learning","volume":"139","author":"li","year":"2021","journal-title":"Proceedings of the 38th International Conference on Machine Learning ser Proceedings of Machine Learning Research"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630809"},{"key":"ref44","author":"haarnoja","year":"2018","journal-title":"Soft actor-critic algorithms and applications"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197181"},{"key":"ref43","author":"kostrikov","year":"2020","journal-title":"Image augmentation is all you need Regularizing deep reinforcement learning from pixels"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460854"},{"key":"ref27","first-page":"123","article-title":"Including uncertainty when learning from human corrections","author":"losey","year":"2018","journal-title":"Conference on Robot Learning"},{"key":"ref29","article-title":"Guiding policies with language via meta-learning","volume":"abs 1811 7882","author":"co-reyes","year":"2018","journal-title":"CoRR"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561384"},{"key":"ref7","author":"kumar","year":"2016","journal-title":"Learning Dexterous Manipulation Policies from Experience and Imitation"},{"key":"ref9","first-page":"1101","article-title":"Deep dynamics models for learning dexterous manipulation","author":"nagabandi","year":"2020","journal-title":"Conference on Robot Learning"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref3","article-title":"Learning dexterous in-hand manipulation","volume":"abs 1808 177","author":"openai","year":"2018","journal-title":"CoRR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1561\/2300000053"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"ref40","article-title":"Learning to be safe: Deep rl with a safety critic","author":"srinivasan","year":"2020","journal-title":"ar Xiv preprint ar Xiv"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10161493.pdf?arnumber=10161493","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T17:32:29Z","timestamp":1690219949000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10161493\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10161493","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}