{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T12:44:05Z","timestamp":1752669845653},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,30]]},"DOI":"10.1109\/icra48506.2021.9561927","type":"proceedings-article","created":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T20:28:35Z","timestamp":1634675315000},"page":"14284-14290","source":"Crossref","is-referenced-by-count":11,"title":["Reward Machines for Vision-Based Robotic Manipulation"],"prefix":"10.1109","author":[{"given":"Alberto","family":"Camacho","sequence":"first","affiliation":[]},{"given":"Jacob","family":"Varley","sequence":"additional","affiliation":[]},{"given":"Andy","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Deepali","family":"Jain","sequence":"additional","affiliation":[]},{"given":"Atil","family":"Iscen","sequence":"additional","affiliation":[]},{"given":"Dmitry","family":"Kalashnikov","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","article-title":"On the generalization of equivariance and convolution in neural networks to the action of compact groups","author":"kondor","year":"2018","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"ref31","first-page":"159","article-title":"Non-markovian rewards expressed in LTL: guiding search via reward shaping","author":"camacho","year":"2017","journal-title":"Proceedings of the 10th International Symposium on Combinatorial Search (SoCS)"},{"key":"ref30","first-page":"901","article-title":"Weight normalization: A simple reparameterization to accelerate training of deep neural networks","author":"salimans","year":"2016","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"article-title":"TF-Agents: A library for reinforcement learning in tensorflow","year":"2018","author":"guadarrama","key":"ref37"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2020.XVI.035"},{"article-title":"Good robot!\": Efficient reinforcement learning for multi-step visual tasks via reward shaping","year":"2019","author":"hundt","key":"ref35"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref10","first-page":"1008","article-title":"Self-consistent trajectory autoencoder: Hierarchical reinforcement learning with trajectory embeddings","author":"co-reyes","year":"2018","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"key":"ref11","article-title":"Towards a unified theory of state abstraction for MDPs","author":"li","year":"2006","journal-title":"International Symposium on Artificial Intelligence and Mathematics"},{"key":"ref12","first-page":"2915","article-title":"Near optimal behavior via approximate state abstraction","author":"abel","year":"2016","journal-title":"Proc Int Conf Machine Learn (ICML)"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(02)00376-4"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013134"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/840"},{"key":"ref16","first-page":"15497","article-title":"Learning reward machines for partially observable reinforcement learning","author":"toro icarte","year":"2019","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref17","first-page":"291","article-title":"Learning a visuomotor controller for real world robotic grasping using simulated depth images","author":"viereck","year":"2017","journal-title":"Proceedings of the 1st Annual Conference on Robot Learning (CoRL)"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917710318"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1177\/0278364914549607"},{"key":"ref28","first-page":"4482","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"Intl Conf on Machine Learning (ICML)"},{"key":"ref4","first-page":"2112","article-title":"Using reward machines for high-level task specification and decomposition in reinforcement learning","author":"toro icarte","year":"2018","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"key":"ref27","first-page":"621","article-title":"Learning interpretable models in linear temporal logic","author":"camacho","year":"2019","journal-title":"Proceedings of the 29th International Conference on Automated Planning and Scheduling (ICAPS)"},{"article-title":"Qt-opt: Scalable deep reinforcement learning for vision-based robotic manipulation","year":"2018","author":"kalashnikov","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"article-title":"Layer normalization","year":"2016","author":"ba","key":"ref29"},{"key":"ref5","article-title":"Task decomposition in reinforcement learning","author":"karlsson","year":"1994","journal-title":"Proceedings of the AAAI Spring Symposium on Goal-Driven Learning"},{"key":"ref8","first-page":"3686","article-title":"MCP: Learning composable hierarchical control with multiplicative compositional policies","author":"peng","year":"2019","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref7","article-title":"Hierarchical visuomotor control of humanoids","author":"merel","year":"2019","journal-title":"Proceedings of the 7th International Conference on Learning Representations (ICLR)"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630937"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968263"},{"article-title":"Data-efficient deep reinforcement learning for dexterous manipulation","year":"2017","author":"popov","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593986"},{"article-title":"Solving rubik&#x2019;s cube with a robot hand","year":"2019","author":"akkaya","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2988642"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48989-6_18"},{"article-title":"Go-explore: a new approach for hard-exploration problems","year":"2019","author":"ecoffet","key":"ref25"}],"event":{"name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2021,5,30]]},"location":"Xi'an, China","end":{"date-parts":[[2021,6,5]]}},"container-title":["2021 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9560720\/9560666\/09561927.pdf?arnumber=9561927","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T11:47:13Z","timestamp":1652183233000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9561927\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,30]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/icra48506.2021.9561927","relation":{},"subject":[],"published":{"date-parts":[[2021,5,30]]}}}