{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T16:36:59Z","timestamp":1757608619074,"version":"3.44.0"},"reference-count":72,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["1750649,2107256,2314182,2409351"],"award-info":[{"award-number":["1750649,2107256,2314182,2409351"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128466","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"4797-4805","source":"Crossref","is-referenced-by-count":0,"title":["On-Robot Reinforcement Learning with Goal-Contrastive Rewards"],"prefix":"10.1109","author":[{"given":"Ondrej","family":"Biza","sequence":"first","affiliation":[{"name":"Robotics and AI Institute (Boston Dynamics AI Institute LLC)"}]},{"given":"Thomas","family":"Weng","sequence":"additional","affiliation":[{"name":"Robotics and AI Institute (Boston Dynamics AI Institute LLC)"}]},{"given":"Lingfeng","family":"Sun","sequence":"additional","affiliation":[{"name":"Robotics and AI Institute (Boston Dynamics AI Institute LLC)"}]},{"given":"Karl","family":"Schmeckpeper","sequence":"additional","affiliation":[{"name":"Robotics and AI Institute (Boston Dynamics AI Institute LLC)"}]},{"given":"Tarik","family":"Kelestemur","sequence":"additional","affiliation":[{"name":"Robotics and AI Institute (Boston Dynamics AI Institute LLC)"}]},{"given":"Yecheng Jason","family":"Ma","sequence":"additional","affiliation":[{"name":"University of Pennsylvania"}]},{"given":"Robert","family":"Platt","sequence":"additional","affiliation":[{"name":"Robotics and AI Institute (Boston Dynamics AI Institute LLC)"}]},{"given":"Jan-Willem","family":"van de Meent","sequence":"additional","affiliation":[{"name":"University of Amsterdam"}]},{"given":"Lawson L.S.","family":"Wong","sequence":"additional","affiliation":[{"name":"Northeastern University, Khoury College of Computer Sciences"}]}],"member":"263","reference":[{"volume-title":"Continuously Improving Mobile Manipulation with Autonomous Real-World RL.","year":"2024","author":"Mendonca","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812140"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2022.xviii.071"},{"key":"ref4","first-page":"1577","article-title":"Efficient online re-inforcement learning with offline data","volume-title":"International Conference on Machine Learning, ICML 2023, 23\u201329 July 2023, Honolulu, Hawaii, USA, ser. Proceedings of Machine Learning Research","volume":"202","author":"Ball","year":"2023"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.056"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610421"},{"key":"ref7","article-title":"Practice makes perfect: Planning to learn skill parameter policies","volume":"abs\/2402.15025","author":"Kumar","year":"2024","journal-title":"CoRR"},{"key":"ref8","article-title":"RT-2: vision- language-action models transfer web knowledge to robotic control","volume":"abs\/2307.15818","author":"Brohan","year":"2023","journal-title":"CoRR"},{"key":"ref9","first-page":"120","article-title":"Vision-language models as success detectors","volume-title":"Conference on Lifelong Learning Agents, 22\u201325 August 2023, McGill University, Montreal, Quebec, Canada, ser. Proceedings of Machine Learning Research","volume":"232","author":"Du","year":"2023"},{"key":"ref10","first-page":"540","article-title":"Voxposer: Composable 3d value maps for robotic manipulation with language models","volume-title":"Conference on Robot Learning, CoRL 2023, 6\u2013 9 November 2023, Atlanta, GA, USA, ser. Proceedings of Machine Learning Research","volume":"229","author":"Huang","year":"2023"},{"key":"ref11","first-page":"374","article-title":"Language to rewards for robotic skill synthesis","volume-title":"Conference on Robot Learning, CoRL 2023, 6\u20139 November 2023, Atlanta, GA, USA, ser. Proceedings of Machine Learning Research","volume":"229","author":"Yu","year":"2023"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8462891"},{"key":"ref13","first-page":"892","article-title":"R3M: A universal visual representation for robot manipulation","volume-title":"Conference on Robot Learning, CoRL 2022, 14\u201318 December 2022, Auckland, New Zealand, ser. Proceedings of Machine Learning Research","volume":"205","author":"Nair","year":"2022"},{"key":"ref14","article-title":"VIP: towards universal visual reward and representation via value-implicit pre-training","volume-title":"The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1\u20135, 2023","author":"Ma","year":"2023"},{"key":"ref15","first-page":"23301","article-title":"LIV: language-image representations and rewards for robotic control","volume-title":"International Conference on Machine Learning, ICML 2023, 23\u201329 July 2023, Honolulu, Hawaii, USA, ser. Proceedings of Machine Learning Research","volume":"202","author":"Ma","year":"2023"},{"key":"ref16","first-page":"11321","article-title":"Reinforcement learning from passive data via latent intentions","volume-title":"International Conference on Machine Learning, ICML 2023, 23\u201329 July 2023, Honolulu, Hawaii, USA, ser. Proceedings of Machine Learning Research","volume":"202","author":"Ghosh","year":"2023"},{"key":"ref17","article-title":"Robotic offline RL from internet videos via value-function pre-training","volume":"abs\/2309.13041","author":"Bhateja","year":"2023","journal-title":"CoRR"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01842"},{"key":"ref19","article-title":"Scaling egocentric vision: The epic-kitchens dataset","volume-title":"European Conference on Computer Vision (ECCV)","author":"Damen","year":"2018"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.120"},{"key":"ref21","article-title":"Open x-embodiment: Robotic learning datasets and RT-X models","volume":"abs\/2310.08864","author":"Collaboration","year":"2023","journal-title":"CoRR"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2017.xiii.050"},{"key":"ref23","first-page":"8547","article-title":"Variational inverse control with events: A general framework for data-driven reward definition","volume-title":"Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3\u20138, 2018, Montreal, Canada","author":"Fu","year":"2018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2019.xv.073"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610040"},{"key":"ref26","first-page":"1678","article-title":"What matters in learning from offline human demonstrations for robot manipulation","volume-title":"Conference on Robot Learning, 8\u201311 November 2021, London, UK, ser. Proceedings of Machine Learning Research","volume":"164","author":"Mandlekar","year":"2021"},{"key":"ref27","first-page":"1820","article-title":"Mimicgen: A data generation system for scalable robot learning using human demonstrations","volume-title":"Conference on Robot Learning, CoRL 2023, 6\u20139 November 2023, Atlanta, GA, USA, ser. Proceedings of Machine Learning Research","volume":"229","author":"Mandlekar","year":"2023"},{"key":"ref28","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume-title":"Proceedings of the Twenty- Third AAAI Conference on Artificial Intelligence, AAAI 2008, Chicago, Illinois, USA, July 13\u201317, 2008","author":"Ziebart","year":"2008"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref30","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proceedings of the Seventeenth International Conference on Machine Learning (ICML 2000), Stanford University, Stanford, CA, USA, June 29 - July 2, 2000","author":"Ng","year":"2000"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/279943.279964"},{"key":"ref32","first-page":"19","article-title":"Nonlinear inverse reinforcement learning with gaussian processes","volume-title":"Advances in Neural Information Processing Systems 24: 25th Annual Conference on Neural Information Processing Systems 2011. Proceedings of a meeting held 12\u201314 December 2011","author":"Levine"},{"key":"ref33","article-title":"Maximum entropy deep inverse reinforcement learning","author":"Wulfmeier","year":"2015","journal-title":"CoRR, vol. abs\/1507.04888"},{"key":"ref34","first-page":"49","article-title":"Guided cost learning: Deep inverse optimal control via policy optimization","volume-title":"Proceedings of the 33nd International Conference on Machine Learning, ICML 2016, New York City, NY, USA, June 19\u201324, 2016 ser. JMLR Workshop and Conference Proceedings","volume":"48","author":"Finn","year":"2016"},{"key":"ref35","first-page":"4565","article-title":"Generative adversarial imitation learning","volume-title":"Advances in Neural Information Processing Systems 29: Annual Con-ference on Neural Information Processing Systems 2016, December 5\u2013 10, 2016, Barcelona, Spain","author":"Ho"},{"key":"ref36","article-title":"Learning robust rewards with adversarial inverse reinforcement learning","volume":"abs\/1710.11248","author":"Fu","year":"2017","journal-title":"CoRR"},{"key":"ref37","article-title":"SQIL: imitation learning via reinforcement learning with sparse rewards","volume-title":"8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26\u201330, 2020","author":"Reddy","year":"2020"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610873"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00228"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636080"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_33"},{"key":"ref42","first-page":"654","article-title":"Videodex: Learning dexterity from internet videos","volume-title":"Conference on Robot Learning, CoRL 2022, 14\u201318 December 2022, Auckland, New Zealand, ser. Proceedings of Machine Learning Research","volume":"205","author":"Shaw","year":"2022"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00329"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.026"},{"key":"ref45","article-title":"Zero- shot robot manipulation from passive human videos","volume":"abs\/2302.02011","author":"Bharadhwaj","year":"2023","journal-title":"CoRR"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01324"},{"key":"ref47","article-title":"R+X: retrieval and execution from everyday human videos","volume":"abs\/2407.12957","author":"Papagiannis","year":"2024","journal-title":"CoRR"},{"key":"ref48","article-title":"Mimicplay: Long-horizon imitation learning by watching human play","volume":"abs\/2302.12422","author":"Wang","year":"2023","journal-title":"CoRR"},{"key":"ref49","article-title":"Look ma, no hands! agent- environment factorization of egocentric videos","volume-title":"Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 \u2013 16, 2023","author":"Chang","year":"2023"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.024"},{"key":"ref51","article-title":"Reinforcement learning with videos: Combining offline observations with interaction","author":"Schmeckpeper","year":"2020","journal-title":"arXiv preprint"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_42"},{"key":"ref53","first-page":"19561","article-title":"Reinforcement learning with action-free pre-training from videos","volume-title":"International Conference on Machine Learning, ICML 2022, 17\u201323 July 2022, Baltimore, Maryland, USA, ser. Proceedings of Machine Learning Research","volume":"162","author":"Seo","year":"2022"},{"key":"ref54","article-title":"Learning to imitate object interactions from internet videos","volume":"abs\/2211.13225","author":"Patel","year":"2022","journal-title":"CoRR"},{"volume-title":"Reinforcement Learning: An Introduction.","year":"2018","author":"Sutton","key":"ref55"},{"key":"ref56","article-title":"Algaedice: Policy gradient from arbitrary experience","volume":"abs\/1912.02074","author":"Nachum","year":"2019","journal-title":"CoRR"},{"key":"ref57","article-title":"Representation learning with contrastive predictive coding","volume":"abs\/1807.03748","author":"van den Oord","year":"2018","journal-title":"CoRR"},{"key":"ref58","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Proceedings of the Sixteenth International Conference on Machine Learning (ICML 1999), Bled, Slovenia, June 27 \u2013 30, 1999","author":"Ng","year":"1999"},{"key":"ref59","first-page":"12765","article-title":"Phasic self-imitative reduction for sparse-reward goal-conditioned reinforcement learning","volume-title":"International Conference on Machine Learning, ICML 2022, 17\u201323 July 2022, Baltimore, Maryland, USA, ser. Proceedings of Machine Learning Research","volume":"162","author":"Li","year":"2022"},{"key":"ref60","first-page":"16118","article-title":"Generalizable imitation learning from observation via inferring goal proximity","volume-title":"Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6\u201314, 2021, virtual","author":"Lee","year":"2021"},{"key":"ref61","first-page":"463","article-title":"Learning to drive a bicycle using reinforcement learning and shaping","volume-title":"Proceedings of the Fifteenth International Conference on Machine Learning (ICML 1998), Madi-son, Wisconsin, USA, July 24\u201327, 1998","author":"Randlov","year":"1998"},{"volume-title":"Zeromq: An open-source universal messaging library","key":"ref62"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref65","article-title":"GPT-4 technical report","volume":"abs\/2303.08774","year":"2023","journal-title":"CoRR"},{"key":"ref66","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"3rd Annual Conference on Robot Learning, CoRL 2019, Osaka, Japan, October 30 - November 1, 2019, Proceedings, ser. Proceedings of Machine Learning Research","volume":"100","author":"Yu","year":"2019"},{"key":"ref67","article-title":"Learning fine-grained view-invariant rep-resentations from unpaired ego-exo videos via temporal alignment","volume-title":"Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 \u2013 16, 2023","author":"Xue","year":"2023"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02084"},{"key":"ref69","article-title":"Image augmentation is all you need: Regularizing deep reinforcement learning from pixels","volume-title":"9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3\u20137, 2021","author":"Yarats","year":"2021"},{"key":"ref70","article-title":"Where are we in the search for an artificial visual cortex for embodied intelligence?","volume-title":"Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 \u2013 16, 2023","author":"Majumdar","year":"2023"},{"key":"ref71","article-title":"Dinov2: Learning robust visual features without supervision","volume":"2024","author":"Oquab","year":"2024","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref72","article-title":"Theia: Distilling diverse vision foundation models for robot learning","volume":"abs\/2407.20179","author":"Shang","year":"2024","journal-title":"CoRR"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2025,5,19]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128466.pdf?arnumber=11128466","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:08:00Z","timestamp":1756879680000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128466\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":72,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128466","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}