{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:30:19Z","timestamp":1766068219808,"version":"3.44.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128322","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"8284-8290","source":"Crossref","is-referenced-by-count":3,"title":["R+X: Retrieval and Execution from Everyday Human Videos"],"prefix":"10.1109","author":[{"given":"Georgios","family":"Papagiannis","sequence":"first","affiliation":[{"name":"Imperial College London,The Robot Learning Lab"}]},{"given":"Norman","family":"Di Palo","sequence":"additional","affiliation":[{"name":"Imperial College London,The Robot Learning Lab"}]},{"given":"Pietro","family":"Vitiello","sequence":"additional","affiliation":[{"name":"Imperial College London,The Robot Learning Lab"}]},{"given":"Edward","family":"Johns","sequence":"additional","affiliation":[{"name":"Imperial College London,The Robot Learning Lab"}]}],"member":"263","reference":[{"volume-title":"Apple vision pro.","key":"ref1"},{"volume-title":"Meta. Meta quest 3","key":"ref2"},{"volume-title":"Ray-ban smart glasses","author":"Ray-Ban","key":"ref3"},{"volume-title":"MagicLeap. Magicleap","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.052"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.026"},{"volume-title":"Bc-z: Zero-shot task generalization with robotic imitation learning","year":"2022","author":"Jang","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801982"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.078"},{"key":"ref10","article-title":"Vision-based manipulation from single human video with open-world object graphs","author":"Zhu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.043"},{"key":"ref12","article-title":"RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control","author":"Brohan","year":"2023","journal-title":"arXiv e-prints"},{"key":"ref13","first-page":"8469","article-title":"PaLM-e: An embodied multimodal language model","volume-title":"Proceedings of the 40th International Conference on Machine Learning","volume":"202","author":"Driess"},{"key":"ref14","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Brown","year":"2020"},{"journal-title":"Gemini-Team","article-title":"Gemini: A family of highly capable multimodal models","year":"2023","key":"ref15"},{"key":"ref16","article-title":"Large Language Models as General Pattern Machines","author":"Mirchandani","year":"2023","journal-title":"arXiv e-prints"},{"key":"ref17","article-title":"Training Compute-Optimal Large Language Models","author":"Hoffmann","year":"2022","journal-title":"arXiv e-prints"},{"journal-title":"Open x-embodiment: Robotic learning datasets and rt-x models","year":"2023","key":"ref18"},{"key":"ref19","article-title":"Mimicplay: Long-horizon imitation learning by watching human play","author":"Wang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.096"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130883"},{"journal-title":"G. Team","article-title":"Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context","year":"2024","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"volume-title":"Deep vit features as dense visual descriptors","year":"2022","author":"Amir","key":"ref24"},{"volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale","year":"2021","author":"Dosovitskiy","key":"ref25"},{"journal-title":"arXiv e-prints","article-title":"GPT-4 Technical Report","year":"2023","key":"ref26"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.00938"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2016.90"},{"volume-title":"R3m: A universal visual representation for robot manipulation","year":"2022","author":"Nair","key":"ref29"},{"volume-title":"Octo: An open-source generalist robot policy","year":"2024","author":"Team","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3367329"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2025,5,19]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128322.pdf?arnumber=11128322","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:15:25Z","timestamp":1756880125000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128322\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128322","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}