{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T10:06:59Z","timestamp":1766138819713,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610870","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"6448-6454","source":"Crossref","is-referenced-by-count":5,"title":["InterRep: A Visual Interaction Representation for Robotic Grasping"],"prefix":"10.1109","author":[{"given":"Yu","family":"Cui","sequence":"first","affiliation":[{"name":"Zhejiang University,College of Control Science and Engineering,Hangzhou,China,310027"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Ye","sequence":"additional","affiliation":[{"name":"Zhejiang University,College of Control Science and Engineering,Hangzhou,China,310027"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qingtao","family":"Liu","sequence":"additional","affiliation":[{"name":"Zhejiang University,College of Control Science and Engineering,Hangzhou,China,310027"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anjun","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University,College of Control Science and Engineering,Hangzhou,China,310027"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gaofeng","family":"Li","sequence":"additional","affiliation":[{"name":"Zhejiang University,College of Control Science and Engineering,Hangzhou,China,310027"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiming","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University,College of Control Science and Engineering,Hangzhou,China,310027"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"International conference on machine learning","author":"Chen"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"Devlin","key":"ref3"},{"issue":"240","key":"ref4","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery","year":"2023","journal-title":"Journal of Machine Learning Research"},{"key":"ref5","first-page":"892","article-title":"R3m: A universal visual representation for robot manipulation","volume-title":"Conference on Robot Learning","author":"Nair"},{"key":"ref6","first-page":"17 359","article-title":"The unsurprising effectiveness of pre-trained vision models for control","volume-title":"International Conference on Machine Learning","author":"Parisi"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref8","first-page":"416","article-title":"Real-world robot learning with masked visual pre-training","volume-title":"Conference on Robot Learning","author":"Radosavovic"},{"article-title":"Masked visual pre-training for motor control","year":"2022","author":"Xiao","key":"ref9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.032"},{"article-title":"Liv: Language-image representations and rewards for robotic control","year":"2023","author":"Ma","key":"ref11"},{"article-title":"Viola: Imitation learning for vision-based manipulation with object proposal priors","year":"2022","author":"Zhu","key":"ref12"},{"key":"ref13","first-page":"55","article-title":"Graph inverse reinforcement learning from diverse videos","volume-title":"Conference on Robot Learning","author":"Kumar"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630771"},{"article-title":"Learning invariant representations for reinforcement learning without reconstruction","year":"2020","author":"Zhang","key":"ref15"},{"key":"ref16","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","volume":"28","author":"Ren","year":"2015","journal-title":"Advances in neural information processing systems"},{"article-title":"Multi-View Domain Adaptive Object Detection in Surveillance Cameras","volume-title":"AAAI Conference on Artificial Intelligence (AAAI)","author":"Lu","key":"ref17"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.049"},{"key":"ref21","first-page":"618","article-title":"Learning generalizable dexterous manipulation from human grasp affordance","volume-title":"CoRL","author":"Wu","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_37"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342334"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_34"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197124"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130883"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"article-title":"Dextransfer: Real world multi-fingered dexterous grasping with minimal human demonstrations","year":"2022","author":"Chen","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2012.6225116"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530103"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref33","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Conference on robot learning","author":"Yu"},{"key":"ref34","article-title":"Where are we in the search for an artificial visual cortex for embodied intelligence?","volume":"36","author":"Majumdar","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"For pre-trained vision models in motor control, not all policy learning methods are created equal","year":"2023","author":"Hu","key":"ref35"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2024,5,13]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610870.pdf?arnumber=10610870","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:22:49Z","timestamp":1723267369000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610870\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610870","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}