{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T12:41:04Z","timestamp":1778071264873,"version":"3.51.4"},"reference-count":70,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128495","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"16050-16057","source":"Crossref","is-referenced-by-count":6,"title":["ViTa-Zero: Zero-shot Visuotactile Object 6D Pose Estimation"],"prefix":"10.1109","author":[{"given":"Hongyu","family":"Li","sequence":"first","affiliation":[{"name":"Amazon Fulfillment Technologies &#x0026; Robotics,Westborough,MA,01581"}]},{"given":"James","family":"Akl","sequence":"additional","affiliation":[{"name":"Amazon Fulfillment Technologies &#x0026; Robotics,Westborough,MA,01581"}]},{"given":"Srinath","family":"Sridhar","sequence":"additional","affiliation":[{"name":"Amazon Fulfillment Technologies &#x0026; Robotics,Westborough,MA,01581"}]},{"given":"Tye","family":"Brady","sequence":"additional","affiliation":[{"name":"Amazon Fulfillment Technologies &#x0026; Robotics,Westborough,MA,01581"}]},{"given":"Ta\u015fk\u0131n","family":"Pad\u0131r","sequence":"additional","affiliation":[{"name":"Amazon Fulfillment Technologies &#x0026; Robotics,Westborough,MA,01581"}]}],"member":"263","reference":[{"issue":"30","key":"ref1","first-page":"1","article-title":"A Review of Robot Learning for Manipulation: Challenges, Representations, and Algorithms","volume":"22","author":"Kroemer","year":"2021","journal-title":"Journal of Machine Learning Research"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adc9244"},{"key":"ref3","first-page":"297","article-title":"A System for General In-Hand Object Re-Orientation","volume-title":"Proceedings of the 5th Conference on Robot Learning. PMLR","author":"Chen","year":"2022"},{"key":"ref4","author":"Qi","year":"2023","journal-title":"General In-hand Object Rotation with Vision and Touch"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/tro.2024.3433870"},{"key":"ref6","author":"Akkaya","year":"2019","journal-title":"Solving Rubik\u2019s Cube with a Robot Hand"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160216"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.019"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00346"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00776"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00997"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00275"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01199"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02039"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01692"},{"key":"ref17","author":"Labb\u00e9","year":"2022","journal-title":"MegaPose: 6D Pose Estimation of Novel Objects via Render & Compare"},{"key":"ref18","first-page":"35 103","article-title":"OnePose++: Keypoint-Free One-Shot Object Pose Estimation without CAD Models","volume":"35","author":"He","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_18"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02636"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.067"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3313941"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3143289"},{"key":"ref25","first-page":"49 921","article-title":"VinT-6D: A Large-Scale Object-in-hand Dataset from Vision, Touch and Proprioception","volume-title":"Proceedings of the 41st International Conference on Machine Learning. PMLR","author":"Wan","year":"2024"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161264"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341688"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3337690"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3244552"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802001"},{"key":"ref31","author":"Li","year":"2025","journal-title":"V-HOP: Visuo-Haptic 6D Object Pose Tracking"},{"key":"ref32","author":"Yang","year":"2022","journal-title":"Touch and Go: Learning from Human-Collected Vision and Touch"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3146945"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2961050"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811832"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00066"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530127"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2005.1577023"},{"key":"ref39","first-page":"1015","article-title":"Tactile Object Pose Estimation from the First Touch with Geometric Contact Rendering","volume-title":"Proceedings of the 2020 Conference on Robot Learning. PMLR","author":"Villalonga","year":"2021"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160359"},{"key":"ref41","first-page":"3142","article-title":"Dexterity from Touch: Self-Supervised Pre-Training of Tactile Representations with Robotic Play","volume-title":"Proceedings of The 7th Conference on Robot Learning. PMLR","author":"Guzey"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02488"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02505"},{"key":"ref44","author":"Zhao","year":"2024","journal-title":"Transferable Tactile Transformers for Representation Learning Across Diverse Sensors and Tasks"},{"key":"ref45","author":"Lin","year":"2024","journal-title":"Learning Visuotactile Skills with Two Multifingered Hands"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.036"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/icra57147.2024.10610350"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2977257"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.3390\/s17122762"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593661"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794298"},{"key":"ref52","first-page":"319","article-title":"Midas-Touch: Monte-Carlo inference over distributions across sliding touch","volume-title":"Proceedings of The 6th Conference on Robot Learning. PMLR","author":"Suresh","year":"2023"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812040"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/RoboSoft51838.2021.9479234"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2019.2959445"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01091"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3372102"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967960"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00027"},{"key":"ref60","article-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Paszke","year":"2019"},{"key":"ref61","author":"Kingma","year":"2017","journal-title":"Adam: A Method for Stochastic Optimization"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00570"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/34.121791"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1177\/0278364914558494"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abn1944"},{"key":"ref70","author":"Li","year":"2024","journal-title":"Unifying 3D Representation and Control of Diverse Robots with a Single Camera"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128495.pdf?arnumber=11128495","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:07:55Z","timestamp":1756879675000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128495\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":70,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128495","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}