{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T06:01:41Z","timestamp":1779948101279,"version":"3.53.1"},"reference-count":96,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:00:00Z","timestamp":1773964800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:00:00Z","timestamp":1773964800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,3,20]]},"DOI":"10.1109\/3dv69130.2026.00071","type":"proceedings-article","created":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T19:40:49Z","timestamp":1779910849000},"page":"681-692","source":"Crossref","is-referenced-by-count":0,"title":["Reconstructing Hand-Held Objects in 3D from Images and Videos"],"prefix":"10.1109","author":[{"given":"Jane","family":"Wu","sequence":"first","affiliation":[{"name":"UC Berkeley"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Georgios","family":"Pavlakos","sequence":"additional","affiliation":[{"name":"UT Austin"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Georgia","family":"Gkioxari","sequence":"additional","affiliation":[{"name":"Caltech"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jitendra","family":"Malik","sequence":"additional","affiliation":[{"name":"UC Berkeley"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Introducing hot3d: An egocentric dataset for 3d hand and object tracking","author":"Banerjee","year":"2024","journal-title":"arXiv preprint"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01547"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58601-0_22"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01219"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/70.795798"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00893"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19769-7_14"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01239"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02022"},{"key":"ref10","article-title":"Handnerf: Learning to reconstruct hand-object interaction scene from a single rgb image","author":"Choi","year":"2023","journal-title":"arXiv preprint"},{"key":"ref11","article-title":"Gemini 2.5: Pushing the frontier with advanced reasoning, multimodality, long context, and next generation agentic capabilities","author":"Comanici","year":"2025","journal-title":"arXiv preprint"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr42600.2020.00508"},{"key":"ref13","first-page":"720","article-title":"Scaling egocentric vision: The epic-kitchens dataset","volume-title":"Proceedings of the European conference on computer vision (ECCV)","author":"Damen","year":"2018"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.52202\/075280-1554"},{"key":"ref16","article-title":"An image is worth $16 \\times 16$ words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv preprint"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52729.2023.01244"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1973.9030"},{"key":"ref19","article-title":"Hacd: Hand-aware conditional diffusion for monocular hand-held object reconstruction","author":"Fu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01627"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00988"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01842"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-025-02557-6"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00326"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01638"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1017\/cbo9780511811685"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01208"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00065"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00075"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref31","article-title":"Lrm: Large reconstruction model for single image to 3d","author":"Hong","year":"2023","journal-title":"arXiv preprint"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555401"},{"key":"ref33","article-title":"Speech & language processing","author":"Jurafsky","year":"2000","journal-title":"Pearson Education India"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01231"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/MFI.2017.8170406"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00998"},{"key":"ref38","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3618333"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref41","article-title":"One-2\u20133-45: Any single image to 3 d mesh in 45 seconds without per-shape optimization","volume":"36","author":"Liu","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02034"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.00951"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10584-0_11"},{"key":"ref47","article-title":"Luma AI","volume-title":"Capture 3D","year":"2023"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2015.2463671"},{"key":"ref50","article-title":"OpenAI","volume-title":"Gpt-4v(ision) system card","year":"2023"},{"key":"ref51","article-title":"Dinov2: Learning robust visual features without supervision","author":"Oquab","year":"2023","journal-title":"arXiv preprint"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00155"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00025"},{"key":"ref54","article-title":"Learning to imitate object interactions from internet videos","author":"Patel","year":"2022","journal-title":"arXiv preprint"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.00938"},{"key":"ref56","article-title":"Dreamfusion: Text-to-3d using 2d diffusion","author":"Poole","year":"2022","journal-title":"arXiv preprint"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01143"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73229-4_8"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73229-4_11"},{"key":"ref60","first-page":"652","article-title":"Pointnet: Deep learning on point sets for 3d classification and segmentation","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Qi","year":"2017"},{"key":"ref61","article-title":"Pointnet++: Deep hierarchical feature learning on point sets in a metric space","volume":"30","author":"Ruizhongtai Qi","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01386"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"ref64","article-title":"Accelerating 3d deep learning with pytorch3d","author":"Ravi","year":"2020","journal-title":"arXiv preprint"},{"key":"ref65","article-title":"Sam 2: Segment anything in images and videos","author":"Ravi","year":"2024","journal-title":"arXiv preprint"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130883"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/iccvw54120.2021.00201"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1023\/A:1014573219977"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00989"},{"key":"ref72","article-title":"Hand-object interaction pretraining from videos","author":"Singh","year":"2024","journal-title":"arXiv preprint"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-34372-9"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_34"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1186\/s41074-017-0027-2"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/BF00129684"},{"key":"ref77","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1967.1054010"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00062"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00567"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00066"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02035"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00609"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00875"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00278"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01635"},{"key":"ref87","article-title":"Instantmesh: Efficient 3d mesh generation from a single image with sparse-view large reconstruction models","author":"Xu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00277"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02028"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00387"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.01806"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00187"},{"key":"ref93","article-title":"Predicting 4d hand trajectory from monocular videos","author":"Ye","year":"2025","journal-title":"arXiv preprint"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01245"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.00953"},{"key":"ref96","article-title":"Handypriors: Physically consistent perception of hand-object interactions with differentiable priors","author":"Zhang","year":"2023","journal-title":"arXiv preprint"}],"event":{"name":"2026 International Conference on 3D Vision (3DV)","location":"Vancouver, BC, Canada","start":{"date-parts":[[2026,3,20]]},"end":{"date-parts":[[2026,3,23]]}},"container-title":["2026 International Conference on 3D Vision (3DV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11533157\/11533158\/11533237.pdf?arnumber=11533237","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T05:01:57Z","timestamp":1779944517000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11533237\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,20]]},"references-count":96,"URL":"https:\/\/doi.org\/10.1109\/3dv69130.2026.00071","relation":{},"subject":[],"published":{"date-parts":[[2026,3,20]]}}}