{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,4]],"date-time":"2025-10-04T08:00:09Z","timestamp":1759564809547,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,11,29]],"date-time":"2022-11-29T00:00:00Z","timestamp":1669680000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,11,29]]},"DOI":"10.1145\/3550469.3555401","type":"proceedings-article","created":{"date-parts":[[2022,11,30]],"date-time":"2022-11-30T11:07:54Z","timestamp":1669806474000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":22,"title":["Reconstructing Hand-Held Objects from Monocular Video"],"prefix":"10.1145","author":[{"given":"Di","family":"Huang","sequence":"first","affiliation":[{"name":"The University of Sydney, Australia"}]},{"given":"Xiaopeng","family":"Ji","sequence":"additional","affiliation":[{"name":"State Key Laboratory of CAD&amp;CG, Zhejiang Univerisity, China"}]},{"given":"Xingyi","family":"He","sequence":"additional","affiliation":[{"name":"State Key Laboratory of CAD&amp;CG, Zhejiang Univerisity, China"}]},{"given":"Jiaming","family":"Sun","sequence":"additional","affiliation":[{"name":"Image Derivative Inc., China"}]},{"given":"Tong","family":"He","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, China"}]},{"given":"Qing","family":"Shuai","sequence":"additional","affiliation":[{"name":"State Key Laboratory of CAD&amp;CG, Zhejiang Univerisity, China"}]},{"given":"Wanli","family":"Ouyang","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory, China and The University of Sydney, Australia"}]},{"given":"Xiaowei","family":"Zhou","sequence":"additional","affiliation":[{"name":"State Key Laboratory of CAD&amp;CG, Zhejiang Univerisity, China"}]}],"member":"320","published-online":{"date-parts":[[2022,11,30]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Adnane Boukhayma Rodrigo\u00a0de Bem and Philip\u00a0HS Torr. 2019. 3d hand shape and pose from images in the wild. In CVPR.","key":"e_1_3_2_3_1_1","DOI":"10.1109\/CVPR.2019.01110"},{"key":"e_1_3_2_3_2_1","volume-title":"Shapenet: An information-rich 3d model repository. arXiv preprint arXiv:1512.03012(2015).","author":"Chang X","year":"2015","unstructured":"Angel\u00a0X Chang, Thomas Funkhouser, Leonidas Guibas, Pat Hanrahan, Qixing Huang, Zimo Li, Silvio Savarese, Manolis Savva, Shuran Song, Hao Su, 2015. Shapenet: An information-rich 3d model repository. arXiv preprint arXiv:1512.03012(2015)."},{"doi-asserted-by":"crossref","unstructured":"Anpei Chen Zexiang Xu Andreas Geiger Jingyi Yu and Hao Su. 2022. TensoRF: Tensorial Radiance Fields. arxiv:2203.09517\u00a0[cs.CV]","key":"e_1_3_2_3_3_1","DOI":"10.1007\/978-3-031-19824-3_20"},{"doi-asserted-by":"crossref","unstructured":"Dengsheng Chen Jun Li Zheng Wang and Kai Xu. 2020. Learning canonical shape space for category-level 6d object pose and size estimation. In CVPR.","key":"e_1_3_2_3_4_1","DOI":"10.1109\/CVPR42600.2020.01199"},{"doi-asserted-by":"crossref","unstructured":"Zhiqin Chen and Hao Zhang. 2019. Learning implicit fields for generative shape modeling. In CVPR.","key":"e_1_3_2_3_5_1","DOI":"10.1109\/CVPR.2019.00609"},{"key":"e_1_3_2_3_6_1","volume-title":"Neural unsigned distance fields for implicit function learning. Advances in Neural Information Processing Systems 33","author":"Chibane Julian","year":"2020","unstructured":"Julian Chibane, Gerard Pons-Moll, 2020. Neural unsigned distance fields for implicit function learning. Advances in Neural Information Processing Systems 33 (2020)."},{"unstructured":"Christopher\u00a0B Choy Danfei Xu JunYoung Gwak Kevin Chen and Silvio Savarese. 2016. 3d-r2n2: A unified approach for single and multi-view 3d object reconstruction. In ECCV.","key":"e_1_3_2_3_7_1"},{"unstructured":"Angela Dai Charles Ruizhongtai\u00a0Qi and Matthias Nie\u00dfner. 2017. Shape completion using 3d-encoder-predictor cnns and shape synthesis. In CVPR.","key":"e_1_3_2_3_8_1"},{"unstructured":"Haoqiang Fan Hao Su and Leonidas\u00a0J Guibas. 2017. A point set generation network for 3d object reconstruction from a single image. In CVPR.","key":"e_1_3_2_3_9_1"},{"key":"e_1_3_2_3_10_1","volume-title":"dense, and robust multiview stereopsis. CVPR","author":"Furukawa Yasutaka","year":"2007","unstructured":"Yasutaka Furukawa and Jean Ponce. 2007. Accurate, dense, and robust multiview stereopsis. CVPR (2007)."},{"unstructured":"Liuhao Ge Zhou Ren Yuncheng Li Zehao Xue Yingying Wang Jianfei Cai and Junsong Yuan. 2019. 3D Hand Shape and Pose Estimation from a Single RGB Image. In CVPR.","key":"e_1_3_2_3_11_1"},{"unstructured":"Amos Gropp Lior Yariv Niv Haim Matan Atzmon and Yaron Lipman. 2020. Implicit Geometric Regularization for Learning Shapes. In ICML.","key":"e_1_3_2_3_12_1"},{"doi-asserted-by":"crossref","unstructured":"Thibault Groueix Matthew Fisher Vladimir\u00a0G. Kim Bryan Russell and Mathieu Aubry. 2018. AtlasNet: A Papier-M\u00e2ch\u00e9 Approach to Learning 3D Surface Generation. In CVPR.","key":"e_1_3_2_3_13_1","DOI":"10.1109\/CVPR.2018.00030"},{"doi-asserted-by":"crossref","unstructured":"Shreyas Hampali Mahdi Rad Markus Oberweger and Vincent Lepetit. 2020. HOnnotate: A Method for 3D Annotation of Hand and Object Poses. In CVPR.","key":"e_1_3_2_3_14_1","DOI":"10.1109\/CVPR42600.2020.00326"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_15_1","DOI":"10.1145\/3386569.3392452"},{"doi-asserted-by":"crossref","unstructured":"Yana Hasson G\u00fcl Varol Dimitris Tzionas Igor Kalevatykh Michael\u00a0J. Black Ivan Laptev and Cordelia Schmid. 2019. Learning joint reconstruction of hands and manipulated objects. In CVPR.","key":"e_1_3_2_3_16_1","DOI":"10.1109\/CVPR.2019.01208"},{"unstructured":"Eldar Insafutdinov and Alexey Dosovitskiy. 2018. Unsupervised Learning of Shape and Pose with Differentiable Point Clouds. In NeurIPS.","key":"e_1_3_2_3_17_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_18_1","DOI":"10.1145\/2047196.2047270"},{"key":"e_1_3_2_3_19_1","volume-title":"Gal: Geometric adversarial loss for single-view 3d-object reconstruction. In ECCV.","author":"Jiang Li","year":"2018","unstructured":"Li Jiang, Shaoshuai Shi, Xiaojuan Qi, and Jiaya Jia. 2018. Gal: Geometric adversarial loss for single-view 3d-object reconstruction. In ECCV."},{"key":"e_1_3_2_3_20_1","volume-title":"Grasping Field: Learning Implicit Representations for Human Grasps.","author":"Karunratanakul Korrawe","year":"2020","unstructured":"Korrawe Karunratanakul, Jinlong Yang, Yan Zhang, Michael Black, Krikamol Muandet, and Siyu Tang. 2020. Grasping Field: Learning Implicit Representations for Human Grasps. (2020)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_21_1","DOI":"10.5555\/1281957.1281965"},{"doi-asserted-by":"crossref","unstructured":"Nikos Kolotouros Georgios Pavlakos Michael\u00a0J Black and Kostas Daniilidis. 2019. Learning to Reconstruct 3D Human Pose and Shape via Model-fitting in the Loop. In ICCV.","key":"e_1_3_2_3_22_1","DOI":"10.1109\/ICCV.2019.00234"},{"doi-asserted-by":"crossref","unstructured":"Dominik Kulon Riza\u00a0Alp Guler Iasonas Kokkinos Michael\u00a0M. Bronstein and Stefanos Zafeiriou. 2020. Weakly-Supervised Mesh-Convolutional Hand Reconstruction in the Wild. In CVPR.","key":"e_1_3_2_3_23_1","DOI":"10.1109\/CVPR42600.2020.00504"},{"key":"e_1_3_2_3_24_1","volume-title":"International journal of computer vision 38, 3","author":"Kutulakos N","year":"2000","unstructured":"Kiriakos\u00a0N Kutulakos and Steven\u00a0M Seitz. 2000. A theory of shape by space carving. International journal of computer vision 38, 3 (2000), 199\u2013218."},{"key":"e_1_3_2_3_25_1","volume-title":"BARF: Bundle-Adjusting Neural Radiance Fields. In IEEE International Conference on Computer Vision (ICCV).","author":"Lin Chen-Hsuan","year":"2021","unstructured":"Chen-Hsuan Lin, Wei-Chiu Ma, Antonio Torralba, and Simon Lucey. 2021. BARF: Bundle-Adjusting Neural Radiance Fields. In IEEE International Conference on Computer Vision (ICCV)."},{"key":"e_1_3_2_3_26_1","volume-title":"Real-Time High-Resolution Background Matting. arXiv","author":"Lin Shanchuan","year":"2020","unstructured":"Shanchuan Lin, Andrey Ryabtsev, Soumyadip Sengupta, Brian Curless, Steve Seitz, and Ira Kemelmacher-Shlizerman. 2020. Real-Time High-Resolution Background Matting. arXiv (2020), arXiv\u20132012."},{"key":"e_1_3_2_3_27_1","volume-title":"Marching cubes: A high resolution 3D surface construction algorithm. SIGGRAPH","author":"Lorensen E","year":"1987","unstructured":"William\u00a0E Lorensen and Harvey\u00a0E Cline. 1987. Marching cubes: A high resolution 3D surface construction algorithm. SIGGRAPH (1987)."},{"key":"e_1_3_2_3_28_1","volume-title":"Distinctive image features from scale-invariant keypoints. IJCV","author":"Lowe G","year":"2004","unstructured":"David\u00a0G Lowe. 2004. Distinctive image features from scale-invariant keypoints. IJCV (2004)."},{"doi-asserted-by":"crossref","unstructured":"Priyanka Mandikal KL Navaneet Mayank Agarwal and R\u00a0Venkatesh Babu. 2018. 3D-LMNet: Latent embedding matching for accurate and diverse 3D point cloud reconstruction from a single image. arXiv preprint arXiv:1807.07796(2018).","key":"e_1_3_2_3_29_1","DOI":"10.1007\/978-3-030-11015-4_50"},{"doi-asserted-by":"crossref","unstructured":"Lars Mescheder Michael Oechsle Michael Niemeyer Sebastian Nowozin and Andreas Geiger. 2019. Occupancy networks: Learning 3d reconstruction in function space. In CVPR.","key":"e_1_3_2_3_30_1","DOI":"10.1109\/CVPR.2019.00459"},{"doi-asserted-by":"crossref","unstructured":"Ben Mildenhall Pratul\u00a0P. Srinivasan Matthew Tancik Jonathan\u00a0T. Barron Ravi Ramamoorthi and Ren Ng. 2020. NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis. In ECCV.","key":"e_1_3_2_3_31_1","DOI":"10.1007\/978-3-030-58452-8_24"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_32_1","DOI":"10.1109\/CVPR.2018.00013"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_33_1","DOI":"10.1145\/3306346.3322958"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_34_1","DOI":"10.1145\/3528223.3530127"},{"unstructured":"Jeong\u00a0Joon Park Peter Florence Julian Straub Richard Newcombe and Steven Lovegrove. 2019. DeepSDF: Learning Continuous Signed Distance Functions for Shape Representation. In CVPR.","key":"e_1_3_2_3_35_1"},{"key":"e_1_3_2_3_36_1","volume-title":"Nerfies: Deformable Neural Radiance Fields. ICCV","author":"Park Keunhong","year":"2021","unstructured":"Keunhong Park, Utkarsh Sinha, Jonathan\u00a0T. Barron, Sofien Bouaziz, Dan\u00a0B Goldman, Steven\u00a0M. Seitz, and Ricardo Martin-Brualla. 2021. Nerfies: Deformable Neural Radiance Fields. ICCV (2021)."},{"key":"e_1_3_2_3_37_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_3_38_1","volume-title":"Octnet: Learning deep 3d representations at high resolutions. In CVPR.","author":"Riegler Gernot","year":"2017","unstructured":"Gernot Riegler, Ali Osman\u00a0Ulusoy, and Andreas Geiger. 2017. Octnet: Learning deep 3d representations at high resolutions. In CVPR."},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_39_1","DOI":"10.1145\/3130800.3130883"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_40_1","DOI":"10.1145\/566654.566600"},{"doi-asserted-by":"crossref","unstructured":"Johannes\u00a0Lutz Sch\u00f6nberger and Jan-Michael Frahm. 2016. Structure-from-Motion Revisited. In CVPR.","key":"e_1_3_2_3_41_1","DOI":"10.1109\/CVPR.2016.445"},{"doi-asserted-by":"crossref","unstructured":"Johannes\u00a0Lutz Sch\u00f6nberger Enliang Zheng Marc Pollefeys and Jan-Michael Frahm. 2016. Pixelwise View Selection for Unstructured Multi-View Stereo. In ECCV.","key":"e_1_3_2_3_42_1","DOI":"10.1007\/978-3-319-46487-9_31"},{"doi-asserted-by":"crossref","unstructured":"Maxim Tatarchenko Alexey Dosovitskiy and Thomas Brox. 2017. Octree generating networks: Efficient convolutional architectures for high-resolution 3d outputs. In ICCV.","key":"e_1_3_2_3_43_1","DOI":"10.1109\/ICCV.2017.230"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_44_1","DOI":"10.1145\/2897824.2925965"},{"doi-asserted-by":"crossref","unstructured":"Henning Tjaden Ulrich Schwanecke and Elmar Sch\u00f6mer. 2016. Real-time monocular segmentation and pose tracking of multiple objects. In ECCV.","key":"e_1_3_2_3_45_1","DOI":"10.1007\/978-3-319-46493-0_26"},{"doi-asserted-by":"crossref","unstructured":"Henning Tjaden Ulrich Schwanecke and Elmar Schomer. 2017. Real-time monocular pose estimation of 3D objects using temporally consistent local color histograms. In ICCV.","key":"e_1_3_2_3_46_1","DOI":"10.1109\/ICCV.2017.23"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_47_1","DOI":"10.1145\/2629500"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_48_1","DOI":"10.1109\/ICCV48922.2021.01272"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_49_1","DOI":"10.1109\/ICCV.2015.90"},{"doi-asserted-by":"crossref","unstructured":"He Wang Srinath Sridhar Jingwei Huang Julien Valentin Shuran Song and Leonidas\u00a0J Guibas. 2019. Normalized object coordinate space for category-level 6d object pose and size estimation. In CVPR.","key":"e_1_3_2_3_50_1","DOI":"10.1109\/CVPR.2019.00275"},{"doi-asserted-by":"crossref","unstructured":"Nanyang Wang Yinda Zhang Zhuwen Li Yanwei Fu Wei Liu and Yu-Gang Jiang. 2018. Pixel2mesh: Generating 3d mesh models from single rgb images. In ECCV.","key":"e_1_3_2_3_51_1","DOI":"10.1007\/978-3-030-01252-6_4"},{"key":"e_1_3_2_3_52_1","volume-title":"NeuS: Learning Neural Implicit Surfaces by","author":"Wang Peng","year":"2021","unstructured":"Peng Wang, Lingjie Liu, Yuan Liu, Christian Theobalt, Taku Komura, and Wenping Wang. 2021a. NeuS: Learning Neural Implicit Surfaces by Volume Rendering for Multi-view Reconstruction. arXiv preprint arXiv:2106.10689(2021)."},{"doi-asserted-by":"crossref","unstructured":"Weiyue Wang Qiangui Huang Suya You Chao Yang and Ulrich Neumann. 2017. Shape inpainting using 3d generative adversarial network and recurrent convolutional networks. In ICCV.","key":"e_1_3_2_3_53_1","DOI":"10.1109\/ICCV.2017.252"},{"unstructured":"Zirui Wang Shangzhe Wu Weidi Xie Min Chen and Victor\u00a0Adrian Prisacariu. 2021b. NeRF \u2212 \u2212: Neural Radiance Fields Without Known Camera Parameters. arXiv preprint arXiv:2102.07064(2021).","key":"e_1_3_2_3_54_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_55_1","DOI":"10.1109\/CVPR.2008.4587832"},{"key":"e_1_3_2_3_56_1","volume-title":"DISN: Deep Implicit Surface Network for High-quality Single-view 3D Reconstruction. In NeurIPS.","author":"Xu Qiangeng","year":"2019","unstructured":"Qiangeng Xu, Weiyue Wang, Duygu Ceylan, Radomir Mech, and Ulrich Neumann. 2019. DISN: Deep Implicit Surface Network for High-quality Single-view 3D Reconstruction. In NeurIPS."},{"key":"e_1_3_2_3_57_1","volume-title":"MVSNet: Depth Inference for Unstructured Multi-view Stereo. European Conference on Computer Vision (ECCV)","author":"Yao Yao","year":"2018","unstructured":"Yao Yao, Zixin Luo, Shiwei Li, Tian Fang, and Long Quan. 2018. MVSNet: Depth Inference for Unstructured Multi-view Stereo. European Conference on Computer Vision (ECCV) (2018)."},{"unstructured":"Yufei Ye Abhinav Gupta and Shubham Tulsiani. 2022. What\u2019s in your hands? 3D Reconstruction of Generic Objects in Hands. (2022).","key":"e_1_3_2_3_58_1"},{"key":"e_1_3_2_3_59_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3306346.3322998","article-title":"InteractionFusion: real-time reconstruction of hand poses and deformable objects in hand-object interactions","volume":"38","author":"Zhang Hao","year":"2019","unstructured":"Hao Zhang, Zi-Hao Bo, Jun-Hai Yong, and Feng Xu. 2019. InteractionFusion: real-time reconstruction of hand poses and deformable objects in hand-object interactions. ACM Transactions on Graphics (TOG) 38, 4 (2019), 1\u201311.","journal-title":"ACM Transactions on Graphics (TOG)"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_60_1","DOI":"10.1145\/3478513.3480500"},{"doi-asserted-by":"publisher","key":"e_1_3_2_3_61_1","DOI":"10.1109\/ICCV48922.2021.01554"}],"event":{"sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"],"acronym":"SA '22","name":"SA '22: SIGGRAPH Asia 2022","location":"Daegu Republic of Korea"},"container-title":["SIGGRAPH Asia 2022 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3550469.3555401","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3550469.3555401","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:51:44Z","timestamp":1750182704000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3550469.3555401"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,29]]},"references-count":61,"alternative-id":["10.1145\/3550469.3555401","10.1145\/3550469"],"URL":"https:\/\/doi.org\/10.1145\/3550469.3555401","relation":{},"subject":[],"published":{"date-parts":[[2022,11,29]]},"assertion":[{"value":"2022-11-30","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}