{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:04:52Z","timestamp":1765339492969,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["62406039, 62321001, 62471055, U23B2001, 62171057, 62201072, 62071067"],"award-info":[{"award-number":["62406039, 62321001, 62471055, U23B2001, 62171057, 62201072, 62071067"]}]},{"name":"High-Quality Development Project of the MIIT","award":["2440STCZB2584"],"award-info":[{"award-number":["2440STCZB2584"]}]},{"name":"Ministry of Education and China Mobile Joint Fund","award":["MCM20200202, MCM20180101"],"award-info":[{"award-number":["MCM20200202, MCM20180101"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2024PTB-004"],"award-info":[{"award-number":["2024PTB-004"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Postdoctoral Fellowship Program of CPSF","award":["GZC20230320"],"award-info":[{"award-number":["GZC20230320"]}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2023TQ0039, 2024M750257"],"award-info":[{"award-number":["2023TQ0039, 2024M750257"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755342","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:15Z","timestamp":1761375255000},"page":"1646-1655","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Rule Meets Learning: Confidence-Aware Multi-View Fusion for Self-Supervised 3D Hand Pose Estimation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1691-6457","authenticated-orcid":false,"given":"Pengfei","family":"Ren","sequence":"first","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2182-2228","authenticated-orcid":false,"given":"Jingyu","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3072-7422","authenticated-orcid":false,"given":"Haifeng","family":"Sun","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0829-4624","authenticated-orcid":false,"given":"Qi","family":"Qi","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4627-6307","authenticated-orcid":false,"given":"Jing","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1486-0573","authenticated-orcid":false,"given":"Jianxin","family":"Liao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Networking and Switching Technology, Beijing University of Posts and Telecommunications, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2018.06.097"},{"key":"e_1_3_2_1_2_1","first-page":"10451","article-title":"Model-based 3d hand reconstruction via self-supervised learning","author":"Chen Yujin","year":"2021","unstructured":"Yujin Chen, Zhigang Tu, Di Kang, Linchao Bao, Ying Zhang, Xuefei Zhe, Ruizhi Chen, and Junsong Yuan. 2021a. Model-based 3d hand reconstruction via self-supervised learning. In CVPR. 10451-10460.","journal-title":"CVPR."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01142"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19919"},{"key":"e_1_3_2_1_5_1","volume-title":"European Conference on Computer Vision. Springer, 35-52","author":"Cheng Wencan","year":"2024","unstructured":"Wencan Cheng, Eunji Kim, and Jong Hwan Ko. 2024a. HandDAGT: A Denoising Adaptive Graph Transformer for 3D Hand Pose Estimation. In European Conference on Computer Vision. Springer, 35-52."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01107"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00221"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00025"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01013"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_8"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00878"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.391"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.602"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2827052"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_29"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_29"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8297136"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58595-2_2"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6761"},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the British Machine Vision Conference.","author":"Kulon Dominik","year":"2019","unstructured":"Dominik Kulon, Haoyang Wang, Riza Alp G\u00fcler, Michael M. Bronstein, and Stefanos Zafeiriou. 2019. Single Image 3D Hand Reconstruction with Mesh Convolutions. In Proceedings of the British Machine Vision Conference."},{"key":"e_1_3_2_1_21_1","volume-title":"Pointcnn: Convolution on x-transformed points. Advances in neural information processing systems","author":"Li Yangyan","year":"2018","unstructured":"Yangyan Li, Rui Bu, Mingchao Sun, Wei Wu, Xinhan Di, and Baoquan Chen. 2018. Pointcnn: Convolution on x-transformed points. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00071"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00714"},{"key":"e_1_3_2_1_24_1","unstructured":"Yamin Mao Zhihua Liu Weiming Li SoonYong Cho Qiang Wang and Xiaoshuai Hao. 2024. Enhancing 3D Hand Pose Estimation via Dense Ordinal Regression Network.. In BMVC."},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern Recognition. 5079-5088","author":"Moon Gyeongsik","year":"2018","unstructured":"Gyeongsik Moon, Yong Chang, and Kyoung Mu Lee. 2018. V2v-posenet: Voxel-to-voxel prediction network for accurate 3d hand and human pose estimation from a single depth map. In Proceedings of the IEEE conference on computer vision and pattern Recognition. 5079-5088."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.75"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.536"},{"key":"e_1_3_2_1_28_1","first-page":"5099","article-title":"Pointnet: Deep hierarchical feature learning on point sets in a metric space","author":"Qi Charles Ruizhongtai","year":"2017","unstructured":"Charles Ruizhongtai Qi, Li Yi, Hao Su, and Leonidas J Guibas. 2017. Pointnet: Deep hierarchical feature learning on point sets in a metric space. In Advances in Neural Information Processing Systems. 5099-5108.","journal-title":"Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_29_1","volume-title":"Accelerating 3D Deep Learning with PyTorch3D. arXiv:2007.08501","author":"Ravi Nikhila","year":"2020","unstructured":"Nikhila Ravi, Jeremy Reizenstein, David Novotny, Taylor Gordon, Wan-Yen Lo, Justin Johnson, and Georgia Gkioxari. 2020. Accelerating 3D Deep Learning with PyTorch3D. arXiv:2007.08501 (2020)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25310"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3192708"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01990"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.01.045"},{"key":"e_1_3_2_1_34_1","volume-title":"SRN: Stacked Regression Network for Real-time 3D Hand Pose Estimation.. In BMVC. 112.","author":"Ren Pengfei","year":"2019","unstructured":"Pengfei Ren, Haifeng Sun, Qi Qi, Jingyu Wang, and Weiting Huang. 2019. SRN: Stacked Regression Network for Real-time 3D Hand Pose Estimation.. In BMVC. 112."},{"key":"e_1_3_2_1_35_1","volume-title":"Black","author":"Romero Javier","year":"2017","unstructured":"Javier Romero, Dimitrios Tzionas, and Michael J. Black. 2017. Embodied Hands: Modeling and Capturing Hands and Bodies Together. TOG, Vol. 36, 6 (2017)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298683"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.490"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2629500"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01111"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00540"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_27"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00088"},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 4866-4874","author":"Yuan Shanxin","year":"2017","unstructured":"Shanxin Yuan, Qi Ye, Bjorn Stenger, Siddhant Jain, and Tae-Kyun Kim. 2017. Bighand2. 2m benchmark: Hand pose dataset and state of the art analysis. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 4866-4874."},{"key":"e_1_3_2_1_44_1","first-page":"3425","article-title":"Semantic graph convolutional networks for 3d human pose regression","author":"Zhao Long","year":"2019","unstructured":"Long Zhao, Xi Peng, Yu Tian, Mubbasir Kapadia, and Dimitris N Metaxas. 2019. Semantic graph convolutional networks for 3d human pose regression. In CVPR. 3425-3435.","journal-title":"CVPR."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755342","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:01:07Z","timestamp":1765339267000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755342"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":44,"alternative-id":["10.1145\/3746027.3755342","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755342","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}