{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T14:57:06Z","timestamp":1776956226887,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"2019YFE0125700"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681131","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"8942-8951","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["VoCAPTER: Voting-based Pose Tracking for Category-level Articulated Object via Inter-frame Priors"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1610-6056","authenticated-orcid":false,"given":"Li","family":"Zhang","sequence":"first","affiliation":[{"name":"Hefei Institute of Physical Science, Chinese Academy of Sciences, University of Science and Technology of China, &amp; Astribot Inc, HeFei, AnHui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4873-3991","authenticated-orcid":false,"given":"Zean","family":"Han","sequence":"additional","affiliation":[{"name":"Department of Mathematics, Chinese University of Hong Kong, New Territories, Hong Kong SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0005-2620","authenticated-orcid":false,"given":"Yan","family":"Zhong","sequence":"additional","affiliation":[{"name":"School of Mathematical Sciences, National Engineering Research Center of Visual Technology, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1590-1753","authenticated-orcid":false,"given":"Qiaojun","family":"Yu","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Shanghai Jiao Tong University, ShangHai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8204-6197","authenticated-orcid":false,"given":"Xingyu","family":"Wu","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9202-6591","authenticated-orcid":false,"given":"Xue","family":"Wang","sequence":"additional","affiliation":[{"name":"Hefei Institute of Physical Science, Chinese Academy Sciences, HeFei, AnHui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9534-3425","authenticated-orcid":false,"given":"Rujing","family":"Wang","sequence":"additional","affiliation":[{"name":"Hefei Institute of Physical Science, Chinese Academy Sciences, HeFei, AnHui, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01854"},{"key":"e_1_3_2_1_2_1","volume-title":"A survey of augmented reality. Presence: teleoperators & virtual environments 6, 4","author":"Azuma Ronald T","year":"1997","unstructured":"Ronald T Azuma. 1997. A survey of augmented reality. Presence: teleoperators & virtual environments 6, 4 (1997), 355--385."},{"key":"e_1_3_2_1_3_1","volume-title":"Gazeenabled activity recognition for augmented reality feedback. Computers & Graphics","author":"Bekta\u015f Kenan","year":"2024","unstructured":"Kenan Bekta\u015f, Jannis Strecker, Simon Mayer, and Kimberly Garcia. 2024. Gazeenabled activity recognition for augmented reality feedback. Computers & Graphics (2024), 103909."},{"key":"e_1_3_2_1_4_1","volume-title":"Trends and challenges in robot manipulation. Science 364, 6446","author":"Billard Aude","year":"2019","unstructured":"Aude Billard and Danica Kragic. 2019. Trends and challenges in robot manipulation. Science 364, 6446 (2019), eaat8414."},{"key":"e_1_3_2_1_5_1","volume-title":"Bridging the domain gap: Self-supervised 3d scene understanding with foundation models. Advances in Neural Information Processing Systems 36","author":"Chen Zhimin","year":"2024","unstructured":"Zhimin Chen, Longlong Jing, Yingwei Li, and Bing Li. 2024. Bridging the domain gap: Self-supervised 3d scene understanding with foundation models. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3517235","article-title":"ARticulate: One- Shot Interactions with Intelligent Assistants in Unfamiliar Smart Spaces Using Augmented Reality","volume":"6","author":"Clark Meghan","year":"2022","unstructured":"Meghan Clark, Mark W Newman, and Prabal Dutta. 2022. ARticulate: One- Shot Interactions with Intelligent Assistants in Unfamiliar Smart Spaces Using Augmented Reality. Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies 6, 1 (2022), 1--24.","journal-title":"Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jretconser.2023.103577"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00666"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00677"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Bertram Drost Markus Ulrich Nassir Navab and Slobodan Ilic. 2010. Model globally match locally: Efficient and robust 3D object recognition. In 2010 IEEE computer society conference on computer vision and pattern recognition. Ieee 998--1005.","DOI":"10.1109\/CVPR.2010.5540108"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3358415"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02031"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9982029"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2816795.2818131"},{"key":"e_1_3_2_1_15_1","volume-title":"NAP: Neural 3D Articulated Object Prior. Advances in Neural Information Processing Systems 36","author":"Lei Jiahui","year":"2024","unstructured":"Jiahui Lei, Congyue Deng, William B Shen, Leonidas J Guibas, and Kostas Daniilidis. 2024. NAP: Neural 3D Articulated Object Prior. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_16_1","volume-title":"Neural-logic humanobject interaction detection. Advances in Neural Information Processing Systems 36","author":"Li Liulei","year":"2024","unstructured":"Liulei Li, Jianan Wei, Wenguan Wang, and Yi Yang. 2024. Neural-logic humanobject interaction detection. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00301"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00376"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00354"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811720"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611852"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"38","author":"Liu Liu","year":"2024","unstructured":"Liu Liu, Anran Huang, Qi Wu, Dan Guo, Xun Yang, and Meng Wang. 2024. KPATracker: Towards Robust and Real-Time Category-Level Articulated Object 6D Pose Tracking. In Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 38. 3684--3692."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01439"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3138644"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02034"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110151"},{"key":"e_1_3_2_1_27_1","volume-title":"CPS: Improving class-level 6D pose and shape estimation from monocular images with self-supervised learning. arXiv preprint arXiv:2003.05848","author":"Manhardt Fabian","year":"2020","unstructured":"Fabian Manhardt, Gu Wang, Benjamin Busam, Manuel Nickel, Sven Meier, Luca Minciullo, Xiangyang Ji, and Nassir Navab. 2020. CPS: Improving class-level 6D pose and shape estimation from monocular images with self-supervised learning. arXiv preprint arXiv:2003.05848 (2020)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460825"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2019.2914306"},{"key":"e_1_3_2_1_30_1","volume-title":"Where2explore: Few-shot affordance learning for unseen novel categories of articulated objects. Advances in Neural Information Processing Systems 36","author":"Ning Chuanruo","year":"2024","unstructured":"Chuanruo Ning, Ruihai Wu, Haoran Lu, Kaichun Mo, and Hao Dong. 2024. Where2explore: Few-shot affordance learning for unseen novel categories of articulated objects. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_31_1","volume-title":"Pointnet: Deep hierarchical feature learning on point sets in a metric space. Advances in neural information processing systems 30","author":"Qi Charles Ruizhongtai","year":"2017","unstructured":"Charles Ruizhongtai Qi, Li Yi, Hao Su, and Leonidas J Guibas. 2017. Pointnet: Deep hierarchical feature learning on point sets in a metric space. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2024.104632"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i14.29464"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196679"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00275"},{"key":"e_1_3_2_1_36_1","volume-title":"RPMArt: Towards Robust Perception and Manipulation for Articulated Objects. arXiv preprint arXiv:2403.16023","author":"Wang Junbo","year":"2024","unstructured":"Junbo Wang, Wenhai Liu, Qiaojun Yu, Yang You, Liu Liu, Weiming Wang, and Cewu Lu. 2024. RPMArt: Towards Robust Perception and Manipulation for Articulated Objects. arXiv preprint arXiv:2403.16023 (2024)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9635991"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00066"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01296"},{"key":"e_1_3_2_1_40_1","volume-title":"Learning environment-aware affordance for 3d articulated object manipulation under occlusions. Advances in Neural Information Processing Systems 36","author":"Wu Ruihai","year":"2024","unstructured":"Ruihai Wu, Kai Cheng, Yan Zhao, Chuanruo Ning, Guanqi Zhan, and Hao Dong. 2024. Learning environment-aware affordance for 3d articulated object manipulation under occlusions. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01111"},{"key":"e_1_3_2_1_42_1","volume-title":"OMAD: Object Model with Articulated Deformations for Pose Estimation and Retrieval. arXiv preprint arXiv:2112.07334","author":"Xue Han","year":"2021","unstructured":"Han Xue, Liu Liu,Wenqiang Xu, Haoyuan Fu, and Cewu Lu. 2021. OMAD: Object Model with Articulated Deformations for Pose Estimation and Retrieval. arXiv preprint arXiv:2112.07334 (2021)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00277"},{"key":"e_1_3_2_1_44_1","volume-title":"Weiming Wang, and Cewu Lu.","author":"You Yang","year":"2022","unstructured":"Yang You, Wenhao He, Michael Xu Liu, Weiming Wang, and Cewu Lu. 2022. Go Beyond Point Pairs: A General and Accurate Sim2Real Object Pose Voting Method with Efficient Online Synthetic Training. CoRR (2022)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00112"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2022.3147415"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109896"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681131","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681131","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:53Z","timestamp":1750294673000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681131"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":47,"alternative-id":["10.1145\/3664647.3681131","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681131","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}