{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:50:40Z","timestamp":1767340240421,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the National Science and Technology Major Project","award":["2022ZD0116405"],"award-info":[{"award-number":["2022ZD0116405"]}]},{"name":"the National Natural Science Foundation of China","award":["No. 62306025, No. 92367204"],"award-info":[{"award-number":["No. 62306025, No. 92367204"]}]},{"name":"Beijing Municipal Science and Technology Project","award":["Nos. Z231100010323002"],"award-info":[{"award-number":["Nos. Z231100010323002"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681121","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"5308-5317","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["VRDistill: Vote Refinement Distillation for Efficient Indoor 3D Object Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-6673-9670","authenticated-orcid":false,"given":"Ze","family":"Yuan","sequence":"first","affiliation":[{"name":"Nanjing University &amp; Beihang University, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1956-3367","authenticated-orcid":false,"given":"Jinyang","family":"Guo","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8016-736X","authenticated-orcid":false,"given":"Dakai","family":"An","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6742-4332","authenticated-orcid":false,"given":"Junran","family":"Wu","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1328-8431","authenticated-orcid":false,"given":"He","family":"Zhu","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5531-714X","authenticated-orcid":false,"given":"Jianhao","family":"Li","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1660-5325","authenticated-orcid":false,"given":"Xueyuan","family":"Chen","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6241-8352","authenticated-orcid":false,"given":"Ke","family":"Xu","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5183-8538","authenticated-orcid":false,"given":"Jiaheng","family":"Liu","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, Jiangsu, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Variational Information Distillation for Knowledge Transfer. In 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 9155--9163","author":"Ahn Sungsoo","year":"2019","unstructured":"Sungsoo Ahn, Shell Xu Hu, Andreas Damianou, Neil D. Lawrence, and Zhenwen Dai. 2019. Variational Information Distillation for Knowledge Transfer. In 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 9155--9163."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_2_1","DOI":"10.1109\/ICCV51070.2023.00635"},{"key":"e_1_3_2_2_3_1","volume-title":"NIPS","volume":"30","author":"Chen Guobin","year":"2017","unstructured":"Guobin Chen, Wongun Choi, Xiang Yu, Tony Han, and Manmohan Chandraker. 2017. Learning efficient object detection models with knowledge distillation. NIPS, Vol. 30 (2017)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_4_1","DOI":"10.1109\/CVPR42600.2020.00047"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_5_1","DOI":"10.1109\/CVPR52729.2023.01301"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_6_1","DOI":"10.1109\/CVPR.2017.261"},{"doi-asserted-by":"crossref","unstructured":"Xing Dai Zeren Jiang Zhao Wu Yiping Bao Zhicheng Wang Si Liu and Erjin Zhou. 2021. General instance distillation for object detection. In CVPR. 7842--7851.","key":"e_1_3_2_2_7_1","DOI":"10.1109\/CVPR46437.2021.00775"},{"unstructured":"Jianyuan Guo Kai Han Yunhe Wang Han Wu Xinghao Chen Chunjing Xu and Chang Xu. 2021. Distilling object detectors via decoupled features. In CVPR. 2154--2164.","key":"e_1_3_2_2_8_1"},{"key":"e_1_3_2_2_9_1","volume-title":"JointPruning: Pruning networks along multiple dimensions for efficient point cloud processing","author":"Guo Jinyang","year":"2021","unstructured":"Jinyang Guo, Jiaheng Liu, and Dong Xu. 2021. JointPruning: Pruning networks along multiple dimensions for efficient point cloud processing. IEEE Transactions on Circuits and Systems for Video Technology (2021)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_10_1","DOI":"10.1109\/TCSVT.2022.3197395"},{"unstructured":"Jinyang Guo Wanli Ouyang and Dong Xu. 2020. Multi-Dimensional Pruning: A Unified Framework for Model Compression. In CVPR.","key":"e_1_3_2_2_11_1"},{"key":"e_1_3_2_2_12_1","volume-title":"Forty-first International Conference on Machine Learning.","author":"Guo Jinyang","year":"2024","unstructured":"Jinyang Guo, Jianyu Wu, Zining Wang, Jiaheng Liu, Ge Yang, Yifu Ding, Ruihao Gong, Haotong Qin, and Xianglong Liu. 2024. Compressing large language models by joint sparsification and quantization. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_2_2_13_1","volume-title":"Cbanet: Towards complexity and bitrate adaptive deep image compression using a single network","author":"Guo Jinyang","year":"2023","unstructured":"Jinyang Guo, Dong Xu, and Guo Lu. 2023. Cbanet: Towards complexity and bitrate adaptive deep image compression using a single network. IEEE Transactions on Image Processing (2023)."},{"key":"e_1_3_2_2_14_1","volume-title":"Multidimensional Pruning and Its Extension: A Unified Framework for Model Compression","author":"Guo Jinyang","year":"2023","unstructured":"Jinyang Guo, Dong Xu, and Wanli Ouyang. 2023. Multidimensional Pruning and Its Extension: A Unified Framework for Model Compression. IEEE Transactions on Neural Networks and Learning Systems (2023)."},{"key":"e_1_3_2_2_15_1","volume-title":"Model compression using progressive channel pruning","author":"Guo Jinyang","year":"2020","unstructured":"Jinyang Guo, Weichen Zhang, Wanli Ouyang, and Dong Xu. 2020. Model compression using progressive channel pruning. IEEE Transactions on Circuits and Systems for Video Technology (2020)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_16_1","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_2_17_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"doi-asserted-by":"crossref","unstructured":"J. Guo W. Ouyang and D. Xu. 2020. Channel pruning guided by classification loss and feature importance. In AAAI.","key":"e_1_3_2_2_18_1","DOI":"10.1609\/aaai.v34i07.6720"},{"doi-asserted-by":"crossref","unstructured":"Xiao Jin Baoyun Peng Yichao Wu Yu Liu Jiaheng Liu Ding Liang Junjie Yan and Xiaolin Hu. 2019. Knowledge distillation via route constrained optimization. In ICCV.","key":"e_1_3_2_2_19_1","DOI":"10.1109\/ICCV.2019.00143"},{"key":"e_1_3_2_2_20_1","volume-title":"Lift and Fit: Automatic 3D Shape Labeling from 2D Prompts. arXiv preprint arXiv:2407.11382","author":"Li Jianhao","year":"2024","unstructured":"Jianhao Li, Tianyu Sun, Zhongdao Wang, Enze Xie, Bailan Feng, Hongbo Zhang, Ze Yuan, Ke Xu, Jiaheng Liu, and Ping Luo. 2024. Segment, Lift and Fit: Automatic 3D Shape Labeling from 2D Prompts. arXiv preprint arXiv:2407.11382 (2024)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_21_1","DOI":"10.1109\/TIP.2022.3193290"},{"key":"e_1_3_2_2_22_1","volume-title":"GeometryMotion-Transformer: An end-to-end framework for 3D action recognition","author":"Liu Jiaheng","year":"2022","unstructured":"Jiaheng Liu, Jinyang Guo, and Dong Xu. 2022. GeometryMotion-Transformer: An end-to-end framework for 3D action recognition. IEEE Transactions on Multimedia (2022)."},{"key":"e_1_3_2_2_23_1","volume-title":"3d-queryis: A query-based framework for 3d instance segmentation. arXiv preprint arXiv:2211.09375","author":"Liu Jiaheng","year":"2022","unstructured":"Jiaheng Liu, Tong He, Honghui Yang, Rui Su, Jiayi Tian, Junran Wu, Hongcheng Guo, Ke Xu, and Wanli Ouyang. 2022. 3d-queryis: A query-based framework for 3d instance segmentation. arXiv preprint arXiv:2211.09375 (2022)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_24_1","DOI":"10.1109\/CVPR52733.2024.02648"},{"unstructured":"Jiaheng Liu Jianhao Li Kaisiyuan Wang Hongcheng Guo Jian Yang Junran Peng Ke Xu Xianglong Liu and Jinyang Guo. 2024. LTA-PCS: Learnable Task-Agnostic Point Cloud Sampling. In CVPR.","key":"e_1_3_2_2_25_1"},{"volume-title":"2022 d. CoupleFace: relation matters for face recognition distillation","author":"Liu Jiaheng","unstructured":"Jiaheng Liu, Haoyu Qin, Yichao Wu, Jinyang Guo, Ding Liang, and Ke Xu. 2022 d. CoupleFace: relation matters for face recognition distillation. In ECCV. Springer.","key":"e_1_3_2_2_26_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_27_1","DOI":"10.18653\/v1\/2022.findings-naacl.142"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_28_1","DOI":"10.1109\/TIP.2020.3028288"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_29_1","DOI":"10.1609\/aaai.v34i04.5924"},{"doi-asserted-by":"crossref","unstructured":"Ze Liu Zheng Zhang Yue Cao Han Hu and Xin Tong. 2021. Group-free 3d object detection via transformers. In ICCV. 2949--2958.","key":"e_1_3_2_2_30_1","DOI":"10.1109\/ICCV48922.2021.00294"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_31_1","DOI":"10.1109\/CVPR52733.2024.01509"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_32_1","DOI":"10.1109\/ICCV48922.2021.00290"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_33_1","DOI":"10.1109\/JAS.2023.123660"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_34_1","DOI":"10.1109\/CVPR.2019.00409"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_35_1","DOI":"10.1007\/978-3-030-01252-6_17"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_36_1","DOI":"10.1109\/ICCV.2019.00511"},{"doi-asserted-by":"crossref","unstructured":"Baoyun Peng Xiao Jin Jiaheng Liu Dongsheng Li Yichao Wu Yu Liu Shunfeng Zhou and Zhaoning Zhang. 2019. Correlation Congruence for Knowledge Distillation. In ICCV.","key":"e_1_3_2_2_37_1","DOI":"10.1109\/ICCV.2019.00511"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_38_1","DOI":"10.1109\/CVPR42600.2020.00973"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_39_1","DOI":"10.1109\/TPAMI.2023.3276392"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_40_1","DOI":"10.1109\/ICCV.2019.00970"},{"key":"e_1_3_2_2_41_1","volume-title":"2019 d. Efficient neural architecture transformation search in channel-level for object detection. Advances in neural information processing systems","author":"Peng Junran","year":"2019","unstructured":"Junran Peng, Ming Sun, ZHAO-XIANG ZHANG, Tieniu Tan, and Junjie Yan. 2019 d. Efficient neural architecture transformation search in channel-level for object detection. Advances in neural information processing systems, Vol. 32 (2019)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_42_1","DOI":"10.1016\/j.patcog.2021.108199"},{"unstructured":"Charles R Qi Or Litany Kaiming He and Leonidas J Guibas. 2019. Deep hough voting for 3d object detection in point clouds. In ICCV. 9277--9286.","key":"e_1_3_2_2_43_1"},{"unstructured":"Charles R Qi Wei Liu Chenxia Wu Hao Su and Leonidas J Guibas. 2018. Frustum pointnets for 3d object detection from rgb-d data. In CVPR.","key":"e_1_3_2_2_44_1"},{"key":"e_1_3_2_2_45_1","volume-title":"Pointnet: Deep learning on point sets for 3d classification and segmentation. In CVPR. 652--660.","author":"Qi Charles R","year":"2017","unstructured":"Charles R Qi, Hao Su, Kaichun Mo, and Leonidas J Guibas. 2017. Pointnet: Deep learning on point sets for 3d classification and segmentation. In CVPR. 652--660."},{"key":"e_1_3_2_2_46_1","volume-title":"Pointnet: Deep hierarchical feature learning on point sets in a metric space. NIPS","author":"Qi Charles Ruizhongtai","year":"2017","unstructured":"Charles Ruizhongtai Qi, Li Yi, Hao Su, and Leonidas J Guibas. 2017. Pointnet: Deep hierarchical feature learning on point sets in a metric space. NIPS (2017)."},{"key":"e_1_3_2_2_47_1","volume-title":"The IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Qian Xie","year":"2020","unstructured":"Xie Qian, Lai Yu-kun, Wu Jing, Wang Zhoutao, Zhang Yiming, Xu Kai, and Wang Jun. 2020. MLCVNet: Multi-Level Context VoteNet for 3D Object Detection. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_48_1","volume-title":"Antoine Chassang, Carlo Gatta, and Yoshua Bengio.","author":"Romero Adriana","year":"2014","unstructured":"Adriana Romero, Nicolas Ballas, Samira Ebrahimi Kahou, Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2014. Fitnets: Hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)."},{"key":"e_1_3_2_2_49_1","volume-title":"Pv-rcnn: Point-voxel feature set abstraction for 3d object detection. In CVPR. 10529--10538.","author":"Shi Shaoshuai","year":"2020","unstructured":"Shaoshuai Shi, Chaoxu Guo, Li Jiang, Zhe Wang, Jianping Shi, Xiaogang Wang, and Hongsheng Li. 2020. Pv-rcnn: Point-voxel feature set abstraction for 3d object detection. In CVPR. 10529--10538."},{"doi-asserted-by":"crossref","unstructured":"S Shi X Wang H Pointrcnn Li et al. 2019. 3d object proposal generation and detection from point cloud. In CVPR. 16--20.","key":"e_1_3_2_2_50_1","DOI":"10.1109\/CVPR.2019.00086"},{"doi-asserted-by":"crossref","unstructured":"Shuran Song Samuel P Lichtenberg and Jianxiong Xiao. 2015. Sun rgb-d: A rgb-d scene understanding benchmark suite. In CVPR. 567--576.","key":"e_1_3_2_2_51_1","DOI":"10.1109\/CVPR.2015.7298655"},{"unstructured":"Yonglong Tian Dilip Krishnan and Phillip Isola. 2020. Contrastive Representation Distillation. In ICLR.","key":"e_1_3_2_2_52_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_53_1","DOI":"10.1007\/978-3-031-20077-9_8"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_54_1","DOI":"10.3390\/electronics13050887"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_55_1","DOI":"10.1109\/CVPR52729.2023.00907"},{"key":"e_1_3_2_2_56_1","first-page":"21300","article-title":"d. Towards efficient 3d object detection with knowledge distillation","volume":"35","author":"Yang Jihan","year":"2022","unstructured":"Jihan Yang, Shaoshuai Shi, Runyu Ding, Zhe Wang, and Xiaojuan Qi. 2022 d. Towards efficient 3d object detection with knowledge distillation. Advances in Neural Information Processing Systems, Vol. 35 (2022), 21300--21313.","journal-title":"Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_57_1","DOI":"10.1109\/CVPR52688.2022.00460"},{"volume-title":"Masked generative distillation","author":"Yang Zhendong","unstructured":"Zhendong Yang, Zhe Li, Mingqi Shao, Dachuan Shi, Zehuan Yuan, and Chun Yuan. 2022. Masked generative distillation. In ECCV. Springer, 53--69.","key":"e_1_3_2_2_58_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_59_1","DOI":"10.1109\/CVPR42600.2020.01105"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_60_1","DOI":"10.1109\/ICCV51070.2023.01924"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_61_1","DOI":"10.1109\/CVPR52729.2023.02087"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_62_1","DOI":"10.1007\/s11263-021-01573-6"},{"key":"e_1_3_2_2_63_1","volume-title":"Proceedings, Part XII. Springer-Verlag","author":"Zhang Zaiwei","year":"2020","unstructured":"Zaiwei Zhang, Bo Sun, Haitao Yang, and Qixing Huang. 2020. H3DNet: 3D Object Detection Using Hybrid Geometric Primitives. In Computer Vision -- ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part XII. Springer-Verlag, Berlin, Heidelberg, 311--329."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_64_1","DOI":"10.1109\/TCSVT.2021.3102025"}],"event":{"sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"acronym":"MM '24","name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681121","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681121","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:53Z","timestamp":1750294673000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681121"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":64,"alternative-id":["10.1145\/3664647.3681121","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681121","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}