{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T00:20:19Z","timestamp":1776212419188,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"China National Natural Science Foundation","award":["62202182"],"award-info":[{"award-number":["62202182"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681173","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"4995-5004","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":52,"title":["<scp>Mamba3D:<\/scp>\n            Enhancing Local Features for 3D Point Cloud Analysis via State Space Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-1966-2585","authenticated-orcid":false,"given":"Xu","family":"Han","sequence":"first","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6772-9427","authenticated-orcid":false,"given":"Yuan","family":"Tang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4797-9410","authenticated-orcid":false,"given":"Zhaoxuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6835-5607","authenticated-orcid":false,"given":"Xianzhi","family":"Li","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, Wuhan, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00967"},{"key":"e_1_3_2_1_2_1","volume-title":"Point Convolutional Neural Networks by Extension Operators. arXiv preprint arXiv:1803.10091","author":"Atzmon Matan","year":"2018","unstructured":"Matan Atzmon, Haggai Maron, and Yaron Lipman. 2018. Point Convolutional Neural Networks by Extension Operators. arXiv preprint arXiv:1803.10091 (2018)."},{"key":"e_1_3_2_1_3_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer Normalization. arXiv preprint arXiv:1607.06450 (2016)."},{"key":"e_1_3_2_1_4_1","volume-title":"ShapeNet: An Information-Rich 3D Model Repository. CoRR","author":"Chang Angel X.","year":"2015","unstructured":"Angel X. Chang, Thomas A. Funkhouser, Leonidas J. Guibas, Pat Hanrahan, Qi-Xing Huang, Zimo Li, Silvio Savarese, Manolis Savva, Shuran Song, Hao Su, Jianxiong Xiao, Li Yi, and Fisher Yu. 2015. ShapeNet: An Information-Rich 3D Model Repository. CoRR, Vol. abs\/1512.03012 (2015). showeprint[arXiv]1512.03012"},{"key":"e_1_3_2_1_5_1","volume-title":"Decoupled Local Aggregation for Point Cloud Learning. arXiv preprint arXiv:2308.16532","author":"Chen Binjie","year":"2023","unstructured":"Binjie Chen, Yunzhou Xia, Yu Zang, Cheng Wang, and Jonathan Li. 2023. Decoupled Local Aggregation for Point Cloud Learning. arXiv preprint arXiv:2308.16532 (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"Adv. Neural Inform. Process. Syst. (NeurIPS)","volume":"36","author":"Chen Guangyan","year":"2024","unstructured":"Guangyan Chen, Meiling Wang, Yi Yang, Kai Yu, Li Yuan, and Yufeng Yue. 2024. PointGPT: Auto-regressively Generative Pre-training from Point Clouds. Adv. Neural Inform. Process. Syst. (NeurIPS), Vol. 36 (2024)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3072214"},{"key":"e_1_3_2_1_8_1","volume-title":"PointVector: A Vector Representation in Point Cloud Analysis. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 9455--9465","author":"Deng Xin","year":"2023","unstructured":"Xin Deng, WenYu Zhang, Qing Ding, and XinMing Zhang. 2023. PointVector: A Vector Representation in Point Cloud Analysis. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 9455--9465."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2--7, 2019, Volume 1 (Long and Short Papers). Association for Computational Linguistics, 4171--4186."},{"key":"e_1_3_2_1_10_1","volume-title":"Int. Conf. Learn. Represent. (ICLR).","author":"Dong Runpei","year":"2023","unstructured":"Runpei Dong, Zekun Qi, Linfeng Zhang, Junbo Zhang, Jianjian Sun, Zheng Ge, Li Yi, and Kaisheng Ma. 2023. Autoencoders as Cross-Modal Teachers: Can Pretrained 2D Image Transformers Help 3D Representation Learning?. In Int. Conf. Learn. Represent. (ICLR)."},{"key":"e_1_3_2_1_11_1","volume-title":"Int. Conf. Learn. Represent. (ICLR).","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In Int. Conf. Learn. Represent. (ICLR)."},{"key":"e_1_3_2_1_12_1","volume-title":"IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR).","author":"Fan Haoqiang","unstructured":"Haoqiang Fan, Hao Su, and Leonidas J. Guibas. 2017. A Point Set Generation Network for 3D Object Reconstruction from a Single Image. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR)."},{"key":"e_1_3_2_1_13_1","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML). PMLR, 7616--7633","author":"Goel Karan","year":"2022","unstructured":"Karan Goel, Albert Gu, Chris Donahue, and Christopher R\u00e9. 2022. It's Raw! Audio Generation with State-Space Models. In Proc. Int. Conf. Mach. Learn. (ICML). PMLR, 7616--7633."},{"key":"e_1_3_2_1_14_1","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML) (Proceedings of Machine Learning Research","volume":"3820","author":"Goyal Ankit","year":"2021","unstructured":"Ankit Goyal, Hei Law, Bowei Liu, Alejandro Newell, and Jia Deng. 2021. Revisiting Point Cloud Shape Classification with a Simple and Effective Baseline. In Proc. Int. Conf. Mach. Learn. (ICML) (Proceedings of Machine Learning Research, Vol. 139). PMLR, 3809--3820."},{"key":"e_1_3_2_1_15_1","volume-title":"Mamba: Linear-Time Sequence Modeling with Selective State Spaces. arXiv preprint arXiv:2312.00752","author":"Gu Albert","year":"2023","unstructured":"Albert Gu and Tri Dao. 2023. Mamba: Linear-Time Sequence Modeling with Selective State Spaces. arXiv preprint arXiv:2312.00752 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Efficiently Modeling Long Sequences with Structured State Spaces. arXiv preprint arXiv:2111.00396","author":"Gu Albert","year":"2021","unstructured":"Albert Gu, Karan Goel, and Christopher R\u00e9. 2021. Efficiently Modeling Long Sequences with Structured State Spaces. arXiv preprint arXiv:2111.00396 (2021)."},{"key":"e_1_3_2_1_17_1","first-page":"572","article-title":"Combining Recurrent, Convolutional, and Continuous-time Models with Linear State Space","volume":"34","author":"Gu Albert","year":"2021","unstructured":"Albert Gu, Isys Johnson, Karan Goel, Khaled Saab, Tri Dao, Atri Rudra, and Christopher R\u00e9. 2021. Combining Recurrent, Convolutional, and Continuous-time Models with Linear State Space Layers. Adv. Neural Inform. Process. Syst. (NeurIPS), Vol. 34 (2021), 572--585.","journal-title":"Layers. Adv. Neural Inform. Process. Syst. (NeurIPS)"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-021-0229-5"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3005434"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00007"},{"key":"e_1_3_2_1_21_1","volume-title":"Deep Residual Learning for Image Recognition. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). IEEE Computer Society, 770--778","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). IEEE Computer Society, 770--778."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Rudolph Emil Kalman. 1960. A New Approach to Linear Filtering and Prediction Problems. (1960).","DOI":"10.1115\/1.3662552"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00831"},{"key":"e_1_3_2_1_24_1","volume-title":"Large-Scale Point Cloud Semantic Segmentation With Superpoint Graphs. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 4558--4567","author":"Landrieu Loic","year":"2018","unstructured":"Loic Landrieu and Martin Simonovsky. 2018. Large-Scale Point Cloud Semantic Segmentation With Superpoint Graphs. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 4558--4567."},{"key":"e_1_3_2_1_25_1","unstructured":"Yangyan Li Rui Bu Mingchao Sun Wei Wu Xinhan Di and Baoquan Chen. 2018. PointCNN: Convolution On X-Transformed Points. In Adv. Neural Inform. Process. Syst. (NeurIPS). 828--838."},{"key":"e_1_3_2_1_26_1","volume-title":"PointMamba: A Simple State Space Model for Point Cloud Analysis. arXiv preprint arXiv:2402.10739","author":"Liang Dingkang","year":"2024","unstructured":"Dingkang Liang, Xin Zhou, Xinyu Wang, Xingkui Zhu, Wei Xu, Zhikang Zou, Xiaoqing Ye, and Xiang Bai. 2024. PointMamba: A Simple State Space Model for Point Cloud Analysis. arXiv preprint arXiv:2402.10739 (2024)."},{"key":"e_1_3_2_1_27_1","volume-title":"Masked Discrimination for Self-Supervised Learning on Point Clouds. In Eur. Conf. Comput. Vis. (ECCV).","author":"Liu Haotian","year":"2022","unstructured":"Haotian Liu, Mu Cai, and Yong Jae Lee. 2022. Masked Discrimination for Self-Supervised Learning on Point Clouds. In Eur. Conf. Comput. Vis. (ECCV)."},{"key":"e_1_3_2_1_28_1","volume-title":"Regress Before Construct: Regress Autoencoder for Point Cloud Self-supervised Learning. In ACM Int. Conf. Multimedia (ACM MM). 1738--1749","author":"Liu Yang","year":"2023","unstructured":"Yang Liu, Chen Chen, Can Wang, Xulin King, and Mengyuan Liu. 2023. Regress Before Construct: Regress Autoencoder for Point Cloud Self-supervised Learning. In ACM Int. Conf. Multimedia (ACM MM). 1738--1749."},{"key":"e_1_3_2_1_29_1","volume-title":"Point Cloud Classification Using Content-Based Transformer via Clustering in Feature Space","author":"Liu Yahui","year":"2023","unstructured":"Yahui Liu, Bin Tian, Yisheng Lv, Lingxi Li, and Fei-Yue Wang. 2023. Point Cloud Classification Using Content-Based Transformer via Clustering in Feature Space. IEEE\/CAA Journal of Automatica Sinica (2023)."},{"key":"e_1_3_2_1_30_1","volume-title":"Decoupled Weight Decay Regularization. In Int. Conf. Learn. Represent. (ICLR).","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In Int. Conf. Learn. Represent. (ICLR)."},{"key":"e_1_3_2_1_31_1","volume-title":"Rethinking Network Design and Local Geometry in Point Cloud: A Simple Residual MLP Framework. In Int. Conf. Learn. Represent. (ICLR). OpenReview.net.","author":"Ma Xu","year":"2022","unstructured":"Xu Ma, Can Qin, Haoxuan You, Haoxi Ran, and Yun Fu. 2022. Rethinking Network Design and Local Geometry in Point Cloud: A Simple Residual MLP Framework. In Int. Conf. Learn. Represent. (ICLR). OpenReview.net."},{"key":"e_1_3_2_1_32_1","first-page":"2846","article-title":"S4ND: Modeling Images and Videos as Multidimensional Signals with State","volume":"35","author":"Nguyen Eric","year":"2022","unstructured":"Eric Nguyen, Karan Goel, Albert Gu, Gordon Downs, Preey Shah, Tri Dao, Stephen Baccus, and Christopher R\u00e9. 2022. S4ND: Modeling Images and Videos as Multidimensional Signals with State Spaces. Adv. Neural Inform. Process. Syst. (NeurIPS), Vol. 35 (2022), 2846--2861.","journal-title":"Spaces. Adv. Neural Inform. Process. Syst. (NeurIPS)"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28238"},{"key":"e_1_3_2_1_34_1","volume-title":"Masked Autoencoders for Point Cloud Self-supervised Learning. In Eur. Conf. Comput. Vis. (ECCV).","author":"Pang Yatian","year":"2022","unstructured":"Yatian Pang, Wenxiao Wang, Francis E. H. Tay, Wei Liu, Yonghong Tian, and Li Yuan. 2022. Masked Autoencoders for Point Cloud Self-supervised Learning. In Eur. Conf. Comput. Vis. (ECCV)."},{"key":"e_1_3_2_1_35_1","volume-title":"Self-Positioning Point-Based Transformer for Point Cloud Understanding. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 21814--21823","author":"Park Jinyoung","year":"2023","unstructured":"Jinyoung Park, Sanghyeok Lee, Sihyeon Kim, Yunyang Xiong, and Hyunwoo J Kim. 2023. Self-Positioning Point-Based Transformer for Point Cloud Understanding. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 21814--21823."},{"key":"e_1_3_2_1_36_1","volume-title":"IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 77--85","author":"Qi Charles Ruizhongtai","unstructured":"Charles Ruizhongtai Qi, Hao Su, Kaichun Mo, and Leonidas J. Guibas. 2017. PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 77--85."},{"key":"e_1_3_2_1_37_1","volume-title":"Guibas","author":"Qi Charles Ruizhongtai","year":"2017","unstructured":"Charles Ruizhongtai Qi, Li Yi, Hao Su, and Leonidas J. Guibas. 2017. PointNet: Deep Hierarchical Feature Learning on Point Sets in a Metric Space. In Adv. Neural Inform. Process. Syst. (NeurIPS). 5099--5108."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00607"},{"key":"e_1_3_2_1_39_1","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML). PMLR, 28223--28243","author":"Qi Zekun","year":"2023","unstructured":"Zekun Qi, Runpei Dong, Guofan Fan, Zheng Ge, Xiangyu Zhang, Kaisheng Ma, and Li Yi. 2023. Contrast with Reconstruct: Contrastive 3D Representation Learning Guided by Generative Pretraining. In Proc. Int. Conf. Mach. Learn. (ICML). PMLR, 28223--28243."},{"key":"e_1_3_2_1_40_1","volume-title":"ShapeLLM: Universal 3D Object Understanding for Embodied Interaction. arXiv preprint arXiv:2402.17766","author":"Qi Zekun","year":"2024","unstructured":"Zekun Qi, Runpei Dong, Shaochen Zhang, Haoran Geng, Chunrui Han, Zheng Ge, Li Yi, and Kaisheng Ma. 2024. ShapeLLM: Universal 3D Object Understanding for Embodied Interaction. arXiv preprint arXiv:2402.17766 (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"Mohamed Elhoseiny, and Bernard Ghanem.","author":"Qian Guocheng","year":"2022","unstructured":"Guocheng Qian, Yuchen Li, Houwen Peng, Jinjie Mai, Hasan Abed Al Kader Hammoud, Mohamed Elhoseiny, and Bernard Ghanem. 2022. PointNeXt: Revisiting PointNet with Improved Training and Scaling Strategies. In Adv. Neural Inform. Process. Syst. (NeurIPS)."},{"key":"e_1_3_2_1_42_1","volume-title":"Dense-Resolution Network for Point Cloud Classification and Segmentation. In IEEE Winter Conf. Appl. Comput. Vis. (WACV). 3812--3821","author":"Qiu Shi","year":"2021","unstructured":"Shi Qiu, Saeed Anwar, and Nick Barnes. 2021. Dense-Resolution Network for Point Cloud Classification and Segmentation. In IEEE Winter Conf. Appl. Comput. Vis. (WACV). 3812--3821."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3074240"},{"key":"e_1_3_2_1_44_1","volume-title":"Surface Representation for Point Clouds. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 18942--18952","author":"Ran Haoxi","year":"2022","unstructured":"Haoxi Ran, Jun Liu, and Chengjie Wang. 2022. Surface Representation for Point Clouds. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 18942--18952."},{"key":"e_1_3_2_1_45_1","volume-title":"Discrete Variational Autoencoders. arXiv preprint arXiv:1609.02200","author":"Rolfe Jason Tyler","year":"2016","unstructured":"Jason Tyler Rolfe. 2016. Discrete Variational Autoencoders. arXiv preprint arXiv:1609.02200 (2016)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5980567"},{"key":"e_1_3_2_1_47_1","unstructured":"Charu Sharma and Manohar Kaul. 2020. Self-Supervised Few-Shot Learning on Point Clouds. In Adv. Neural Inform. Process. Syst. (NeurIPS)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00086"},{"key":"e_1_3_2_1_49_1","volume-title":"Point-LGMask: Local and Global Contexts Embedding for Point Cloud Pre-training with Multi-Ratio Masking","author":"Tang Yuan","year":"2023","unstructured":"Yuan Tang, Xianzhi Li, Jinfeng Xu, Qiao Yu, Long Hu, Yixue Hao, and Min Chen. 2023. Point-LGMask: Local and Global Contexts Embedding for Point Cloud Pre-training with Multi-Ratio Masking. IEEE Transactions on Multimedia (2023)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00167"},{"key":"e_1_3_2_1_51_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. In Adv. Neural Inform. Process. Syst. (NeurIPS). 5998--6008."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00964"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3326362"},{"key":"e_1_3_2_1_54_1","unstructured":"Ziyi Wang Xumin Yu Yongming Rao Jie Zhou and Jiwen Lu. 2022. P2P: Tuning Pre-trained Image Models for Point Cloud Analysis with Point-to-Pixel Prompting. In Adv. Neural Inform. Process. Syst. (NeurIPS)."},{"key":"e_1_3_2_1_55_1","volume-title":"Point transformer v3: Simpler, faster, stronger. arXiv preprint arXiv:2312.10035","author":"Wu Xiaoyang","year":"2023","unstructured":"Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, and Hengshuang Zhao. 2023. Point transformer v3: Simpler, faster, stronger. arXiv preprint arXiv:2312.10035 (2023)."},{"key":"e_1_3_2_1_56_1","first-page":"33330","article-title":"Point transformer v2: Grouped vector attention and partition-based pooling","volume":"35","author":"Wu Xiaoyang","year":"2022","unstructured":"Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, and Hengshuang Zhao. 2022. Point transformer v2: Grouped vector attention and partition-based pooling. Advances in Neural Information Processing Systems, Vol. 35 (2022), 33330--33342.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_57_1","volume-title":"IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 1912--1920","author":"Wu Zhirong","year":"2015","unstructured":"Zhirong Wu, Shuran Song, Aditya Khosla, Fisher Yu, Linguang Zhang, Xiaoou Tang, and Jianxiong Xiao. 2015. 3d shapenets: A deep representation for volumetric shapes. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 1912--1920."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_34"},{"key":"e_1_3_2_1_59_1","volume-title":"Juan Carlos Niebles, and Silvio Savarese","author":"Xue Le","year":"2023","unstructured":"Le Xue, Ning Yu, Shu Zhang, Junnan Li, Roberto Mart\u00edn-Mart\u00edn, Jiajun Wu, Caiming Xiong, Ran Xu, Juan Carlos Niebles, and Silvio Savarese. 2023. ULIP-2: Towards Scalable Multimodal Pre-training for 3D Understanding. arXiv preprint arXiv:2305.08275 (2023)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/2980179.2980238"},{"key":"e_1_3_2_1_61_1","volume-title":"MetaFormer Is Actually What You Need for Vision. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 10819--10829","author":"Yu Weihao","year":"2022","unstructured":"Weihao Yu, Mi Luo, Pan Zhou, Chenyang Si, Yichen Zhou, Xinchao Wang, Jiashi Feng, and Shuicheng Yan. 2022. MetaFormer Is Actually What You Need for Vision. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 10819--10829."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01871"},{"key":"e_1_3_2_1_63_1","volume-title":"Point2Vec for Self-Supervised Representation Learning on Point Clouds. arXiv preprint arXiv:2303.16570","author":"Zeid Karim Abou","year":"2023","unstructured":"Karim Abou Zeid, Jonas Schult, Alexander Hermans, and Bastian Leibe. 2023. Point2Vec for Self-Supervised Representation Learning on Point Clouds. arXiv preprint arXiv:2303.16570 (2023)."},{"key":"e_1_3_2_1_64_1","volume-title":"Instance-aware Dynamic Prompt Tuning for Pre-trained Point Cloud Models. In Int. Conf. Comput. Vis. (ICCV). 14161--14170","author":"Zha Yaohua","year":"2023","unstructured":"Yaohua Zha, Jinpeng Wang, Tao Dai, Bin Chen, Zhi Wang, and Shu-Tao Xia. 2023. Instance-aware Dynamic Prompt Tuning for Pre-trained Point Cloud Models. In Int. Conf. Comput. Vis. (ICCV). 14161--14170."},{"key":"e_1_3_2_1_65_1","unstructured":"Renrui Zhang Ziyu Guo Peng Gao Rongyao Fang Bin Zhao Dong Wang Yu Qiao and Hongsheng Li. 2022. Point-M2AE: Multi-scale Masked Autoencoders for Hierarchical Point Cloud Pre-training. In Adv. Neural Inform. Process. Syst. (NeurIPS)."},{"key":"e_1_3_2_1_66_1","volume-title":"Point Could Mamba: Point Cloud Learning via State Space Model. arXiv preprint arXiv:2403.00762","author":"Zhang Tao","year":"2024","unstructured":"Tao Zhang, Xiangtai Li, Haobo Yuan, Shunping Ji, and Shuicheng Yan. 2024. Point Could Mamba: Point Cloud Learning via State Space Model. arXiv preprint arXiv:2403.00762 (2024)."},{"key":"e_1_3_2_1_67_1","volume-title":"PointWeb: Enhancing Local Neighborhood Features for Point Cloud Processing. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 5565--5573","author":"Zhao Hengshuang","year":"2019","unstructured":"Hengshuang Zhao, Li Jiang, Chi-Wing Fu, and Jiaya Jia. 2019. PointWeb: Enhancing Local Neighborhood Features for Point Cloud Processing. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 5565--5573."},{"key":"e_1_3_2_1_68_1","volume-title":"Point Transformer. In Int. Conf. Comput. Vis. (ICCV). IEEE, 16239--16248","author":"Zhao Hengshuang","year":"2021","unstructured":"Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip H. S. Torr, and Vladlen Koltun. 2021. Point Transformer. In Int. Conf. Comput. Vis. (ICCV). IEEE, 16239--16248."},{"key":"e_1_3_2_1_69_1","volume-title":"Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 14707--14717","author":"Zhou Xin","year":"2024","unstructured":"Xin Zhou, Dingkang Liang, Wei Xu, Xingkui Zhu, Yihan Xu, Zhikang Zou, and Xiang Bai. 2024. Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis. In IEEE\/CVF Conf. Comput. Vis. Pattern Recog. (CVPR). 14707--14717."},{"key":"e_1_3_2_1_70_1","volume-title":"Vision Mamba: Efficient Visual Representation Learning with Bidirectional State Space Model. arXiv preprint arXiv:2401.09417","author":"Zhu Lianghui","year":"2024","unstructured":"Lianghui Zhu, Bencheng Liao, Qian Zhang, Xinlong Wang, Wenyu Liu, and Xinggang Wang. 2024. Vision Mamba: Efficient Visual Representation Learning with Bidirectional State Space Model. arXiv preprint arXiv:2401.09417 (2024)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681173","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681173","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:02Z","timestamp":1750295882000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681173"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":70,"alternative-id":["10.1145\/3664647.3681173","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681173","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}