{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T06:29:11Z","timestamp":1761719351949,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","funder":[{"name":"Fundamental Research Funds for the Central Universities","award":["WU-KT24003"],"award-info":[{"award-number":["WU-KT24003"]}]},{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62002299"],"award-info":[{"award-number":["62002299"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3733359","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:29:43Z","timestamp":1750876183000},"page":"81-89","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["HM3D: A Lightweight Hierarchical Mamba Model for Efficient 3D Point Cloud Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-4932-9636","authenticated-orcid":false,"given":"Tianyi","family":"Chen","sequence":"first","affiliation":[{"name":"College of Computer and Information Science, Southwest University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4869-4537","authenticated-orcid":false,"given":"Xian-Feng","family":"Han","sequence":"additional","affiliation":[{"name":"College of Computer and Information Science, Southwest University, Chongqing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.170"},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 77--85","author":"Charles R. Qi","key":"e_1_3_2_1_2_1","unstructured":"R. Qi Charles, Hao Su, Mo Kaichun, and Leonidas J. Guibas. 2017. PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 77--85."},{"key":"e_1_3_2_1_3_1","unstructured":"Guangyan Chen Meiling Wang Yi Yang Kai Yu Li Yuan and Yufeng Yue. 2023. PointGPT: Auto-regressively Generative Pre-training from Point Clouds. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.2984780"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.691"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 41st International Conference on Machine Learning. 10041--10071","author":"Dao Tri","year":"2024","unstructured":"Tri Dao and Albert Gu. 2024. Transformers are SSMs: Generalized Models and Efficient Algorithms Through Structured State Space Duality. In Proceedings of the 41st International Conference on Machine Learning. 10041--10071."},{"key":"e_1_3_2_1_7_1","volume-title":"International Conference on Learning Representations.","author":"Dong Runpei","year":"2023","unstructured":"Runpei Dong, Zekun Qi, Linfeng Zhang, Junbo Zhang, Jianjian Sun, Zheng Ge, Li Yi, and Kaisheng Ma. 2023. Autoencoders as Cross-Modal Teachers: Can Pretrained 2D Image Transformers Help 3D Representation Learning?. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3416519"},{"key":"e_1_3_2_1_9_1","volume-title":"Mamba: Linear-Time Sequence Modeling with Selective State Spaces. In First Conference on Language Modeling.","author":"Gu Albert","year":"2024","unstructured":"Albert Gu and Tri Dao. 2024. Mamba: Linear-Time Sequence Modeling with Selective State Spaces. In First Conference on Language Modeling."},{"key":"e_1_3_2_1_10_1","first-page":"1474","article-title":"HiPPO: Recurrent Memory with Optimal Polynomial Projections","volume":"33","author":"Gu Albert","year":"2020","unstructured":"Albert Gu, Tri Dao, Stefano Ermon, Atri Rudra, and Christopher R'e. 2020. HiPPO: Recurrent Memory with Optimal Polynomial Projections. In Advances in Neural Information Processing Systems, Vol. 33. 1474--1487.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","first-page":"35971","article-title":"On the Parameterization and Initialization of Diagonal State Space Models","volume":"35","author":"Gu Albert","year":"2022","unstructured":"Albert Gu, Karan Goel, Ankit Gupta, and Christopher R\u00e9. 2022b. On the Parameterization and Initialization of Diagonal State Space Models. In Advances in Neural Information Processing Systems, Vol. 35. 35971--35983.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","volume-title":"Efficiently Modeling Long Sequences with Structured State Spaces. In International Conference on Learning Representations.","author":"Gu Albert","year":"2022","unstructured":"Albert Gu, Karan Goel, and Christopher Re. 2022a. Efficiently Modeling Long Sequences with Structured State Spaces. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_13_1","first-page":"572","article-title":"Combining Recurrent, Convolutional, and Continuous-time Models with Linear State Space Layers","volume":"34","author":"Gu Albert","year":"2021","unstructured":"Albert Gu, Isys Johnson, Karan Goel, Khaled Saab, Tri Dao, Atri Rudra, and Christopher R\u00e9. 2021. Combining Recurrent, Convolutional, and Continuous-time Models with Linear State Space Layers. In Advances in Neural Information Processing Systems, Vol. 34. 572--585.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681173"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-023-10486-4"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3147907"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3198318"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01979"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.17018"},{"key":"e_1_3_2_1_20_1","first-page":"47242","article-title":"3DET-Mamba: Causal Sequence Modelling for End-to-End 3D Object Detection","volume":"37","author":"Li Mingsheng","year":"2024","unstructured":"Mingsheng Li, Jiakang Yuan, Sijin Chen, Lin Zhang, Anyu Zhu, Xin Chen, and Tao Chen. 2024. 3DET-Mamba: Causal Sequence Modelling for End-to-End 3D Object Detection. In Advances in Neural Information Processing Systems, Vol. 37. 47242--47260.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","volume-title":"Advances in Neural Information Processing Systems","volume":"37","author":"Liang Dingkang","year":"2024","unstructured":"Dingkang Liang, Xin Zhou, Wei Xu, Xingkui Zhu, Zhikang Zou, Xiaoqing Ye, Xiao Tan, and Xiang Bai. 2024. PointMamba: A Simple State Space Model for Point Cloud Analysis. In Advances in Neural Information Processing Systems, Vol. 37."},{"key":"e_1_3_2_1_22_1","first-page":"103031","article-title":"VMamba: Visual State Space Model","volume":"37","author":"Liu Yue","year":"2024","unstructured":"Yue Liu, Yunjie Tian, Yuzhong Zhao, Hongtian Yu, Lingxi Xie, Yaowei Wang, Qixiang Ye, Jianbin Jiao, and Yunfan Liu. 2024. VMamba: Visual State Space Model. In Advances in Neural Information Processing Systems, Vol. 37. 103031--103063.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3198836"},{"key":"e_1_3_2_1_24_1","volume-title":"Rethinking Network Design and Local Geometry in Point Cloud: A Simple Residual MLP Framework. In International Conference on Learning Representations.","author":"Ma Xu","year":"2022","unstructured":"Xu Ma, Can Qin, Haoxuan You, Haoxi Ran, and Yun Fu. 2022. Rethinking Network Design and Local Geometry in Point Cloud: A Simple Residual MLP Framework. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3132430"},{"key":"e_1_3_2_1_26_1","volume-title":"Masked Autoencoders for Point Cloud Self-supervised Learning. In European conference on computer vision. 604--621","author":"Pang Yatian","year":"2022","unstructured":"Yatian Pang, Wenxiao Wang, Francis EH Tay, Wei Liu, Yonghong Tian, and Li Yuan. 2022. Masked Autoencoders for Point Cloud Self-supervised Learning. In European conference on computer vision. 604--621."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01644"},{"key":"e_1_3_2_1_28_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Qi Charles Ruizhongtai","year":"2017","unstructured":"Charles Ruizhongtai Qi, Li Yi, Hao Su, and Leonidas J Guibas. 2017. PointNet: Deep Hierarchical Feature Learning on Point Sets in a Metric Space. In Advances in Neural Information Processing Systems, Vol. 30."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.701"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160452"},{"key":"e_1_3_2_1_31_1","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, Vol. 30."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3326362"},{"key":"e_1_3_2_1_33_1","volume-title":"Stronger. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 4840--4851","author":"Wu Xiaoyang","year":"2024","unstructured":"Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, and Hengshuang Zhao. 2024. Point Transformer V3: Simpler, Faster, Stronger. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 4840--4851."},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1912--1920","author":"Wu Zhirong","year":"2015","unstructured":"Zhirong Wu, Shuran Song, Aditya Khosla, Fisher Yu, Linguang Zhang, Xiaoou Tang, and Jianxiong Xiao. 2015. 3D ShapeNets: A Deep Representation for Volumetric Shapes. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1912--1920."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01417"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01871"},{"key":"e_1_3_2_1_37_1","volume-title":"Point Transformer. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 16239--16248","author":"Zhao Hengshuang","year":"2021","unstructured":"Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip Torr, and Vladlen Koltun. 2021. Point Transformer. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 16239--16248."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00472"},{"key":"e_1_3_2_1_39_1","first-page":"77799","article-title":"Point Cloud Matters: Rethinking the Impact of Different Observation Spaces on Robot Learning","volume":"37","author":"Zhu Haoyi","year":"2024","unstructured":"Haoyi Zhu, Yating Wang, Di Huang, Weicai Ye, Wanli Ouyang, and Tong He. 2024b. Point Cloud Matters: Rethinking the Impact of Different Observation Spaces on Robot Learning. In Advances in Neural Information Processing Systems, Vol. 37. 77799--77830.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the 41st International Conference on Machine Learning","volume":"235","author":"Zhu Lianghui","year":"2024","unstructured":"Lianghui Zhu, Bencheng Liao, Qian Zhang, Xinlong Wang, Wenyu Liu, and Xinggang Wang. 2024a. Vision Mamba: Efficient Visual Representation Learning with Bidirectional State Space Model. In Proceedings of the 41st International Conference on Machine Learning, Vol. 235. 62429--62442."}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Chicago IL USA","acronym":"ICMR '25"},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3733359","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:11:44Z","timestamp":1755749504000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3733359"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":40,"alternative-id":["10.1145\/3731715.3733359","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3733359","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}