{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T21:40:04Z","timestamp":1757540404293,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612409","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"5380-5388","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Bridging Language and Geometric Primitives for Zero-shot Point Cloud Segmentation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8832-9016","authenticated-orcid":false,"given":"Runnan","family":"Chen","sequence":"first","affiliation":[{"name":"The University of Hong Kong, Hong Kong SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0107-8099","authenticated-orcid":false,"given":"Xinge","family":"Zhu","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3104-3017","authenticated-orcid":false,"given":"Nenglun","family":"Chen","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong SAR, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0059-3745","authenticated-orcid":false,"given":"Wei","family":"Li","sequence":"additional","affiliation":[{"name":"Inceptio, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7237-988X","authenticated-orcid":false,"given":"Yuexin","family":"Ma","sequence":"additional","affiliation":[{"name":"ShanghaiTech University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5296-6307","authenticated-orcid":false,"given":"Ruigang","family":"Yang","sequence":"additional","affiliation":[{"name":"Inceptio, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1291-0197","authenticated-orcid":false,"given":"Wenping","family":"Wang","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, College Station, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Joint 2d-3d-semantic data for indoor scene understanding. arXiv preprint arXiv:1702.01105","author":"Armeni Iro","year":"2017","unstructured":"Iro Armeni, Sasha Sax, Amir R Zamir, and Silvio Savarese. 2017. Joint 2d-3d-semantic data for indoor scene understanding. arXiv preprint arXiv:1702.01105 (2017)."},{"volume-title":"Proc. of the IEEE\/CVF International Conf. on Computer Vision (ICCV).","author":"Behley J.","key":"e_1_3_2_1_2_1","unstructured":"J. Behley, M. Garbade, A. Milioto, J. Quenzel, S. Behnke, C. Stachniss, and J. Gall. 2019. SemanticKITTI: A Dataset for Semantic Scene Understanding of LiDAR Sequences. In Proc. of the IEEE\/CVF International Conf. on Computer Vision (ICCV)."},{"key":"e_1_3_2_1_3_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Bucher Maxime","year":"2019","unstructured":"Maxime Bucher, Tuan-Hung Vu, Matthieu Cord, and Patrick P\u00e9rez. 2019. Zero-shot semantic segmentation. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_4_1","volume-title":"Qiang Xu, Anush Krishnan, Yu Pan, Giancarlo Baldan, and Oscar Beijbom.","author":"Caesar Holger","year":"2019","unstructured":"Holger Caesar, Varun Bankiti, Alex H. Lang, Sourabh Vora, Venice Erin Liong, Qiang Xu, Anush Krishnan, Yu Pan, Giancarlo Baldan, and Oscar Beijbom. 2019. nuScenes: A multimodal dataset for autonomous driving. arXiv preprint arXiv:1903.11027 (2019)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00914"},{"key":"e_1_3_2_1_6_1","volume-title":"Studies on attention modeling for visual understanding. HKU Theses Online (HKUTO)","author":"Chen Runnan","year":"2023","unstructured":"Runnan Chen. 2023. Studies on attention modeling for visual understanding. HKU Theses Online (HKUTO) (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"2023 a. Towards Label-free Scene Understanding by Vision Foundation Models. arXiv preprint arXiv:2306.03899","author":"Chen Runnan","year":"2023","unstructured":"Runnan Chen, Youquan Liu, Lingdong Kong, Nenglun Chen, Xinge Zhu, Yuexin Ma, Tongliang Liu, and Wenping Wang. 2023 a. Towards Label-free Scene Understanding by Vision Foundation Models. arXiv preprint arXiv:2306.03899 (2023)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00678"},{"key":"e_1_3_2_1_9_1","volume-title":"Towards 3d scene understanding by referring synthetic models. arXiv preprint arXiv:2203.10546","author":"Chen Runnan","year":"2022","unstructured":"Runnan Chen, Xinge Zhu, Nenglun Chen, Dawei Wang, Wei Li, Yuexin Ma, Ruigang Yang, and Wenping Wang. 2022. Towards 3d scene understanding by referring synthetic models. arXiv preprint arXiv:2203.10546 (2022)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01236"},{"key":"e_1_3_2_1_11_1","volume-title":"Mitigating the hubness problem for zero-shot learning of 3d objects. arXiv preprint arXiv:1907.06371","author":"Cheraghian Ali","year":"2019","unstructured":"Ali Cheraghian, Shafin Rahman, Dylan Campbell, and Lars Petersson. 2019b. Mitigating the hubness problem for zero-shot learning of 3d objects. arXiv preprint arXiv:1907.06371 (2019)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093545"},{"key":"e_1_3_2_1_13_1","volume-title":"Zero-shot learning on 3d point cloud objects and beyond. arXiv preprint arXiv:2104.04980","author":"Cheraghian Ali","year":"2021","unstructured":"Ali Cheraghian, Shafinn Rahman, Townim F Chowdhury, Dylan Campbell, and Lars Petersson. 2021. Zero-shot learning on 3d point cloud objects and beyond. arXiv preprint arXiv:2104.04980 (2021)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.23919\/MVA.2019.8758063"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00319"},{"key":"e_1_3_2_1_16_1","unstructured":"MMDetection3D Contributors. 2020. MMDetection3D: OpenMMLab next-generation platform for general 3D object detection."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"e_1_3_2_1_18_1","volume-title":"Devise: A deep visual-semantic embedding model. Advances in neural information processing systems","author":"Frome Andrea","year":"2013","unstructured":"Andrea Frome, Greg S Corrado, Jon Shlens, Samy Bengio, Jeff Dean, Marc'Aurelio Ranzato, and Tomas Mikolov. 2013. Devise: A deep visual-semantic embedding model. Advances in neural information processing systems, Vol. 26 (2013)."},{"key":"e_1_3_2_1_19_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Fu Yao","year":"2019","unstructured":"Yao Fu, Yansong Feng, and John P Cunningham. 2019. Paraphrase generation with latent bag of words. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413593"},{"key":"e_1_3_2_1_21_1","first-page":"21713","article-title":"Uncertainty-aware learning for zero-shot semantic segmentation","volume":"33","author":"Hu Ping","year":"2020","unstructured":"Ping Hu, Stan Sclaroff, and Kate Saenko. 2020a. Uncertainty-aware learning for zero-shot semantic segmentation. Advances in Neural Information Processing Systems, Vol. 33 (2020), 21713--21724.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01112"},{"key":"e_1_3_2_1_23_1","volume-title":"2023 a. Rethinking range view representation for lidar segmentation. arXiv preprint arXiv:2303.05367","author":"Kong Lingdong","year":"2023","unstructured":"Lingdong Kong, Youquan Liu, Runnan Chen, Yuexin Ma, Xinge Zhu, Yikang Li, Yuenan Hou, Yu Qiao, and Ziwei Liu. 2023 a. Rethinking range view representation for lidar segmentation. arXiv preprint arXiv:2303.05367 (2023)."},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Learning Representations 2023 Workshop on Scene Representations for Autonomous Driving.","author":"Kong Lingdong","year":"2023","unstructured":"Lingdong Kong, Youquan Liu, Xin Li, Runnan Chen, Wenwei Zhang, Jiawei Ren, Liang Pan, Kai Chen, and Ziwei Liu. 2023 b. Benchmarking 3D Perception Robustness to Common Corruptions and Sensor Failure. In International Conference on Learning Representations 2023 Workshop on Scene Representations for Autonomous Driving."},{"key":"e_1_3_2_1_25_1","volume-title":"2023 c. Robo3d: Towards robust and reliable 3d perception against corruptions. arXiv preprint arXiv:2303.17597","author":"Kong Lingdong","year":"2023","unstructured":"Lingdong Kong, Youquan Liu, Xin Li, Runnan Chen, Wenwei Zhang, Jiawei Ren, Liang Pan, Kai Chen, and Ziwei Liu. 2023 c. Robo3d: Towards robust and reliable 3d perception against corruptions. arXiv preprint arXiv:2303.17597 (2023)."},{"key":"e_1_3_2_1_26_1","first-page":"10317","article-title":"Consistent structural relation learning for zero-shot segmentation","volume":"33","author":"Li Peike","year":"2020","unstructured":"Peike Li, Yunchao Wei, and Yi Yang. 2020. Consistent structural relation learning for zero-shot segmentation. Advances in Neural Information Processing Systems, Vol. 33 (2020), 10317--10327.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_27_1","volume-title":"Segment Any Point Cloud Sequences by Distilling Vision Foundation Models. arXiv preprint arXiv:2306.09347","author":"Liu Youquan","year":"2023","unstructured":"Youquan Liu, Lingdong Kong, Jun Cen, Runnan Chen, Wenwei Zhang, Liang Pan, Kai Chen, and Ziwei Liu. 2023. Segment Any Point Cloud Sequences by Distilling Vision Foundation Models. arXiv preprint arXiv:2306.09347 (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"See More and Know More: Zero-shot Point Cloud Segmentation via Multi-modal Visual Data. arXiv preprint arXiv:2307.10782","author":"Lu Yuhang","year":"2023","unstructured":"Yuhang Lu, Qi Jiang, Runnan Chen, Yuenan Hou, Xinge Zhu, and Yuexin Ma. 2023. See More and Know More: Zero-shot Point Cloud Segmentation via Multi-modal Visual Data. arXiv preprint arXiv:2307.10782 (2023)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2020.3023340"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00107"},{"key":"e_1_3_2_1_31_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013a. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_1_32_1","volume-title":"Distributed representations of words and phrases and their compositionality. Advances in neural information processing systems","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg S Corrado, and Jeff Dean. 2013b. Distributed representations of words and phrases and their compositionality. Advances in neural information processing systems, Vol. 26 (2013)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 652--660","author":"Qi Charles R","year":"2017","unstructured":"Charles R Qi, Hao Su, Kaichun Mo, and Leonidas J Guibas. 2017. Pointnet: Deep learning on point sets for 3d classification and segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition. 652--660."},{"key":"e_1_3_2_1_35_1","article-title":"Hubs in space: Popular nearest neighbors in high-dimensional data","volume":"11","author":"Radovanovic Milos","year":"2010","unstructured":"Milos Radovanovic, Alexandros Nanopoulos, and Mirjana Ivanovic. 2010. Hubs in space: Popular nearest neighbors in high-dimensional data. Journal of Machine Learning Research, Vol. 11, sept (2010), 2487--2531.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-23528-8_9"},{"key":"e_1_3_2_1_37_1","first-page":"596","article-title":"Fixmatch: Simplifying semi-supervised learning with consistency and confidence","volume":"33","author":"Sohn Kihyuk","year":"2020","unstructured":"Kihyuk Sohn, David Berthelot, Nicholas Carlini, Zizhao Zhang, Han Zhang, Colin A Raffel, Ekin Dogus Cubuk, Alexey Kurakin, and Chun-Liang Li. 2020. Fixmatch: Simplifying semi-supervised learning with consistency and confidence. Advances in Neural Information Processing Systems, Vol. 33 (2020), 596--608.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00651"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143967"},{"key":"e_1_3_2_1_40_1","volume-title":"Zero-shot learning-a comprehensive evaluation of the good, the bad and the ugly","author":"Xian Yongqin","year":"2018","unstructured":"Yongqin Xian, Christoph H Lampert, Bernt Schiele, and Zeynep Akata. 2018. Zero-shot learning-a comprehensive evaluation of the good, the bad and the ugly. IEEE transactions on pattern analysis and machine intelligence, Vol. 41, 9 (2018), 2251--2265."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01572"},{"key":"e_1_3_2_1_42_1","volume-title":"Human-centric Scene Understanding for 3D Large-scale Scenarios. arXiv preprint arXiv:2307.14392","author":"Xu Yiteng","year":"2023","unstructured":"Yiteng Xu, Peishan Cong, Yichen Yao, Runnan Chen, Yuenan Hou, Xinge Zhu, Xuming He, Jingyi Yu, and Yuexin Ma. 2023. Human-centric Scene Understanding for 3D Large-scale Scenarios. arXiv preprint arXiv:2307.14392 (2023)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00689"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.321"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00981"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Ottawa ON Canada","acronym":"MM '23"},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612409","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612409","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:03:29Z","timestamp":1755821009000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612409"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":45,"alternative-id":["10.1145\/3581783.3612409","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612409","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}