{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:48:53Z","timestamp":1774021733631,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":88,"publisher":"ACM","funder":[{"name":"National Key R&D Program of China","award":["2022ZD0160801"],"award-info":[{"award-number":["2022ZD0160801"]}]},{"name":"Beijing Natural Science Foundation","award":["4244081"],"award-info":[{"award-number":["4244081"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730601","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["OctGPT: Octree-based Multiscale Autoregressive Models for 3D Shape Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8215-6142","authenticated-orcid":false,"given":"Si-Tong","family":"Wei","sequence":"first","affiliation":[{"name":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4585-6247","authenticated-orcid":false,"given":"Rui-Huan","family":"Wang","sequence":"additional","affiliation":[{"name":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9449-3935","authenticated-orcid":false,"given":"Chuan-Zhi","family":"Zhou","sequence":"additional","affiliation":[{"name":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4702-036X","authenticated-orcid":false,"given":"Baoquan","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Intelligent Science and Technology, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9700-8188","authenticated-orcid":false,"given":"Peng-Shuai","family":"Wang","sequence":"additional","affiliation":[{"name":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","volume-title":"NeurIPS","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et\u00a0al. 2020. Language models are few-shot learners. In NeurIPS."},{"key":"e_1_3_3_2_3_1","unstructured":"Angel\u00a0X. Chang Thomas Funkhouser Leonidas\u00a0J. Guibas Pat Hanrahan Qixing Huang Zimo Li Silvio Savarese Manolis Savva Shuran Song Hao Su Jianxiong Xiao Li Yi and Fisher Yu. 2015. ShapeNet: An information-rich 3D model repository. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1512.03012 (2015)."},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Kevin Chen Christopher\u00a0B. Choy Manolis Savva Angel\u00a0X Chang Thomas Funkhouser and Silvio Savarese. 2018. Text2Shape: Generating Shapes from Natural Language by Learning Joint Embeddings. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1803.08495 (2018).","DOI":"10.1007\/978-3-030-20893-6_7"},{"key":"e_1_3_3_2_5_1","unstructured":"Yiwen Chen Tong He Di Huang Weicai Ye Sijin Chen Jiaxiang Tang Xin Chen Zhongang Cai Lei Yang Gang Yu et\u00a0al. 2024a. MeshAnything: Artist-Created Mesh Generation with Autoregressive Transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.10163 (2024)."},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"crossref","unstructured":"Yongwei Chen Yushi Lan Shangchen Zhou Tengfei Wang and XIngang Pan. 2024b. SAR3D: Autoregressive 3D object generation and understanding via multi-scale 3D VQVAE. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.16856 (2024).","DOI":"10.1109\/CVPR52734.2025.02642"},{"key":"e_1_3_3_2_7_1","unstructured":"Yiwen Chen Yikai Wang Yihao Luo Zhengyi Wang Zilong Chen Jun Zhu Chi Zhang and Guosheng Lin. 2024c. Meshanything v2: Artist-created mesh generation with adjacent mesh tokenization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.02555 (2024)."},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00609"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20062-5_6"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00433"},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00215"},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01315"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00827"},{"key":"e_1_3_3_2_15_1","volume-title":"NeurIPS","author":"Goodfellow Ian","year":"2016","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2016. Generative adversarial networks. In NeurIPS."},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"crossref","unstructured":"Meng-Hao Guo Jun-Xiong Cai Zheng-Ning Liu Tai-Jiang Mu Ralph\u00a0R Martin and Shi-Min Hu. 2021. PCT: Point cloud transformer. Comput. Vis. Media 7 2 (2021).","DOI":"10.1007\/s41095-021-0229-5"},{"key":"e_1_3_3_2_17_1","unstructured":"Anchit Gupta Wenhan Xiong Yixin Nie Ian Jones and Barlas O\u011fuz. 2023. 3DGen: Triplane latent diffusion for textured mesh generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.05371 (2023)."},{"key":"e_1_3_3_2_18_1","unstructured":"Zekun Hao David\u00a0W Romero Tsung-Yi Lin and Ming-Yu Liu. 2024. Meshtron: High-Fidelity Artist-Like 3D Mesh Generation at Scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.09548 (2024)."},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72684-2_17"},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"crossref","unstructured":"Amir Hertz Or Perel Raja Giryes Olga Sorkine-Hornung and Daniel Cohen-Or. 2022. SPAGHETTI: Editing Implicit Shapes Through Part Aware Generation. ACM Trans. Graph. (SIGGRAPH) 41 4 (2022).","DOI":"10.1145\/3528223.3530084"},{"key":"e_1_3_3_2_21_1","volume-title":"NeurIPS","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. In NeurIPS."},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550469.3555394"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00270"},{"key":"e_1_3_3_2_24_1","unstructured":"Heewoo Jun and Alex Nichol. 2023. Shap-E: Generating conditional 3D implicit functions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.02463 (2023)."},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00831"},{"key":"e_1_3_3_2_26_1","volume-title":"ECCV","author":"Lan Yushi","year":"2024","unstructured":"Yushi Lan, Fangzhou Hong, Shuai Yang, Shangchen Zhou, Xuyi Meng, Bo Dai, Xingang Pan, and Chen\u00a0Change Loy. 2024. LN3Diff: Scalable Latent Neural Fields Diffusion for Speedy 3D Generation. In ECCV."},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01216"},{"key":"e_1_3_3_2_28_1","volume-title":"NeurIPS","author":"Li Tianhong","year":"2024","unstructured":"Tianhong Li, Yonglong Tian, He Li, Mingyang Deng, and Kaiming He. 2024. Autoregressive Image Generation without Vector Quantization. In NeurIPS."},{"key":"e_1_3_3_2_29_1","volume-title":"NeurIPS","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong\u00a0Jae Lee. 2024. Visual instruction tuning. In NeurIPS."},{"key":"e_1_3_3_2_30_1","unstructured":"Minghua Liu Ruoxi Shi Linghao Chen Zhuoyang Zhang Chao Xu Xinyue Wei Hansheng Chen Chong Zeng Jiayuan Gu and Hao Su. 2023b. One-2-3-45++: Fast single image to 3D objects with consistent multi-view generation and 3D diffusion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.07885 (2023)."},{"key":"e_1_3_3_2_31_1","volume-title":"ICLR","author":"Liu Zhen","year":"2023","unstructured":"Zhen Liu, Yao Feng, Michael\u00a0J. Black, Derek Nowrouzezahrai, Liam Paull, and Weiyang Liu. 2023a. MeshDiffusion: Score-based Generative 3D Mesh Modeling. In ICLR."},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/37401.37422"},{"key":"e_1_3_3_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00286"},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00040"},{"key":"e_1_3_3_2_35_1","volume-title":"ICML","author":"Nash Charlie","year":"2020","unstructured":"Charlie Nash, Yaroslav Ganin, SM\u00a0Ali Eslami, and Peter Battaglia. 2020. PolyGen: An autoregressive generative model of 3D meshes. In ICML."},{"key":"e_1_3_3_2_36_1","unstructured":"Alex Nichol Heewoo Jun Prafulla Dhariwal Pamela Mishkin and Mark Chen. 2022. Point-E: A system for generating 3D point clouds from complex prompts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.08751 (2022)."},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"crossref","unstructured":"Yutaka Ohtake Alexander Belyaev Marc Alexa Greg Turk and Hans-Peter Seidel. 2003. Multi-level partition of unity implicits. ACM Trans. Graph. (SIGGRAPH) 22 3 (2003).","DOI":"10.1145\/882262.882293"},{"key":"e_1_3_3_2_38_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_2_39_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et\u00a0al. 2023. Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.07193 (2023)."},{"key":"e_1_3_3_2_40_1","volume-title":"NeurIPS","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, et\u00a0al. 2022. Training language models to follow instructions with human feedback. In NeurIPS."},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20086-1_35"},{"key":"e_1_3_3_2_42_1","volume-title":"NeurIPS","author":"Paschalidou Despoina","year":"2021","unstructured":"Despoina Paschalidou, Amlan Kar, Maria Shugrina, Karsten Kreis, Andreas Geiger, and Sanja Fidler. 2021. Atiss: Autoregressive transformers for indoor scene synthesis. In NeurIPS."},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_31"},{"key":"e_1_3_3_2_44_1","unstructured":"Xuelin Qian Yu Wang Simian Luo Yinda Zhang Ying Tai Zhenyu Zhang Chengjie Wang Xiangyang Xue Bo Zhao Tiejun Huang Yunsheng Wu and Yanwei Fu. 2024. Pushing Auto-regressive Models for 3D Shape Generation at Capacity and Scalability. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.12225 (2024)."},{"key":"e_1_3_3_2_45_1","volume-title":"ICML","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In ICML."},{"key":"e_1_3_3_2_46_1","volume-title":"CVPR","author":"Ren Xuanchi","year":"2024","unstructured":"Xuanchi Ren, Jiahui Huang, Xiaohui Zeng, Ken Museth, Sanja Fidler, and Francis Williams. 2024. XCube (\\( \\mathcal {X}^3 \\)): Large-Scale 3D Generative Modeling using Sparse Voxel Hierarchies. In CVPR."},{"key":"e_1_3_3_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687699"},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02001"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02000"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01855"},{"key":"e_1_3_3_2_51_1","unstructured":"Jianlin Su Murtadha Ahmed Yu Lu Shengfeng Pan Wen Bo and Yunfeng Liu. 2024. RoFormer: Enhanced transformer with rotary position embedding. Neurocomputing (2024)."},{"key":"e_1_3_3_2_52_1","unstructured":"Peize Sun Yi Jiang Shoufa Chen Shilong Zhang Bingyue Peng Ping Luo and Zehuan Yuan. 2024. Autoregressive Model Beats Diffusion: Llama for Scalable Image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.06525 (2024)."},{"key":"e_1_3_3_2_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20080-9_25"},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093430"},{"key":"e_1_3_3_2_55_1","unstructured":"Jiaxiang Tang Zhaoshuo Li Zekun Hao Xian Liu Gang Zeng Ming-Yu Liu and Qinsheng Zhang. 2024. EdgeRunner: Auto-regressive Auto-encoder for Artistic Mesh Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.18114 (2024)."},{"key":"e_1_3_3_2_56_1","volume-title":"NeurIPS","author":"Tian Keyu","year":"2024","unstructured":"Keyu Tian, Yi Jiang, Zehuan Yuan, Bingyue Peng, and Liwei Wang. 2024. Visual Autoregressive Modeling: Scalable Image Generation via Next-Scale Prediction. In NeurIPS."},{"key":"e_1_3_3_2_57_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_2_58_1","volume-title":"NeurIPS","author":"Oord Aaron van\u00a0den","year":"2017","unstructured":"Aaron van\u00a0den Oord, Oriol Vinyals, and Koray Kavukcuoglu. 2017. Neural Discrete Representation Learning. In NeurIPS."},{"key":"e_1_3_3_2_59_1","volume-title":"NeurIPS","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In NeurIPS."},{"key":"e_1_3_3_2_60_1","doi-asserted-by":"crossref","unstructured":"Peng-Shuai Wang. 2023. OctFormer: Octree-based Transformers for 3D Point Clouds. ACM Trans. Graph. (SIGGRAPH) 42 4 (2023).","DOI":"10.1145\/3592131"},{"key":"e_1_3_3_2_61_1","doi-asserted-by":"crossref","unstructured":"Peng-Shuai Wang Yang Liu Yu-Xiao Guo Chun-Yu Sun and Xin Tong. 2017. O-CNN: Octree-based convolutional neural networks for 3D shape analysis. ACM Trans. Graph. (SIGGRAPH) 36 4 (2017).","DOI":"10.1145\/3072959.3073608"},{"key":"e_1_3_3_2_62_1","doi-asserted-by":"crossref","unstructured":"Peng-Shuai Wang Yang Liu and Xin Tong. 2022. Dual Octree Graph Networks for Learning Adaptive Volumetric Shape Representations. ACM Trans. Graph. (SIGGRAPH) 41 4 (2022).","DOI":"10.1145\/3528223.3530087"},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"crossref","unstructured":"Peng-Shuai Wang Chun-Yu Sun Yang Liu and Xin Tong. 2018. Adaptive O-CNN: A patch-based deep representation of 3D shapes. ACM Trans. Graph. (SIGGRAPH ASIA) 37 6 (2018).","DOI":"10.1145\/3272127.3275050"},{"key":"e_1_3_3_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00021"},{"key":"e_1_3_3_2_65_1","unstructured":"Xinlong Wang Xiaosong Zhang Zhengxiong Luo Quan Sun Yufeng Cui Jinsheng Wang Fan Zhang Yueze Wang Zhen Li Qiying Yu et\u00a0al. 2024b. Emu3: Next-token prediction is all you need. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.18869 (2024)."},{"key":"e_1_3_3_2_66_1","unstructured":"Zhengyi Wang Jonathan Lorraine Yikai Wang Hang Su Jun Zhu Sanja Fidler and Xiaohui Zeng. 2024a. LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.09595 (2024)."},{"key":"e_1_3_3_2_67_1","volume-title":"NeurIPS","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc\u00a0V Le, Denny Zhou, et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. In NeurIPS."},{"key":"e_1_3_3_2_68_1","unstructured":"Haohan Weng Zibo Zhao Biwen Lei Xianghui Yang Jian Liu Zeqiang Lai Zhuo Chen Yuhong Liu Jie Jiang Chunchao Guo Tong Zhang Shenghua Gao and C.\u00a0L.\u00a0Philip Chen. 2024. Scaling Mesh Generation via Compressive Tokenization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.07025 (2024)."},{"key":"e_1_3_3_2_69_1","volume-title":"NeurIPS","author":"Wu Jiajun","year":"2016","unstructured":"Jiajun Wu, Chengkai Zhang, Tianfan Xue, William\u00a0T. Freeman, and Joshua\u00a0B. Tenenbaum. 2016. Learning a probabilistic latent space of object shapes via 3D generative-adversarial modeling. In NeurIPS."},{"key":"e_1_3_3_2_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00463"},{"key":"e_1_3_3_2_71_1","volume-title":"NeurIPS","author":"Wu Xiaoyang","year":"2022","unstructured":"Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, and Hengshuang Zhao. 2022. Point Transformer V2: Grouped Vector Attention and Partition-based Pooling. In NeurIPS."},{"key":"e_1_3_3_2_72_1","doi-asserted-by":"crossref","unstructured":"Jianfeng Xiang Zelong Lv Sicheng Xu Yu Deng Ruicheng Wang Bowen Zhang Dong Chen Xin Tong and Jiaolong Yang. 2024. Structured 3D Latents for Scalable and Versatile 3D Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.01506 (2024).","DOI":"10.1109\/CVPR52734.2025.02000"},{"key":"e_1_3_3_2_73_1","doi-asserted-by":"crossref","unstructured":"Bojun Xiong Jialun Liu Jiakui Hu Chenming Wu Jinbo Wu Xing Liu Chen Zhao Errui Ding and Zhouhui Lian. 2024a. TexGaussian: Generating High-quality PBR Material via Octree-based 3D Gaussian Splatting. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.19654 (2024).","DOI":"10.1109\/CVPR52734.2025.00060"},{"key":"e_1_3_3_2_74_1","doi-asserted-by":"crossref","unstructured":"Bojun Xiong Si-Tong Wei Xin-Yang Zheng Yan-Pei Cao Zhouhui Lian and Peng-Shuai Wang. 2024b. OctFusion: Octree-based Diffusion Models for 3D Shape Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.14732 (2024).","DOI":"10.1111\/cgf.70198"},{"key":"e_1_3_3_2_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00614"},{"key":"e_1_3_3_2_76_1","doi-asserted-by":"crossref","unstructured":"Yu-Qi Yang Yu-Xiao Guo Jian-Yu Xiong Yang Liu Hao Pan Peng-Shuai Wang Xin Tong and Baining Guo. 2024. Swin3D: A pretrained transformer backbone for 3D indoor scene understanding. Comput. Vis. Media (2024).","DOI":"10.26599\/CVM.2025.9450383"},{"key":"e_1_3_3_2_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01871"},{"key":"e_1_3_3_2_78_1","volume-title":"NeurIPS","author":"Zeng Xiaohui","year":"2022","unstructured":"Xiaohui Zeng, Arash Vahdat, Francis Williams, Zan Gojcic, Or Litany, Sanja Fidler, and Karsten Kreis. 2022. LION: Latent Point Diffusion Models for 3D Shape Generation. In NeurIPS."},{"key":"e_1_3_3_2_79_1","unstructured":"Bowen Zhang Yiji Cheng Jiaolong Yang Chunyu Wang Feng Zhao Yansong Tang Dong Chen and Baining Guo. 2024a. GaussianCube: Structuring Gaussian Splatting using Optimal Transport for 3D Generative Modeling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.19655 (2024)."},{"key":"e_1_3_3_2_80_1","volume-title":"NeurIPS","author":"Zhang Biao","year":"2022","unstructured":"Biao Zhang, Matthias Nie\u00dfner, and Peter Wonka. 2022. 3DILG: Irregular latent grids for 3D generative modeling. In NeurIPS."},{"key":"e_1_3_3_2_81_1","doi-asserted-by":"crossref","unstructured":"Biao Zhang Jiapeng Tang Matthias Niessner and Peter Wonka. 2023. 3DShape2VecSet: A 3D shape representation for neural fields and generative diffusion models. ACM Trans. Graph. (SIGGRAPH) (2023).","DOI":"10.1145\/3592442"},{"key":"e_1_3_3_2_82_1","unstructured":"Jinzhi Zhang Feng Xiong and Mu Xu. 2024c. 3D representation in 512-Byte: Variational tokenizer is the key for autoregressive 3D generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.02202 (2024)."},{"key":"e_1_3_3_2_83_1","doi-asserted-by":"crossref","unstructured":"Jinzhi Zhang Feng Xiong and Mu Xu. 2024d. G3PT: Unleash the power of autoregressive modeling in 3D generation via cross-scale querying transformer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.06322 (2024).","DOI":"10.24963\/ijcai.2024\/262"},{"key":"e_1_3_3_2_84_1","doi-asserted-by":"crossref","unstructured":"Longwen Zhang Ziyu Wang Qixuan Zhang Qiwei Qiu Anqi Pang Haoran Jiang Wei Yang Lan Xu and Jingyi Yu. 2024b. CLAY: A Controllable Large-scale Generative Model for Creating High-quality 3D Assets. ACM Trans. Graph. (SIGGRAPH) 43 4 (2024).","DOI":"10.1145\/3658146"},{"key":"e_1_3_3_2_85_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"e_1_3_3_2_86_1","unstructured":"Yue Zhao Yuanjun Xiong and Philipp Kr\u00e4henb\u00fchl. 2024. Image and Video Tokenization with Binary Spherical Quantization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.07548 (2024)."},{"key":"e_1_3_3_2_87_1","doi-asserted-by":"crossref","unstructured":"Xin-Yang Zheng Yang Liu Peng-Shuai Wang and Xin Tong. 2022. SDF-StyleGAN: Implicit SDF-Based StyleGAN for 3D Shape Generation. Comput. Graph. Forum (SGP) (2022).","DOI":"10.1111\/cgf.14602"},{"key":"e_1_3_3_2_88_1","doi-asserted-by":"crossref","unstructured":"Xin-Yang Zheng Hao Pan Peng-Shuai Wang Xin Tong Yang Liu and Heung-Yeung Shum. 2023. Locally Attentional SDF Diffusion for Controllable 3D Shape Generation. ACM Trans. Graph. (SIGGRAPH) 42 4 (2023).","DOI":"10.1145\/3592103"},{"key":"e_1_3_3_2_89_1","doi-asserted-by":"crossref","unstructured":"Kun Zhou Minmin Gong Xin Huang and Baining Guo. 2011. Data-parallel octrees for surface reconstruction. IEEE. T. Vis. Comput. Gr. 17 5 (2011).","DOI":"10.1109\/TVCG.2010.75"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730601","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:56:08Z","timestamp":1774018568000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730601"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":88,"alternative-id":["10.1145\/3721238.3730601","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730601","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}