{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T20:32:14Z","timestamp":1777149134367,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":79,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730732","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["PrimitiveAnything: Human-Crafted 3D Primitive Assembly Generation with Auto-Regressive transformer"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7596-2698","authenticated-orcid":false,"given":"Jingwen","family":"Ye","sequence":"first","affiliation":[{"name":"Tencent AIPD, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1575-6112","authenticated-orcid":false,"given":"Yuze","family":"He","sequence":"additional","affiliation":[{"name":"Tencent AIPD, Shenzhen, China and Tsinghua University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6104-817X","authenticated-orcid":false,"given":"Yanning","family":"Zhou","sequence":"additional","affiliation":[{"name":"Tencent AIPD, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7188-7353","authenticated-orcid":false,"given":"Yiqin","family":"Zhu","sequence":"additional","affiliation":[{"name":"Tencent AIPD, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7684-7202","authenticated-orcid":false,"given":"Kaiwen","family":"Xiao","sequence":"additional","affiliation":[{"name":"Tencent AIPD, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5774-1916","authenticated-orcid":false,"given":"Yong-Jin","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6488-2546","authenticated-orcid":false,"given":"Wei","family":"Yang","sequence":"additional","affiliation":[{"name":"Tencent AIPD, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5151-6547","authenticated-orcid":false,"given":"Xiao","family":"Han","sequence":"additional","affiliation":[{"name":"Tencent AIPD, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_2_3_1","unstructured":"Raphael Bensadoun Tom Monnier Yanir Kleiman Filippos Kokkinos Yawar Siddiqui Mahendra Kariya Omri Harosh Roman Shapovalov Benjamin Graham Emilien Garreau et\u00a0al. 2024. Meta 3d gen. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.02599 (2024)."},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Irving Biederman. 1985. Human image understanding: Recent research and a theory. Computer Vision Graphics and Image Processing 32 1 (Oct 1985) 29\u201373. https:\/\/doi.org\/10.1016\/0734-189x(85)90002-7","DOI":"10.1016\/0734-189X(85)90002-7"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"crossref","unstructured":"Irving Biederman. 2005. Recognition-by-components: A theory of human image understanding. Psychological Review (Sep 2005) 115\u2013147. https:\/\/doi.org\/10.1037\/0033-295x.94.2.115","DOI":"10.1037\/0033-295X.94.2.115"},{"key":"e_1_3_3_2_6_1","volume-title":"Proc. IEEE Conf. on Systems and Control, 1975","author":"Binford Thomas","year":"1975","unstructured":"Thomas Binford. 1975. Visual perception by computer. In Proc. IEEE Conf. on Systems and Control, 1975."},{"key":"e_1_3_3_2_7_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared\u00a0D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et\u00a0al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_2_8_1","volume-title":"ShapeNet: An Information-Rich 3D Model Repository","author":"Chang Angel\u00a0X.","year":"2015","unstructured":"Angel\u00a0X. Chang, Thomas Funkhouser, Leonidas Guibas, Pat Hanrahan, Qixing Huang, Zimo Li, Silvio Savarese, Manolis Savva, Shuran Song, Hao Su, Jianxiong Xiao, Li Yi, and Fisher Yu. 2015. ShapeNet: An Information-Rich 3D Model Repository. Technical Report arXiv:https:\/\/arXiv.org\/abs\/1512.03012 [cs.GR]. Stanford University \u2014 Princeton University \u2014 Toyota Technological Institute at Chicago."},{"key":"e_1_3_3_2_9_1","unstructured":"Yiwen Chen Tong He Di Huang Weicai Ye Sijin Chen Jiaxiang Tang Xin Chen Zhongang Cai Lei Yang Gang Yu et\u00a0al. 2024a. MeshAnything: Artist-Created Mesh Generation with Autoregressive Transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.10163 (2024)."},{"key":"e_1_3_3_2_10_1","unstructured":"Yiwen Chen Yikai Wang Yihao Luo Zhengyi Wang Zilong Chen Jun Zhu Chi Zhang and Guosheng Lin. 2024b. Meshanything v2: Artist-created mesh generation with adjacent mesh tokenization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.02555 (2024)."},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00012"},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00858"},{"key":"e_1_3_3_2_13_1","unstructured":"Laurent Chevalier Fabrice Jaillet and Atilla Baskurt. 2003. Segmentation and superquadric modeling of 3D objects. (2003)."},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00011"},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657425"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.264"},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00048"},{"key":"e_1_3_3_2_20_1","unstructured":"Zhirui Gao Renjiao Yi Yuhang Huang Wei Chen Chenyang Zhu and Kai Xu. 2024. Learning Part-aware 3D Representations by Fusing 2D Gaussians and Superquadrics. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.10789 (2024)."},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00725"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657444"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_35"},{"key":"e_1_3_3_2_24_1","unstructured":"Yuze He Wang Zhao Shaohui Liu Yubin Hu Yushi Bai Yu-Hui Wen and Yong-Jin Liu. 2024. AlphaTablets: A Generic Plane Representation for 3D Planar Reconstruction from Monocular Videos. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.19950 (2024)."},{"key":"e_1_3_3_2_25_1","volume-title":"The Twelfth International Conference on Learning Representations","author":"Hong Yicong","year":"2024","unstructured":"Yicong Hong, Kai Zhang, Jiuxiang Gu, Sai Bi, Yang Zhou, Difan Liu, Feng Liu, Kalyan Sunkavalli, Trung Bui, and Hao Tan. 2024. LRM: Large Reconstruction Model for Single Image to 3D. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=sllU8vvsFF"},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657458"},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00337"},{"key":"e_1_3_3_2_28_1","volume-title":"Forty-first International Conference on Machine Learning","author":"Hui Ka-Hei","year":"2024","unstructured":"Ka-Hei Hui, Aditya Sanghi, Arianna Rampini, Kamal\u00a0Rahimi Malekshan, Zhengzhe Liu, Hooman Shayani, and Chi-Wing Fu. 2024. Make-a-shape: a ten-million-scale 3d shape model. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_3_2_29_1","volume-title":"International Conference on Learning Representations","author":"Jang Eric","year":"2017","unstructured":"Eric Jang, Shixiang Gu, and Ben Poole. 2017. Categorical Reparameterization with Gumbel-Softmax. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=rkE3y85ee"},{"key":"e_1_3_3_2_30_1","unstructured":"Heewoo Jun and Alex Nichol. 2023. Shap-e: Generating conditional 3d implicit functions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.02463 (2023)."},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00736"},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"crossref","unstructured":"Ales Leonardis Ales Jaklic and Franc Solina. 1997. Superquadrics for segmenting and modeling range data. IEEE Transactions on Pattern Analysis and Machine Intelligence 19 11 (1997) 1289\u20131295.","DOI":"10.1109\/34.632988"},{"key":"e_1_3_3_2_33_1","volume-title":"The Twelfth International Conference on Learning Representations","author":"Li Jiahao","year":"2024","unstructured":"Jiahao Li, Hao Tan, Kai Zhang, Zexiang Xu, Fujun Luan, Yinghao Xu, Yicong Hong, Kalyan Sunkavalli, Greg Shakhnarovich, and Sai Bi. 2024c. Instant3D: Fast Text-to-3D with Sparse-view Generation and Large Reconstruction Model. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=2lDQLiH1W4"},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"crossref","unstructured":"Jun Li Kai Xu Siddhartha Chaudhuri Ersin Yumer Hao Zhang and Leonidas Guibas. 2017. Grass: Generative recursive autoencoders for shape structures. ACM Transactions on Graphics (TOG) 36 4 (2017) 1\u201314.","DOI":"10.1145\/3072959.3073637"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00276"},{"key":"e_1_3_3_2_36_1","unstructured":"Songlin Li Despoina Paschalidou and Leonidas Guibas. 2024b. PASTA: Controllable Part-Aware Shape Generation with Autoregressive Transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.13677 (2024)."},{"key":"e_1_3_3_2_37_1","unstructured":"Weiyu Li Jiarui Liu Hongyu Yan Rui Chen Yixun Liang Xuelin Chen Ping Tan and Xiaoxiao Long. 2024a. CraftsMan3D: High-fidelity Mesh Generation with 3D Native Generation and Interactive Geometry Refiner. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.14979 (2024)."},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591522"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00270"},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00847"},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"crossref","unstructured":"Kaichun Mo Paul Guerrero Li Yi Hao Su Peter Wonka Niloy Mitra and Leonidas Guibas. 2019. StructureNet: Hierarchical Graph Networks for 3D Shape Generation. ACM Transactions on Graphics (TOG) Siggraph Asia 2019 38 6 (2019) Article 242.","DOI":"10.1145\/3355089.3356527"},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"crossref","unstructured":"Tom Monnier Jake Austin Angjoo Kanazawa Alexei Efros and Mathieu Aubry. 2023. Differentiable blocks world: Qualitative 3d decomposition by rendering primitives. Advances in Neural Information Processing Systems 36 (2023) 5791\u20135807.","DOI":"10.52202\/075280-0254"},{"key":"e_1_3_3_2_44_1","unstructured":"Alex Nichol Heewoo Jun Prafulla Dhariwal Pamela Mishkin and Mark Chen. 2022. Point-e: A system for generating 3d point clouds from complex prompts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.08751 (2022)."},{"key":"e_1_3_3_2_45_1","unstructured":"Despoina Paschalidou Amlan Kar Maria Shugrina Karsten Kreis Andreas Geiger and Sanja Fidler. 2021. Atiss: Autoregressive transformers for indoor scene synthesis. Advances in Neural Information Processing Systems 34 (2021) 12013\u201312026."},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01059"},{"key":"e_1_3_3_2_47_1","unstructured":"William Peebles and Saining Xie. 2022. Scalable Diffusion Models with Transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.09748 (2022)."},{"key":"e_1_3_3_2_48_1","first-page":"695","volume-title":"AAAI","author":"Pentland Alex","year":"1986","unstructured":"Alex Pentland. 1986. Parts: Structured Descriptions of Shape.. In AAAI. 695\u2013701."},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657415"},{"key":"e_1_3_3_2_50_1","unstructured":"Dustin Podell Zion English Kyle Lacey Andreas Blattmann Tim Dockhorn Jonas M\u00fcller Joe Penna and Robin Rombach. 2023. Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.01952 (2023)."},{"key":"e_1_3_3_2_51_1","volume-title":"The Eleventh International Conference on Learning Representations","author":"Poole Ben","year":"2023","unstructured":"Ben Poole, Ajay Jain, Jonathan\u00a0T. Barron, and Ben Mildenhall. 2023. DreamFusion: Text-to-3D using 2D Diffusion. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=FjNys5c7VyY"},{"key":"e_1_3_3_2_52_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et\u00a0al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_3_2_53_1","first-page":"8821","volume-title":"International conference on machine learning","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-shot text-to-image generation. In International conference on machine learning. Pmlr, 8821\u20138831."},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00634"},{"key":"e_1_3_3_2_55_1","unstructured":"Lawrence\u00a0G Roberts. 1963. Machine perception of three-dimensional solids. Ph.\u00a0D. Dissertation. Massachusetts Institute of Technology."},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01855"},{"key":"e_1_3_3_2_57_1","unstructured":"Jiaxiang Tang Zhaoshuo Li Zekun Hao Xian Liu Gang Zeng Ming-Yu Liu and Qinsheng Zhang. 2024. Edgerunner: Auto-regressive auto-encoder for artistic mesh generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.18114 (2024)."},{"key":"e_1_3_3_2_58_1","unstructured":"Keyu Tian Yi Jiang Zehuan Yuan Bingyue Peng and Liwei Wang. 2024. Visual autoregressive modeling: Scalable image generation via next-scale prediction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.02905 (2024)."},{"key":"e_1_3_3_2_59_1","unstructured":"Dmitry Tochilkin David Pankratz Zexiang Liu Zixuan Huang Adam Letts Yangguang Li Ding Liang Christian Laforte Varun Jampani and Yan-Pei Cao. 2024. TripoSR: Fast 3D Object Reconstruction from a Single Image. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.02151 (2024)."},{"key":"e_1_3_3_2_60_1","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_2_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.160"},{"key":"e_1_3_3_2_62_1","doi-asserted-by":"crossref","unstructured":"Arash Vahdat Francis Williams Zan Gojcic Or Litany Sanja Fidler Karsten Kreis et\u00a0al. 2022. Lion: Latent point diffusion models for 3d shape generation. Advances in Neural Information Processing Systems 35 (2022) 10021\u201310039.","DOI":"10.52202\/068431-0728"},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"e_1_3_3_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00021"},{"key":"e_1_3_3_2_65_1","unstructured":"Zhengyi Wang Jonathan Lorraine Yikai Wang Hang Su Jun Zhu Sanja Fidler and Xiaohui Zeng. 2024a. LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models. arxiv:https:\/\/arXiv.org\/abs\/2411.09595\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2411.09595"},{"key":"e_1_3_3_2_66_1","volume-title":"Thirty-seventh Conference on Neural Information Processing Systems","author":"Wang Zhengyi","year":"2023","unstructured":"Zhengyi Wang, Cheng Lu, Yikai Wang, Fan Bao, Chongxuan Li, Hang Su, and Jun Zhu. 2023b. ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation. In Thirty-seventh Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=ppJuFSOAnM"},{"key":"e_1_3_3_2_67_1","doi-asserted-by":"crossref","unstructured":"Zhengyi Wang Yikai Wang Yifei Chen Chendong Xiang Shuo Chen Dajiang Yu Chongxuan Li Hang Su and Jun Zhu. 2024b. CRM: Single Image to 3D Textured Mesh with Convolutional Reconstruction Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.05034 (2024).","DOI":"10.1007\/978-3-031-72751-1_4"},{"key":"e_1_3_3_2_68_1","doi-asserted-by":"crossref","unstructured":"Jianfeng Xiang Zelong Lv Sicheng Xu Yu Deng Ruicheng Wang Bowen Zhang Dong Chen Xin Tong and Jiaolong Yang. 2024. Structured 3D Latents for Scalable and Versatile 3D Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.01506 (2024).","DOI":"10.1109\/CVPR52734.2025.02000"},{"key":"e_1_3_3_2_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00612"},{"key":"e_1_3_3_2_70_1","unstructured":"Jiale Xu Weihao Cheng Yiming Gao Xintao Wang Shenghua Gao and Ying Shan. 2024. InstantMesh: Efficient 3D Mesh Generation from a Single Image with Sparse-view Large Reconstruction Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.07191 (2024)."},{"key":"e_1_3_3_2_71_1","unstructured":"Xianghui Yang Huiwen Shi Bowen Zhang Fan Yang Jiacheng Wang Hongxu Zhao Xinhai Liu Xinzhou Wang Qingxiang Lin Jiaao Yu Lifu Wang Zhuo Chen Sicong Liu Yuhong Liu Yong Yang Di Wang Jie Jiang and Chunchao Guo. 2024. Tencent Hunyuan3D-1.0: A Unified Framework for Text-to-3D and Image-to-3D Generation. arxiv:https:\/\/arXiv.org\/abs\/2411.02293\u00a0[cs.CV]"},{"key":"e_1_3_3_2_72_1","doi-asserted-by":"crossref","unstructured":"Xin Yu Ze Yuan Yuan-Chen Guo Ying-Tian Liu Jianhui Liu Yangguang Li Yan-Pei Cao Ding Liang and Xiaojuan Qi. 2024. TEXGen: a Generative Diffusion Model for Mesh Textures. ACM Trans. Graph. 43 6 Article 213 (2024) 14\u00a0pages. https:\/\/doi.org\/10.1145\/3687909","DOI":"10.1145\/3687909"},{"key":"e_1_3_3_2_73_1","doi-asserted-by":"crossref","unstructured":"Biao Zhang Jiapeng Tang Matthias Niessner and Peter Wonka. 2023. 3dshape2vecset: A 3d shape representation for neural fields and generative diffusion models. ACM Transactions on Graphics (TOG) 42 4 (2023) 1\u201316.","DOI":"10.1145\/3592442"},{"key":"e_1_3_3_2_74_1","unstructured":"Jinzhi Zhang Feng Xiong and Mu Xu. 2024d. 3D representation in 512-Byte: Variational tokenizer is the key for autoregressive 3D generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.02202 (2024)."},{"key":"e_1_3_3_2_75_1","doi-asserted-by":"crossref","unstructured":"Kai Zhang Sai Bi Hao Tan Yuanbo Xiangli Nanxuan Zhao Kalyan Sunkavalli and Zexiang Xu. 2024a. GS-LRM: Large Reconstruction Model for 3D Gaussian Splatting. European Conference on Computer Vision (2024).","DOI":"10.1007\/978-3-031-72670-5_1"},{"key":"e_1_3_3_2_76_1","doi-asserted-by":"crossref","unstructured":"Longwen Zhang Ziyu Wang Qixuan Zhang Qiwei Qiu Anqi Pang Haoran Jiang Wei Yang Lan Xu and Jingyi Yu. 2024c. CLAY: A Controllable Large-scale Generative Model for Creating High-quality 3D Assets. ACM Transactions on Graphics (TOG) 43 4 (2024) 1\u201320.","DOI":"10.1145\/3658146"},{"key":"e_1_3_3_2_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657504"},{"key":"e_1_3_3_2_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV62453.2024.00132"},{"key":"e_1_3_3_2_79_1","volume-title":"Thirty-seventh Conference on Neural Information Processing Systems","author":"Zhao Zibo","year":"2023","unstructured":"Zibo Zhao, Wen Liu, Xin Chen, Xianfang Zeng, Rui Wang, Pei Cheng, BIN FU, Tao Chen, Gang YU, and Shenghua Gao. 2023. Michelangelo: Conditional 3D Shape Generation based on Shape-Image-Text Aligned Latent Representation. In Thirty-seventh Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=xmxgMij3LY"},{"key":"e_1_3_3_2_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.103"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730732","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:58:15Z","timestamp":1774018695000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730732"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":79,"alternative-id":["10.1145\/3721238.3730732","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730732","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}