{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T01:01:19Z","timestamp":1778115679603,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62302309,62171248"],"award-info":[{"award-number":["62302309,62171248"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen Science and Technology Program","award":["JCYJ20220818101014030,JCYJ20220818101012025"],"award-info":[{"award-number":["JCYJ20220818101014030,JCYJ20220818101012025"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680920","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"10843-10852","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Large Point-to-Gaussian Model for Image-to-3D Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8553-6856","authenticated-orcid":false,"given":"Longfei","family":"Lu","sequence":"first","affiliation":[{"name":"Tsinghua Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5981-8566","authenticated-orcid":false,"given":"Huachen","family":"Gao","sequence":"additional","affiliation":[{"name":"Tencent, Hunyuan, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0594-6404","authenticated-orcid":false,"given":"Tao","family":"Dai","sequence":"additional","affiliation":[{"name":"College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9789-452X","authenticated-orcid":false,"given":"Yaohua","family":"Zha","sequence":"additional","affiliation":[{"name":"Tsinghua Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2990-505X","authenticated-orcid":false,"given":"Zhi","family":"Hou","sequence":"additional","affiliation":[{"name":"Tencent, Hunyuan, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3928-4606","authenticated-orcid":false,"given":"Junta","family":"Wu","sequence":"additional","affiliation":[{"name":"Tencent, Hunyuan, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8639-982X","authenticated-orcid":false,"given":"Shu-Tao","family":"Xia","sequence":"additional","affiliation":[{"name":"Tsinghua Shenzhen International Graduate School, Tsinghua University &amp; Peng Cheng Laboratory, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"pixelsplat: 3d gaussian splats from image pairs for scalable generalizable 3d reconstruction. 
arXiv preprint arXiv:2312.12337","author":"Charatan David","year":"2023","unstructured":"David Charatan, Sizhe Li, Andrea Tagliasacchi, and Vincent Sitzmann. 2023. pixelsplat: 3d gaussian splats from image pairs for scalable generalizable 3d reconstruction. arXiv preprint arXiv:2312.12337 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00229"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"e_1_3_2_1_4_1","volume-title":"MVSplat: Efficient 3D Gaussian Splatting from Sparse Multi-View Images. arXiv preprint arXiv:2403.14627","author":"Chen Yuedong","year":"2024","unstructured":"Yuedong Chen, Haofei Xu, Chuanxia Zheng, Bohan Zhuang, Marc Pollefeys, Andreas Geiger, Tat-Jen Cham, and Jianfei Cai. 2024. MVSplat: Efficient 3D Gaussian Splatting from Sparse Multi-View Images. arXiv preprint arXiv:2403.14627 (2024)."},{"key":"e_1_3_2_1_5_1","volume-title":"Text-to-3d using gaussian splatting. arXiv preprint arXiv:2309.16585","author":"Chen Zilong","year":"2023","unstructured":"Zilong Chen, Feng Wang, and Huaping Liu. 2023. Text-to-3d using gaussian splatting. arXiv preprint arXiv:2309.16585 (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"2023 d. Text-to-3d using gaussian splatting. arXiv preprint arXiv:2309.16585","author":"Chen Zilong","year":"2023","unstructured":"Zilong Chen, Feng Wang, and Huaping Liu. 2023 d. Text-to-3d using gaussian splatting. arXiv preprint arXiv:2309.16585 (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"V3D: Video Diffusion Models are Effective 3D Generators. arXiv preprint arXiv:2403.06738","author":"Chen Zilong","year":"2024","unstructured":"Zilong Chen, Yikai Wang, Feng Wang, Zhengyi Wang, and Huaping Liu. 2024. V3D: Video Diffusion Models are Effective 3D Generators. arXiv preprint arXiv:2403.06738 (2024)."},{"key":"e_1_3_2_1_8_1","volume-title":"Samir Yitzhak Gadre, et al","author":"Deitke Matt","year":"2024","unstructured":"Matt Deitke, Ruoshi Liu, Matthew Wallingford, Huong Ngo, Oscar Michel, Aditya Kusupati, Alan Fan, Christian Laforte, Vikram Voleti, Samir Yitzhak Gadre, et al. 2024. Objaverse-xl: A universe of 10m 3d objects. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"e_1_3_2_1_10_1","volume-title":"Google Scanned Objects: A High-Quality Dataset of 3D Scanned Household Items. (Apr","author":"Downs Laura","year":"2022","unstructured":"Laura Downs, Anthony Francis, Nate Koenig, Brandon Kinman, Ryan Hickman, Krista Reymann, ThomasB. McHugh, and Vincent Vanhoucke. 2022. Google Scanned Objects: A High-Quality Dataset of 3D Scanned Household Items. (Apr 2022)."},{"key":"e_1_3_2_1_11_1","volume-title":"FDC-NeRF: Learning Pose-Free Neural Radiance Fields with Flow-Depth Consistency. In ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 3615--3619","author":"Gao Huachen","year":"2024","unstructured":"Huachen Gao, Shihe Shen, Zhe Zhang, Kaiqiang Xiong, Rui Peng, Zhirui Gao, Qi Wang, Yugui Xie, and Ronggang Wang. 2024. FDC-NeRF: Learning Pose-Free Neural Radiance Fields with Flow-Depth Consistency. In ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 3615--3619."},{"key":"e_1_3_2_1_12_1","volume-title":"Lrm: Large reconstruction model for single image to 3d. 
arXiv preprint arXiv:2311.04400","author":"Hong Yicong","year":"2023","unstructured":"Yicong Hong, Kai Zhang, Jiuxiang Gu, Sai Bi, Yang Zhou, Difan Liu, Feng Liu, Kalyan Sunkavalli, Trung Bui, and Hao Tan. 2023. Lrm: Large reconstruction model for single image to 3d. arXiv preprint arXiv:2311.04400 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Shap-e: Generating conditional 3d implicit functions. arXiv preprint arXiv:2305.02463","author":"Jun Heewoo","year":"2023","unstructured":"Heewoo Jun and Alex Nichol. 2023. Shap-e: Generating conditional 3d implicit functions. arXiv preprint arXiv:2305.02463 (2023)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"key":"e_1_3_2_1_15_1","volume-title":"Instant3d: Fast text-to-3d with sparse-view generation and large reconstruction model. arXiv preprint arXiv:2311.06214","author":"Li Jiahao","year":"2023","unstructured":"Jiahao Li, Hao Tan, Kai Zhang, Zexiang Xu, Fujun Luan, Yinghao Xu, Yicong Hong, Kalyan Sunkavalli, Greg Shakhnarovich, and Sai Bi. 2023. Instant3d: Fast text-to-3d with sparse-view generation and large reconstruction model. arXiv preprint arXiv:2311.06214 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Sweetdreamer: Aligning geometric priors in 2d diffusion for consistent text-to-3d. arXiv preprint arXiv:2310.02596","author":"Li Weiyu","year":"2023","unstructured":"Weiyu Li, Rui Chen, Xuelin Chen, and Ping Tan. 2023. Sweetdreamer: Aligning geometric priors in 2d diffusion for consistent text-to-3d. arXiv preprint arXiv:2310.02596 (2023)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"e_1_3_2_1_18_1","volume-title":"Antonio Torralba, Sanja Fidler, and Karsten Kreis.","author":"Ling Huan","year":"2023","unstructured":"Huan Ling, Seung Wook Kim, Antonio Torralba, Sanja Fidler, and Karsten Kreis. 2023. Align your gaussians: Text-to-4d with dynamic 3d gaussians and composed diffusion models. arXiv preprint arXiv:2312.13763 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"One-2--3--45: Fast single image to 3d objects with consistent multi-view generation and 3d diffusion. arXiv preprint arXiv:2311.07885","author":"Liu Minghua","year":"2023","unstructured":"Minghua Liu, Ruoxi Shi, Linghao Chen, Zhuoyang Zhang, Chao Xu, Xinyue Wei, Hansheng Chen, Chong Zeng, Jiayuan Gu, and Hao Su. 2023. One-2--3--45: Fast single image to 3d objects with consistent multi-view generation and 3d diffusion. arXiv preprint arXiv:2311.07885 (2023)."},{"key":"e_1_3_2_1_20_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Liu Minghua","year":"2024","unstructured":"Minghua Liu, Chao Xu, Haian Jin, Linghao Chen, Mukund Varma T, Zexiang Xu, and Hao Su. 2024. One-2--3--45: Any single image to 3d mesh in 45 seconds without per-shape optimization. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"e_1_3_2_1_22_1","volume-title":"UniDream: Unifying Diffusion Priors for Relightable Text-to-3D Generation. arXiv preprint arXiv:2312.08754","author":"Liu Zexiang","year":"2023","unstructured":"Zexiang Liu, Yangguang Li, Youtian Lin, Xin Yu, Sida Peng, Yan-Pei Cao, Xiaojuan Qi, Xiaoshui Huang, Ding Liang, and Wanli Ouyang. 2023. UniDream: Unifying Diffusion Priors for Relightable Text-to-3D Generation. 
arXiv preprint arXiv:2312.08754 (2023)."},{"key":"e_1_3_2_1_23_1","volume-title":"Point-voxel cnn for efficient 3d deep learning. Advances in neural information processing systems","author":"Liu Zhijian","year":"2019","unstructured":"Zhijian Liu, Haotian Tang, Yujun Lin, and Song Han. 2019. Point-voxel cnn for efficient 3d deep learning. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Xiaoxiao Long Yuan-Chen Guo Cheng Lin Yuan Liu Zhiyang Dou Lingjie Liu Yuexin Ma Song-Hai Zhang Marc Habermann Christian Theobalt et al. 2023. Wonder3d: Single image to 3d using cross-domain diffusion. arXiv preprint arXiv:2310.15008 (2023).","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_13"},{"key":"e_1_3_2_1_26_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Luo Tiange","year":"2024","unstructured":"Tiange Luo, Chris Rockwell, Honglak Lee, and Justin Johnson. 2024. Scalable 3d captioning with pretrained models. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_27_1","volume-title":"Geodream: Disentangling 2d and geometric priors for high-fidelity and consistent 3d generation. arXiv preprint arXiv:2311.17971","author":"Ma Baorui","year":"2023","unstructured":"Baorui Ma, Haoge Deng, Junsheng Zhou, Yu-Shen Liu, Tiejun Huang, and Xinlong Wang. 2023. Geodream: Disentangling 2d and geometric priors for high-fidelity and consistent 3d generation. arXiv preprint arXiv:2311.17971 (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"International Conference on Machine Learning. PMLR, 7246--7257","author":"Ma Baorui","year":"2021","unstructured":"Baorui Ma, Zhizhong Han, Yu-Shen Liu, and Matthias Zwicker. 2021. Neural-Pull: Learning Signed Distance Function from Point clouds by Learning to Pull Space onto Surface. In International Conference on Machine Learning. PMLR, 7246--7257."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00621"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00622"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01242"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"e_1_3_2_1_34_1","volume-title":"Point-e: A system for generating 3d point clouds from complex prompts. arXiv preprint arXiv:2212.08751","author":"Nichol Alex","year":"2022","unstructured":"Alex Nichol, Heewoo Jun, Prafulla Dhariwal, Pamela Mishkin, and Mark Chen. 2022. Point-e: A system for generating 3d point clouds from complex prompts. arXiv preprint arXiv:2212.08751 (2022)."},{"key":"e_1_3_2_1_35_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et al. 2023. Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)."},{"key":"e_1_3_2_1_36_1","first-page":"56932","article-title":"Gens: Generalizable neural surface reconstruction from multi-view images","volume":"36","author":"Peng Rui","year":"2023","unstructured":"Rui Peng, Xiaodong Gu, Luyang Tang, Shihe Shen, Fanqi Yu, and Ronggang Wang. 2023. 
Gens: Generalizable neural surface reconstruction from multi-view images. Advances in Neural Information Processing Systems, Vol. 36 (2023), 56932--56945.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00845"},{"key":"e_1_3_2_1_38_1","volume-title":"Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988","author":"Poole Ben","year":"2022","unstructured":"Ben Poole, Ajay Jain, Jonathan T Barron, and Ben Mildenhall. 2022. Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_40_1","volume-title":"Let 2d diffusion model know 3d-consistency for robust text-to-3d generation. arXiv preprint arXiv:2303.07937","author":"Seo Junyoung","year":"2023","unstructured":"Junyoung Seo, Wooseok Jang, Min-Seop Kwak, Hyeonsu Kim, Jaehoon Ko, Junho Kim, Jin-Hwa Kim, Jiyoung Lee, and Seungryong Kim. 2023. Let 2d diffusion model know 3d-consistency for robust text-to-3d generation. arXiv preprint arXiv:2303.07937 (2023)."},{"key":"e_1_3_2_1_41_1","volume-title":"Zero123: a single image to consistent multi-view diffusion base model. arXiv preprint arXiv:2310.15110","author":"Shi Ruoxi","year":"2023","unstructured":"Ruoxi Shi, Hansheng Chen, Zhuoyang Zhang, Minghua Liu, Chao Xu, Xinyue Wei, Linghao Chen, Chong Zeng, and Hao Su. 2023. Zero123: a single image to consistent multi-view diffusion base model. arXiv preprint arXiv:2310.15110 (2023)."},{"key":"e_1_3_2_1_42_1","volume-title":"Mvdream: Multi-view diffusion for 3d generation. arXiv preprint arXiv:2308.16512","author":"Shi Yichun","year":"2023","unstructured":"Yichun Shi, Peng Wang, Jianglong Ye, Mai Long, Kejie Li, and Xiao Yang. 2023. Mvdream: Multi-view diffusion for 3d generation. arXiv preprint arXiv:2308.16512 (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"Splatter image: Ultra-fast single-view 3d reconstruction. arXiv preprint arXiv:2312.13150","author":"Szymanowicz Stanislaw","year":"2023","unstructured":"Stanislaw Szymanowicz, Christian Rupprecht, and Andrea Vedaldi. 2023. Splatter image: Ultra-fast single-view 3d reconstruction. arXiv preprint arXiv:2312.13150 (2023)."},{"key":"e_1_3_2_1_44_1","volume-title":"LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation. arXiv preprint arXiv:2402.05054","author":"Tang Jiaxiang","year":"2024","unstructured":"Jiaxiang Tang, Zhaoxi Chen, Xiaokang Chen, Tengfei Wang, Gang Zeng, and Ziwei Liu. 2024. LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation. arXiv preprint arXiv:2402.05054 (2024)."},{"key":"e_1_3_2_1_45_1","volume-title":"Dreamgaussian: Generative gaussian splatting for efficient 3d content creation. arXiv preprint arXiv:2309.16653","author":"Tang Jiaxiang","year":"2023","unstructured":"Jiaxiang Tang, Jiawei Ren, Hang Zhou, Ziwei Liu, and Gang Zeng. 2023. Dreamgaussian: Generative gaussian splatting for efficient 3d content creation. arXiv preprint arXiv:2309.16653 (2023)."},{"key":"e_1_3_2_1_46_1","volume-title":"Sv3d: Novel multi-view synthesis and 3d generation from a single image using latent video diffusion. arXiv preprint arXiv:2403.12008","author":"Voleti Vikram","year":"2024","unstructured":"Vikram Voleti, Chun-Han Yao, Mark Boss, Adam Letts, David Pankratz, Dmitry Tochilkin, Christian Laforte, Robin Rombach, and Varun Jampani. 2024. 
Sv3d: Novel multi-view synthesis and 3d generation from a single image using latent video diffusion. arXiv preprint arXiv:2403.12008 (2024)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"e_1_3_2_1_48_1","volume-title":"Imagedream: Image-prompt multi-view diffusion for 3d generation. arXiv preprint arXiv:2312.02201","author":"Wang Peng","year":"2023","unstructured":"Peng Wang and Yichun Shi. 2023. Imagedream: Image-prompt multi-view diffusion for 3d generation. arXiv preprint arXiv:2312.02201 (2023)."},{"key":"e_1_3_2_1_49_1","volume-title":"Pf-lrm: Pose-free large reconstruction model for joint pose and shape prediction. arXiv preprint arXiv:2311.12024","author":"Wang Peng","year":"2023","unstructured":"Peng Wang, Hao Tan, Sai Bi, Yinghao Xu, Fujun Luan, Kalyan Sunkavalli, Wenping Wang, Zexiang Xu, and Kai Zhang. 2023. Pf-lrm: Pose-free large reconstruction model for joint pose and shape prediction. arXiv preprint arXiv:2311.12024 (2023)."},{"key":"e_1_3_2_1_50_1","volume-title":"DUSt3R: Geometric 3D Vision Made Easy. arXiv preprint arXiv:2312.14132","author":"Wang Shuzhe","year":"2023","unstructured":"Shuzhe Wang, Vincent Leroy, Yohann Cabon, Boris Chidlovskii, and Jerome Revaud. 2023. DUSt3R: Geometric 3D Vision Made Easy. arXiv preprint arXiv:2312.14132 (2023)."},{"key":"e_1_3_2_1_51_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Wang Zhengyi","year":"2024","unstructured":"Zhengyi Wang, Cheng Lu, Yikai Wang, Fan Bao, Chongxuan Li, Hang Su, and Jun Zhu. 2024. Prolificdreamer: High-fidelity and diverse text-to-3d generation with variational score distillation. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00545"},{"key":"e_1_3_2_1_53_1","volume-title":"Agg: Amortized generative 3d gaussians for single image to 3d. arXiv preprint arXiv:2401.04099","author":"Xu Dejia","year":"2024","unstructured":"Dejia Xu, Ye Yuan, Morteza Mardani, Sifei Liu, Jiaming Song, Zhangyang Wang, and Arash Vahdat. 2024. Agg: Amortized generative 3d gaussians for single image to 3d. arXiv preprint arXiv:2401.04099 (2024)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02003"},{"key":"e_1_3_2_1_55_1","unstructured":"Yinghao Xu Hao Tan Fujun Luan Sai Bi Peng Wang Jiahao Li Zifan Shi Kalyan Sunkavalli Gordon Wetzstein Zexiang Xu et al. 2023. Dmv3d: Denoising multi-view diffusion using 3d large reconstruction model. arXiv preprint arXiv:2311.09217 (2023)."},{"key":"e_1_3_2_1_56_1","volume-title":"Gaussiandreamer: Fast generation from text to 3d gaussian splatting with point cloud priors. arXiv preprint arXiv:2310.08529","author":"Yi Taoran","year":"2023","unstructured":"Taoran Yi, Jiemin Fang, Guanjun Wu, Lingxi Xie, Xiaopeng Zhang, Wenyu Liu, Qi Tian, and Xinggang Wang. 2023. Gaussiandreamer: Fast generation from text to 3d gaussian splatting with point cloud priors. arXiv preprint arXiv:2310.08529 (2023)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28522"},{"key":"e_1_3_2_1_58_1","volume-title":"LCM: Locally Constrained Compact Point Cloud Model for Masked Point Modeling. arXiv preprint arXiv:2405.17149","author":"Zha Yaohua","year":"2024","unstructured":"Yaohua Zha, Naiqi Li, Yanzi Wang, Tao Dai, Hang Guo, Bin Chen, Zhi Wang, Zhihao Ouyang, and Shu-Tao Xia. 2024. 
LCM: Locally Constrained Compact Point Cloud Model for Masked Point Modeling. arXiv preprint arXiv:2405.17149 (2024)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01302"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_61_1","volume-title":"Gps-gaussian: Generalizable pixel-wise 3d gaussian splatting for real-time human novel view synthesis. arXiv preprint arXiv:2312.02155","author":"Zheng Shunyuan","year":"2023","unstructured":"Shunyuan Zheng, Boyao Zhou, Ruizhi Shao, Boning Liu, Shengping Zhang, Liqiang Nie, and Yebin Liu. 2023. Gps-gaussian: Generalizable pixel-wise 3d gaussian splatting for real-time human novel view synthesis. arXiv preprint arXiv:2312.02155 (2023)."},{"key":"e_1_3_2_1_62_1","volume-title":"International Conference on Learning Representations","author":"Zhou Junsheng","year":"2024","unstructured":"Junsheng Zhou, Jinsheng Wang, Baorui Ma, Yu-Shen Liu, Tiejun Huang, and Xinlong Wang. 2024. Uni3D: Exploring Unified 3D Representation at Scale. International Conference on Learning Representations (2024)."},{"key":"e_1_3_2_1_63_1","volume-title":"Triplane meets gaussian splatting: Fast and generalizable single-view 3d reconstruction with transformers. arXiv preprint arXiv:2312.09147","author":"Zou Zi-Xin","year":"2023","unstructured":"Zi-Xin Zou, Zhipeng Yu, Yuan-Chen Guo, Yangguang Li, Ding Liang, Yan-Pei Cao, and Song-Hai Zhang. 2023. Triplane meets gaussian splatting: Fast and generalizable single-view 3d reconstruction with transformers. arXiv preprint arXiv:2312.09147 (2023)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680920","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680920","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:34Z","timestamp":1750295854000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680920"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":63,"alternative-id":["10.1145\/3664647.3680920","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680920","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
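The block above is a Crossref REST API "work" record ("message-type":"work") for the MM '24 paper "Large Point-to-Gaussian Model for Image-to-3D Generation" (DOI 10.1145/3664647.3680920). Below is a minimal sketch, assuming Python 3 and network access, of how such a record can be retrieved from api.crossref.org and how a few of the fields shown above (title, DOI, type, container title, references-count, authors) can be read. The User-Agent string and mailto address are placeholders for Crossref's polite-pool etiquette, not part of the record itself.

# Minimal sketch: fetch and read a Crossref "work" record like the one above.
# Assumes Python 3 standard library only; replace the mailto placeholder with
# a real contact address before use.
import json
import urllib.request

DOI = "10.1145/3664647.3680920"  # DOI of the record shown above
url = f"https://api.crossref.org/works/{DOI}"
req = urllib.request.Request(
    url,
    headers={"User-Agent": "example-client/0.1 (mailto:you@example.org)"},  # placeholder contact
)

with urllib.request.urlopen(req) as resp:
    record = json.load(resp)  # same envelope as above: status / message-type / message

msg = record["message"]
print(msg["title"][0])            # Large Point-to-Gaussian Model for Image-to-3D Generation
print(msg["DOI"], msg["type"])    # 10.1145/3664647.3680920 proceedings-article
print(msg["container-title"][0])  # Proceedings of the 32nd ACM International Conference on Multimedia
print("references:", msg["references-count"])  # 63

# Authors are listed under "author" with given/family names and optional ORCIDs.
for a in msg["author"]:
    print(a["given"], a["family"], a.get("ORCID", ""))

The "reference" array in the record carries the 63 cited works (some as structured DOIs, others only as Crossref's "unstructured" strings), so the same msg["reference"] list can be iterated in the same way if the bibliography is needed.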