{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T07:57:56Z","timestamp":1776931076962,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":71,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,15]]},"DOI":"10.1145\/3757377.3763876","type":"proceedings-article","created":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T16:30:41Z","timestamp":1765211441000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Generating 360\u00b0 Video is What You Need For a 3D Scene"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8892-1191","authenticated-orcid":false,"given":"Zhaoyang","family":"Zhang","sequence":"first","affiliation":[{"name":"Yale University, New Haven, CT, USA and Adobe Research, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1060-6941","authenticated-orcid":false,"given":"Yannick","family":"Hold-Geoffroy","sequence":"additional","affiliation":[{"name":"Adobe Research, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3808-6092","authenticated-orcid":false,"given":"Milo\u0161","family":"Ha\u0161an","sequence":"additional","affiliation":[{"name":"Adobe Research, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1776-7696","authenticated-orcid":false,"given":"Ziwen","family":"Chen","sequence":"additional","affiliation":[{"name":"Oregon State University, Corvallis, OR, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5926-6266","authenticated-orcid":false,"given":"Fujun","family":"Luan","sequence":"additional","affiliation":[{"name":"Adobe Research, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2495-4979","authenticated-orcid":false,"given":"Julie","family":"Dorsey","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, CT, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3674-295X","authenticated-orcid":false,"given":"Yiwei","family":"Hu","sequence":"additional","affiliation":[{"name":"Adobe Research, San Jose, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,14]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"Adobe. 2025. Select Subject."},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"crossref","unstructured":"Jonathan\u00a0T. Barron Ben Mildenhall Dor Verbin Pratul\u00a0P. Srinivasan and Peter Hedman. 2022. Mip-NeRF 360: Unbounded Anti-Aliased Neural Radiance Fields. arxiv:https:\/\/arXiv.org\/abs\/2111.12077\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2111.12077","DOI":"10.1109\/CVPR52688.2022.00539"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Jonathan\u00a0T. Barron Ben Mildenhall Dor Verbin Pratul\u00a0P. Srinivasan and Peter Hedman. 2023. Zip-NeRF: Anti-Aliased Grid-Based Neural Radiance Fields. arxiv:https:\/\/arXiv.org\/abs\/2304.06706\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2304.06706","DOI":"10.1109\/ICCV51070.2023.01804"},{"key":"e_1_3_3_2_5_1","unstructured":"Andreas Blattmann Tim Dockhorn Sumith Kulal Daniel Mendelevitch Maciej Kilian Dominik Lorenz Yam Levi Zion English Vikram Voleti Adam Letts Varun Jampani and Robin Rombach. 2023a. Stable Video Diffusion: Scaling Latent Video Diffusion Models to Large Datasets. arxiv:https:\/\/arXiv.org\/abs\/2311.15127\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2311.15127"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"e_1_3_3_2_7_1","unstructured":"Tim Brooks Bill Peebles Connor Holmes Will DePue Yufei Guo Li Jing David Schnurr Joe Taylor Troy Luhman Eric Luhman Clarence Ng Ricky Wang and Aditya Ramesh. 2024. Video generation models as world simulators. (2024). https:\/\/openai.com\/research\/video-generation-models-as-world-simulators"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"crossref","unstructured":"David Charatan Sizhe Li Andrea Tagliasacchi and Vincent Sitzmann. 2024. pixelSplat: 3D Gaussian Splats from Image Pairs for Scalable Generalizable 3D Reconstruction. arxiv:https:\/\/arXiv.org\/abs\/2312.12337\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2312.12337","DOI":"10.1109\/CVPR52733.2024.01840"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"crossref","unstructured":"Zhaoxi Chen Guangcong Wang and Ziwei Liu. 2022. Text2Light: Zero-Shot Text-Driven HDR Panorama Generation. ACM Transactions on Graphics (TOG) 41 6 Article 195 (2022) 16\u00a0pages.","DOI":"10.1145\/3550454.3555447"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"crossref","unstructured":"Zilong Chen Yikai Wang Feng Wang Zhengyi Wang and Huaping Liu. 2024. V3D: Video Diffusion Models are Effective 3D Generators. arxiv:https:\/\/arXiv.org\/abs\/2403.06738\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2403.06738","DOI":"10.1109\/TPAMI.2025.3581312"},{"key":"e_1_3_3_2_11_1","unstructured":"Jaeyoung Chung Suyoung Lee Hyeongjin Nam Jaerin Lee and Kyoung\u00a0Mu Lee. 2023. LucidDreamer: Domain-free Generation of 3D Gaussian Splatting Scenes. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.13384 (2023)."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00682"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3596711.3596779"},{"key":"e_1_3_3_2_14_1","volume-title":"Forty-first International Conference on Machine Learning","author":"Esser Patrick","year":"2024","unstructured":"Patrick Esser, Sumith Kulal, Andreas Blattmann, Rahim Entezari, Jonas M\u00fcller, Harry Saini, Yam Levi, Dominik Lorenz, Axel Sauer, Frederic Boesel, et\u00a0al. 2024. Scaling rectified flow transformers for high-resolution image synthesis. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"crossref","unstructured":"Lin Gao Jie Yang Botao Zhang Jiamu Sun Yujie Yuan Hongbo Fu and Yu-Kun Lai. 2024. Real-time Large-scale Deformation of Gaussian Splatting. ACM Transactions on Graphics (SIGGRAPH Asia 2024) (2024).","DOI":"10.1145\/3687756"},{"key":"e_1_3_3_2_16_1","unstructured":"Junlin Han Filippos Kokkinos and Philip Torr. 2024. VFusion3D: Learning Scalable 3D Generative Models from Video Diffusion Models. arxiv:https:\/\/arXiv.org\/abs\/2403.12034\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2403.12034"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00727"},{"key":"e_1_3_3_2_18_1","unstructured":"Edward\u00a0J. Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang and Weizhu Chen. 2021. LoRA: Low-Rank Adaptation of Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2106.09685\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2106.09685"},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00926"},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02060"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"crossref","unstructured":"Kaiwen Jiang Yang Fu Mukund Varma\u00a0T Yash Belhe Xiaolong Wang Hao Su and Ravi Ramamoorthi. 2024. A Construct-Optimize Approach to Sparse View Synthesis without Camera Pose. SIGGRAPH (2024).","DOI":"10.1145\/3641519.3657427"},{"key":"e_1_3_3_2_22_1","volume-title":"Proceedings of the fourth Eurographics symposium on Geometry processing","volume":"7","author":"Kazhdan Michael","year":"2006","unstructured":"Michael Kazhdan, Matthew Bolitho, and Hugues Hoppe. 2006. Poisson surface reconstruction. In Proceedings of the fourth Eurographics symposium on Geometry processing , Vol.\u00a07."},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"crossref","unstructured":"Bernhard Kerbl Georgios Kopanas Thomas Leimk\u00fchler and George Drettakis. 2023. 3D Gaussian Splatting for Real-Time Radiance Field Rendering. ACM Transactions on Graphics 42 4 (July 2023). https:\/\/repo-sam.inria.fr\/fungraph\/3d-gaussian-splatting\/","DOI":"10.1145\/3592433"},{"key":"e_1_3_3_2_24_1","first-page":"19730","volume-title":"International conference on machine learning","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023a. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730\u201319742."},{"key":"e_1_3_3_2_25_1","unstructured":"Jiahao Li Hao Tan Kai Zhang Zexiang Xu Fujun Luan Yinghao Xu Yicong Hong Kalyan Sunkavalli Greg Shakhnarovich and Sai Bi. 2023b. Instant3D: Fast Text-to-3D with Sparse-View Generation and Large Reconstruction Model. arxiv:https:\/\/arXiv.org\/abs\/2311.06214\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2311.06214"},{"key":"e_1_3_3_2_26_1","unstructured":"Renjie Li Panwang Pan Bangbang Yang Dejia Xu Shijie Zhou Xuanyang Zhang Zeming Li Achuta Kadambi Zhangyang Wang Zhengzhong Tu and Zhiwen Fan. 2024b. 4K4DGen: Panoramic 4D Generation at 4K Resolution. arxiv:https:\/\/arXiv.org\/abs\/2406.13527\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2406.13527"},{"key":"e_1_3_3_2_27_1","unstructured":"Xinyang Li Zhangyu Lai Linning Xu Yansong Qu Liujuan Cao Shengchuan Zhang Bo Dai and Rongrong Ji. 2024a. Director3D: Real-world Camera Trajectory and 3D Scene Generation from Text. arXiv:https:\/\/arXiv.org\/abs\/2406.17601 (2024)."},{"key":"e_1_3_3_2_28_1","unstructured":"Haotian Liu Chunyuan Li Yuheng Li and Yong\u00a0Jae Lee. 2023a. Improved Baselines with Visual Instruction Tuning."},{"key":"e_1_3_3_2_29_1","volume-title":"NeurIPS","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong\u00a0Jae Lee. 2023b. Visual Instruction Tuning. In NeurIPS."},{"key":"e_1_3_3_2_30_1","unstructured":"Minghua Liu Chao Xu Haian Jin Linghao Chen Mukund Varma\u00a0T Zexiang Xu and Hao Su. 2024b. One-2-3-45: Any single image to 3d mesh in 45 seconds without per-shape optimization. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_31_1","unstructured":"Ruoshi Liu Rundi Wu Basile\u00a0Van Hoorick Pavel Tokmakov Sergey Zakharov and Carl Vondrick. 2023c. Zero-1-to-3: Zero-shot One Image to 3D Object. arxiv:https:\/\/arXiv.org\/abs\/2303.11328\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2303.11328"},{"key":"e_1_3_3_2_32_1","volume-title":"European Conference on Computer Vision (ECCV)","author":"Liu Yuheng","year":"2024","unstructured":"Yuheng Liu, Xinke Li, Xueting Li, Lu Qi, Chongshou Li, and Ming-Hsuan Yang. 2024a. Pyramid Diffusion for Fine 3D Large Scene Generation. In European Conference on Computer Vision (ECCV)."},{"key":"e_1_3_3_2_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","unstructured":"Thomas M\u00fcller Alex Evans Christoph Schied and Alexander Keller. 2022. Instant Neural Graphics Primitives with a Multiresolution Hash Encoding. ACM Trans. Graph. 41 4 Article 102 (July 2022) 15\u00a0pages. 10.1145\/3528223.3530127","DOI":"10.1145\/3528223.3530127"},{"key":"e_1_3_3_2_35_1","unstructured":"OpenAI. 2023. ChatGPT: A Large Language Model. https:\/\/chat.openai.com Accessed: 2025-03-07."},{"key":"e_1_3_3_2_36_1","unstructured":"Hao Ouyang Tiancheng Sun Stephen Lombardi and Kathryn Heal. 2023. Text2Immersion: Generative Immersive Scene with 3D Gaussians. Arxiv (2023)."},{"key":"e_1_3_3_2_37_1","unstructured":"Rishab Parthasarathy Zachary Ankner and Aaron Gokaslan. 2024. Vid3D: Synthesis of Dynamic 3D Scenes using 2D Video Diffusion. arxiv:https:\/\/arXiv.org\/abs\/2406.11196\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2406.11196"},{"key":"e_1_3_3_2_38_1","unstructured":"Adam Paszke Sam Gross Francisco Massa Adam Lerer James Bradbury Gregory Chanan Trevor Killeen Zeming Lin Natalia Gimelshein Luca Antiga et\u00a0al. 2019. Pytorch: An imperative style high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_3_2_40_1","unstructured":"Adam Polyak Amit Zohar Andrew Brown Andros Tjandra Animesh Sinha Ann Lee Apoorv Vyas Bowen Shi Chih-Yao Ma Ching-Yao Chuang David Yan Dhruv Choudhary Dingkang Wang Geet Sethi Guan Pang Haoyu Ma Ishan Misra Ji Hou Jialiang Wang Kiran Jagadeesh Kunpeng Li Luxin Zhang Mannat Singh Mary Williamson Matt Le Matthew Yu Mitesh\u00a0Kumar Singh Peizhao Zhang Peter Vajda Quentin Duval Rohit Girdhar Roshan Sumbaly Sai\u00a0Saketh Rambhatla Sam Tsai Samaneh Azadi Samyak Datta Sanyuan Chen Sean Bell Sharadh Ramaswamy Shelly Sheynin Siddharth Bhattacharya Simran Motwani Tao Xu Tianhe Li Tingbo Hou Wei-Ning Hsu Xi Yin Xiaoliang Dai Yaniv Taigman Yaqiao Luo Yen-Cheng Liu Yi-Chiao Wu Yue Zhao Yuval Kirstain Zecheng He Zijian He Albert Pumarola Ali Thabet Artsiom Sanakoyeu Arun Mallya Baishan Guo Boris Araya Breena Kerr Carleigh Wood Ce Liu Cen Peng Dimitry Vengertsev Edgar Schonfeld Elliot Blanchard Felix Juefei-Xu Fraylie Nord Jeff Liang John Hoffman Jonas Kohler Kaolin Fire Karthik Sivakumar Lawrence Chen Licheng Yu Luya Gao Markos Georgopoulos Rashel Moritz Sara\u00a0K. Sampson Shikai Li Simone Parmeggiani Steve Fine Tara Fowler Vladan Petrovic and Yuming Du. 2024. Movie Gen: A Cast of Media Foundation Models. arxiv:https:\/\/arXiv.org\/abs\/2410.13720\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2410.13720"},{"key":"e_1_3_3_2_41_1","first-page":"652","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Qi Charles\u00a0R","year":"2017","unstructured":"Charles\u00a0R Qi, Hao Su, Kaichun Mo, and Leonidas\u00a0J Guibas. 2017. Pointnet: Deep learning on point sets for 3d classification and segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition. 652\u2013660."},{"key":"e_1_3_3_2_42_1","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_2_43_1","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-Resolution Image Synthesis with Latent Diffusion Models. arxiv:https:\/\/arXiv.org\/abs\/2112.10752\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2112.10752"},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.445"},{"key":"e_1_3_3_2_45_1","unstructured":"Yichun Shi Peng Wang Jianglong Ye Mai Long Kejie Li and Xiao Yang. 2024. MVDream: Multi-view Diffusion for 3D Generation. arxiv:https:\/\/arXiv.org\/abs\/2308.16512\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2308.16512"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"crossref","unstructured":"Jaidev Shriram Alex Trevithick Lingjie Liu and Ravi Ramamoorthi. 2025. RealmDreamer: Text-Driven 3D Scene Generation with Inpainting and Depth Diffusion. International Conference on 3D Vision (3DV).","DOI":"10.1109\/3DV66043.2025.00086"},{"key":"e_1_3_3_2_47_1","unstructured":"Shitao Tang Fuyang Zhang Jiacheng Chen Peng Wang and Yasutaka Furukawa. 2023. MVDiffusion: Enabling Holistic Multi-view Image Generation with Correspondence-Aware Diffusion. arXiv (2023)."},{"key":"e_1_3_3_2_48_1","first-page":"84839","volume-title":"Advances in Neural Information Processing Systems","volume":"37","author":"Tian Keyu","year":"2024","unstructured":"Keyu Tian, Yi Jiang, Zehuan Yuan, Bingyue Peng, and Liwei Wang. 2024. Visual Autoregressive Modeling: Scalable Image Generation via Next-Scale Prediction. In Advances in Neural Information Processing Systems , A.\u00a0Globerson, L.\u00a0Mackey, D.\u00a0Belgrave, A.\u00a0Fan, U.\u00a0Paquet, J.\u00a0Tomczak, and C.\u00a0Zhang (Eds.), Vol.\u00a037. Curran Associates, Inc., 84839\u201384865. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2024\/file\/9a24e284b187f662681440ba15c416fb-Paper-Conference.pdf"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"crossref","unstructured":"Vikram Voleti Chun-Han Yao Mark Boss Adam Letts David Pankratz Dmitry Tochilkin Christian Laforte Robin Rombach and Varun Jampani. 2024. SV3D: Novel Multi-view Synthesis and 3D Generation from a Single Image using Latent Video Diffusion. arxiv:https:\/\/arXiv.org\/abs\/2403.12008\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2403.12008","DOI":"10.1007\/978-3-031-73232-4_25"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"crossref","unstructured":"Matthew Wallingford Anand Bhattad Aditya Kusupati Vivek Ramanujan Matt Deitke Aniruddha Kembhavi Roozbeh Mottaghi Wei-Chiu Ma and Ali Farhadi. 2024. From an Image to a Scene: Learning to Imagine the World from a Million 360\u00b0 Videos. Advances in Neural Information Processing Systems 37 (2024) 17743\u201317760.","DOI":"10.52202\/079017-0564"},{"key":"e_1_3_3_2_51_1","doi-asserted-by":"crossref","unstructured":"Hanyang Wang Fangfu Liu Jiawei Chi and Yueqi Duan. 2025. VideoScene: Distilling Video Diffusion Model to Generate 3D Scenes in One Step. arxiv:https:\/\/arXiv.org\/abs\/2504.01956\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2504.01956","DOI":"10.1109\/CVPR52734.2025.01536"},{"key":"e_1_3_3_2_52_1","unstructured":"Peng Wang Hao Tan Sai Bi Yinghao Xu Fujun Luan Kalyan Sunkavalli Wenping Wang Zexiang Xu and Kai Zhang. 2023. PF-LRM: Pose-Free Large Reconstruction Model for Joint Pose and Shape Prediction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.12024 (2023)."},{"key":"e_1_3_3_2_53_1","doi-asserted-by":"crossref","unstructured":"Qian Wang Weiqi Li Chong Mou Xinhua Cheng and Jian Zhang. 2024. 360DVD: Controllable Panorama Video Generation with 360-Degree Video Diffusion Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.06578 (2024).","DOI":"10.1109\/CVPR52733.2024.00660"},{"key":"e_1_3_3_2_54_1","volume-title":"The European Conference on Computer Vision Workshops (ECCVW)","author":"Wang Xintao","year":"2018","unstructured":"Xintao Wang, Ke Yu, Shixiang Wu, Jinjin Gu, Yihao Liu, Chao Dong, Yu Qiao, and Chen\u00a0Change Loy. 2018. ESRGAN: Enhanced super-resolution generative adversarial networks. In The European Conference on Computer Vision Workshops (ECCVW)."},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20068-7_39"},{"key":"e_1_3_3_2_56_1","unstructured":"Xinyue Wei Kai Zhang Sai Bi Hao Tan Fujun Luan Valentin Deschaintre Kalyan Sunkavalli Hao Su and Zexiang Xu. 2024. MeshLRM: Large Reconstruction Model for High-Quality Mesh. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.12385 (2024)."},{"key":"e_1_3_3_2_57_1","unstructured":"Haoning Wu Zicheng Zhang Weixia Zhang Chaofeng Chen Chunyi Li Liang Liao Annan Wang Erli Zhang Wenxiu Sun Qiong Yan Xiongkuo Min Guangtai Zhai and Weisi Lin. 2023. Q-Align: Teaching LMMs for Visual Scoring via Discrete Text-Defined Levels. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.17090 (2023). Equal Contribution by Wu Haoning and Zhang Zicheng. Project Lead by Wu Haoning. Corresponding Authors: Zhai Guangtai and Lin Weisi.."},{"key":"e_1_3_3_2_58_1","unstructured":"Tong Wu Shuai Yang Ryan Po Yinghao Xu Ziwei Liu Dahua Lin and Gordon Wetzstein. 2025. Video World Models with Long-term Spatial Memory. arxiv:https:\/\/arXiv.org\/abs\/2506.05284\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2506.05284"},{"key":"e_1_3_3_2_59_1","unstructured":"Tong Wu Yu-Jie Yuan Ling-Xiao Zhang Jie Yang Yan-Pei Cao Ling-Qi Yan and Lin Gao. 2024. Recent Advances in 3D Gaussian Splatting. arxiv:https:\/\/arXiv.org\/abs\/2403.11134\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2403.11134"},{"key":"e_1_3_3_2_60_1","unstructured":"Desai Xie Sai Bi Zhixin Shu Kai Zhang Zexiang Xu Yi Zhou S\u00f6ren Pirk Arie Kaufman Xin Sun and Hao Tan. 2024. LRM-Zero: Training Large Reconstruction Models with Synthesized Data. arxiv:https:\/\/arXiv.org\/abs\/2406.09371\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2406.09371"},{"key":"e_1_3_3_2_61_1","doi-asserted-by":"crossref","unstructured":"Shuai Yang Jing Tan Mengchen Zhang Tong Wu Yixuan Li Gordon Wetzstein Ziwei Liu and Dahua Lin. 2024. LayerPano3D: Layered 3D Panorama for Hyper-Immersive Scene Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.13252 (2024).","DOI":"10.1145\/3721238.3730643"},{"key":"e_1_3_3_2_62_1","unstructured":"Vickie Ye Ruilong Li Justin Kerr Matias Turkulainen Brent Yi Zhuoyang Pan Otto Seiskari Jianbo Ye Jeffrey Hu Matthew Tancik and Angjoo Kanazawa. 2024b. gsplat: An Open-Source Library for Gaussian Splatting. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.06765 (2024). arxiv:https:\/\/arXiv.org\/abs\/2409.06765\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2409.06765"},{"key":"e_1_3_3_2_63_1","unstructured":"Weicai Ye Chenhao Ji Zheng Chen Junyao Gao Xiaoshui Huang Song-Hai Zhang Wanli Ouyang Tong He Cairong Zhao and Guofeng Zhang. 2024a. DiffPano: Scalable and Consistent Text to Panorama Generation with Spherical Epipolar-Aware Diffusion. arxiv:https:\/\/arXiv.org\/abs\/2410.24203\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2410.24203"},{"key":"e_1_3_3_2_64_1","unstructured":"Hong-Xing Yu Haoyi Duan Charles Herrmann William\u00a0T. Freeman and Jiajun Wu. 2024a. WonderWorld: Interactive 3D Scene Generation from a Single Image. arxiv:https:\/\/arXiv.org\/abs\/2406.09394\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2406.09394"},{"key":"e_1_3_3_2_65_1","volume-title":"12th International Conference on Learning Representations, ICLR 2024","author":"Yu Lijun","year":"2024","unstructured":"Lijun Yu, Jos\u00e9 Lezama, Nitesh\u00a0B Gundavarapu, Luca Versari, Kihyuk Sohn, David Minnen, Yong Cheng, Agrim Gupta, Xiuye Gu, Alexander\u00a0G Hauptmann, et\u00a0al. 2024b. LANGUAGE MODEL BEATS DIFFUSION-TOKENIZER IS KEY TO VISUAL GENERATION. In 12th International Conference on Learning Representations, ICLR 2024."},{"key":"e_1_3_3_2_66_1","unstructured":"Wangbo Yu Jinbo Xing Li Yuan Wenbo Hu Xiaoyu Li Zhipeng Huang Xiangjun Gao Tien-Tsin Wong Ying Shan and Yonghong Tian. 2024c. ViewCrafter: Taming Video Diffusion Models for High-fidelity Novel View Synthesis. arxiv:https:\/\/arXiv.org\/abs\/2409.02048\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2409.02048"},{"key":"e_1_3_3_2_67_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i4.16440"},{"key":"e_1_3_3_2_68_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Zhang Cheng","year":"2024","unstructured":"Cheng Zhang, Qianyi Wu, Camilo Cruz\u00a0Gambardella, Xiaoshui Huang, Dinh Phung, Wanli Ouyang, and Jianfei Cai. 2024b. Taming Stable Diffusion for Text to 360\u25e6 Panorama Image Generation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_3_2_69_1","doi-asserted-by":"crossref","unstructured":"Kai Zhang Sai Bi Hao Tan Yuanbo Xiangli Nanxuan Zhao Kalyan Sunkavalli and Zexiang Xu. 2024a. GS-LRM: Large Reconstruction Model for 3D Gaussian Splatting. European Conference on Computer Vision (2024).","DOI":"10.1007\/978-3-031-72670-5_1"},{"key":"e_1_3_3_2_70_1","volume-title":"Open-Sora: Democratizing Efficient Video Production for All","author":"Zheng Zangwei","year":"2024","unstructured":"Zangwei Zheng, Xiangyu Peng, Tianji Yang, Chenhui Shen, Shenggui Li, Hongxin Liu, Yukun Zhou, Tianyi Li, and Yang You. 2024. Open-Sora: Democratizing Efficient Video Production for All. https:\/\/github.com\/hpcaitech\/Open-Sora"},{"key":"e_1_3_3_2_71_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72658-319"},{"key":"e_1_3_3_2_72_1","unstructured":"Chen Ziwen Hao Tan Kai Zhang Sai Bi Fujun Luan Yicong Hong Li Fuxin and Zexiang Xu. 2024. Long-LRM: Long-sequence Large Reconstruction Model for Wide-coverage Gaussian Splats. arXiv preprint 2410.12781 (2024)."}],"event":{"name":"SA Conference Papers '25: SIGGRAPH Asia 2025 Conference Papers","location":"Hong Kong Hong Kong","acronym":"SA Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the SIGGRAPH Asia 2025 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3757377.3763876","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T03:32:52Z","timestamp":1765251172000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3757377.3763876"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,14]]},"references-count":71,"alternative-id":["10.1145\/3757377.3763876","10.1145\/3757377"],"URL":"https:\/\/doi.org\/10.1145\/3757377.3763876","relation":{},"subject":[],"published":{"date-parts":[[2025,12,14]]},"assertion":[{"value":"2025-12-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}