{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,20]],"date-time":"2026-05-20T16:49:15Z","timestamp":1779295755854,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611800","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"6898-6906","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":33,"title":["RoomDreamer: Text-Driven 3D Indoor Scene Synthesis with Coherent Geometry and Texture"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8366-5088","authenticated-orcid":false,"given":"Liangchen","family":"Song","sequence":"first","affiliation":[{"name":"University at Buffalo, Buffalo, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0900-1512","authenticated-orcid":false,"given":"Liangliang","family":"Cao","sequence":"additional","affiliation":[{"name":"Apple Inc., Cupertino, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9075-2307","authenticated-orcid":false,"given":"Hongyu","family":"Xu","sequence":"additional","affiliation":[{"name":"Apple Inc., Cupertino, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6707-4616","authenticated-orcid":false,"given":"Kai","family":"Kang","sequence":"additional","affiliation":[{"name":"Apple Inc., Cupertino, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5711-4375","authenticated-orcid":false,"given":"Feng","family":"Tang","sequence":"additional","affiliation":[{"name":"Apple Inc., Cupertino, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7901-8793","authenticated-orcid":false,"given":"Junsong","family":"Yuan","sequence":"additional","affiliation":[{"name":"University at Buffalo, Buffalo, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0899-8294","authenticated-orcid":false,"given":"Zhao","family":"Yang","sequence":"additional","affiliation":[{"name":"Apple Inc., Cupertino, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 12608--12618","author":"Titas Anciukevivc","year":"2023","unstructured":"Titas Anciukevivc ius, Zexiang Xu, Matthew Fisher, Paul Henderson, Hakan Bilen, Niloy J Mitra, and Paul Guerrero. 2023. Renderdiffusion: Image diffusion for 3d reconstruction, inpainting and generation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 12608--12618."},{"key":"e_1_3_2_1_2_1","volume-title":"Despoina Paschalidou, Xingguang Yan, Gordon Wetzstein, Leonidas Guibas, and Andrea Tagliasacchi.","author":"Bahmani Sherwin","year":"2023","unstructured":"Sherwin Bahmani, Jeong Joon Park, Despoina Paschalidou, Xingguang Yan, Gordon Wetzstein, Leonidas Guibas, and Andrea Tagliasacchi. 2023. CC3D: Layout-Conditioned Generation of Compositional 3D Scenes. arXiv preprint arXiv:2303.12074 (2023)."},{"key":"e_1_3_2_1_3_1","volume-title":"MultiDiffusion: Fusing Diffusion Paths for Controlled Image Generation. arXiv preprint arXiv:2302.08113","author":"Bar-Tal Omer","year":"2023","unstructured":"Omer Bar-Tal, Lior Yariv, Yaron Lipman, and Tali Dekel. 2023. MultiDiffusion: Fusing Diffusion Paths for Controlled Image Generation. arXiv preprint arXiv:2302.08113 (2023)."},{"key":"e_1_3_2_1_4_1","first-page":"25102","article-title":"Gaudi: A neural architect for immersive 3d scene generation","volume":"35","author":"Bautista Miguel Angel","year":"2022","unstructured":"Miguel Angel Bautista, Pengsheng Guo, Samira Abnar, Walter Talbott, Alexander Toshev, Zhuoyuan Chen, Laurent Dinh, Shuangfei Zhai, Hanlin Goh, Daniel Ulbricht, et al. 2022. Gaudi: A neural architect for immersive 3d scene generation. Advances in Neural Information Processing Systems, Vol. 35 (2022), 25102--25116.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","volume-title":"Efros","author":"Brooks Tim","year":"2023","unstructured":"Tim Brooks, Aleksander Holynski, and Alexei A. Efros. 2023. InstructPix2Pix: Learning to Follow Image Editing Instructions. In CVPR."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"e_1_3_2_1_7_1","volume-title":"Axel Levy, Miika Aittala, Shalini De Mello, Tero Karras, and Gordon Wetzstein.","author":"Chan Eric R","year":"2023","unstructured":"Eric R Chan, Koki Nagano, Matthew A Chan, Alexander W Bergman, Jeong Joon Park, Axel Levy, Miika Aittala, Shalini De Mello, Tero Karras, and Gordon Wetzstein. 2023. Generative Novel View Synthesis with 3D-Aware Diffusion Models. arXiv preprint arXiv:2304.02602 (2023)."},{"key":"e_1_3_2_1_8_1","volume-title":"Fantasia3D: Disentangling Geometry and Appearance for High-quality Text-to-3D Content Creation. arXiv preprint arXiv:2303.13873","author":"Chen Rui","year":"2023","unstructured":"Rui Chen, Yongwei Chen, Ningxin Jiao, and Kui Jia. 2023. Fantasia3D: Disentangling Geometry and Appearance for High-quality Text-to-3D Content Creation. arXiv preprint arXiv:2303.13873 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"Set-the-Scene: Global-Local Training for Generating Controllable NeRF Scenes. arXiv preprint arXiv:2303.13450","author":"Cohen-Bar Dana","year":"2023","unstructured":"Dana Cohen-Bar, Elad Richardson, Gal Metzer, Raja Giryes, and Daniel Cohen-Or. 2023. Set-the-Scene: Global-Local Training for Generating Controllable NeRF Scenes. arXiv preprint arXiv:2303.13450 (2023)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00093"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021","author":"Dehghan Afshin","year":"2021","unstructured":"Afshin Dehghan, Gilad Baruch, Zhuoyuan Chen, Yuri Feigin, Peter Fu, Thomas Gebauer, Daniel Kurz, Tal Dimry, Brandon Joffe, Arik Schwartz, and Elad Shulman. 2021. ARKitScenes: A Diverse Real-World Dataset For 3D Indoor Scene Understanding Using Mobile RGB-D Data. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks 1, NeurIPS Datasets and Benchmarks 2021, December 2021, virtual, Joaquin Vanschoren and Sai-Kit Yeung (Eds.)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Congyue Deng Chiyu Jiang Charles R Qi Xinchen Yan Yin Zhou Leonidas Guibas Dragomir Anguelov et al. 2022. NeRDi: Single-View NeRF Synthesis with Language-Guided Diffusion as General Image Priors. arXiv preprint arXiv:2212.03267 (2022).","DOI":"10.1109\/CVPR52729.2023.01977"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01404"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01075"},{"key":"e_1_3_2_1_15_1","volume-title":"NerfDiff: Single-image View Synthesis with NeRF-guided Distillation from 3D-aware Diffusion. arXiv preprint arXiv:2302.10109","author":"Gu Jiatao","year":"2023","unstructured":"Jiatao Gu, Alex Trevithick, Kai-En Lin, Josh Susskind, Christian Theobalt, Lingjie Liu, and Ravi Ramamoorthi. 2023. NerfDiff: Single-image View Synthesis with NeRF-guided Distillation from 3D-aware Diffusion. arXiv preprint arXiv:2302.10109 (2023)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Ayaan Haque Matthew Tancik Alexei Efros Aleksander Holynski and Angjoo Kanazawa. 2023. Instruct-NeRF2NeRF: Editing 3D Scenes with Instructions. (2023).","DOI":"10.1109\/ICCV51070.2023.01808"},{"key":"e_1_3_2_1_17_1","volume-title":"Delta Denoising Score. arXiv preprint arXiv:2304.07090","author":"Hertz Amir","year":"2023","unstructured":"Amir Hertz, Kfir Aberman, and Daniel Cohen-Or. 2023. Delta Denoising Score. arXiv preprint arXiv:2304.07090 (2023)."},{"key":"e_1_3_2_1_18_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, Vol. 33 (2020), 6840--6851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_19_1","volume-title":"Text2Room: Extracting Textured 3D Meshes from 2D Text-to-Image Models. arXiv preprint arXiv:2303.11989","author":"H\u00f6llein Lukas","year":"2023","unstructured":"Lukas H\u00f6llein, Ang Cao, Andrew Owens, Justin Johnson, and Matthias Nie\u00dfner. 2023. Text2Room: Extracting Textured 3D Meshes from 2D Text-to-Image Models. arXiv preprint arXiv:2303.11989 (2023)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00610"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3414685.3417861"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"e_1_3_2_1_23_1","volume-title":"Pavel Tokmakov, Sergey Zakharov, and Carl Vondrick.","author":"Liu Ruoshi","year":"2023","unstructured":"Ruoshi Liu, Rundi Wu, Basile Van Hoorick, Pavel Tokmakov, Sergey Zakharov, and Carl Vondrick. 2023. Zero-1-to-3: Zero-shot One Image to 3D Object. arxiv: 2303.11328 [cs.CV]"},{"key":"e_1_3_2_1_24_1","volume-title":"RealFusion: 360\u00b0 Reconstruction of Any Object from a Single Image. arXiv e-prints","author":"Melas-Kyriazi Luke","year":"2023","unstructured":"Luke Melas-Kyriazi, Christian Rupprecht, Iro Laina, and Andrea Vedaldi. 2023. RealFusion: 360\u00b0 Reconstruction of Any Object from a Single Image. arXiv e-prints (2023), arXiv-2302."},{"key":"e_1_3_2_1_25_1","volume-title":"Peter Kontschieder, and Matthias Nie\u00dfner.","author":"M\u00fcller Norman","year":"2022","unstructured":"Norman M\u00fcller, Yawar Siddiqui, Lorenzo Porzi, Samuel Rota Bul\u00f2, Peter Kontschieder, and Matthias Nie\u00dfner. 2022. DiffRF: Rendering-Guided 3D Radiance Field Diffusion. arXiv preprint arXiv:2212.01206 (2022)."},{"key":"e_1_3_2_1_26_1","first-page":"12013","article-title":"Atiss: Autoregressive transformers for indoor scene synthesis","volume":"34","author":"Paschalidou Despoina","year":"2021","unstructured":"Despoina Paschalidou, Amlan Kar, Maria Shugrina, Karsten Kreis, Andreas Geiger, and Sanja Fidler. 2021. Atiss: Autoregressive transformers for indoor scene synthesis. Advances in Neural Information Processing Systems, Vol. 34 (2021), 12013--12026.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_27_1","volume-title":"DreamFusion: Text-to-3D using 2D Diffusion. arXiv","author":"Poole Ben","year":"2022","unstructured":"Ben Poole, Ajay Jain, Jonathan T. Barron, and Ben Mildenhall. 2022. DreamFusion: Text-to-3D using 2D Diffusion. arXiv (2022)."},{"key":"e_1_3_2_1_28_1","volume-title":"Chris Hallacy, A. Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, A. Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In ICML."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3019967"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00634"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01234"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2008.4562950"},{"key":"e_1_3_2_1_34_1","volume-title":"Denoising Diffusion Implicit Models. In 9th International Conference on Learning Representations, ICLR 2021","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021. Denoising Diffusion Implicit Models. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=St1giarCHLP"},{"key":"e_1_3_2_1_35_1","volume-title":"Generative modeling by estimating gradients of the data distribution. Advances in neural information processing systems","author":"Song Yang","year":"2019","unstructured":"Yang Song and Stefano Ermon. 2019. Generative modeling by estimating gradients of the data distribution. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_36_1","volume-title":"Stable-dreamfusion: Text-to-3D with Stable-diffusion. https:\/\/github.com\/ashawkey\/stable-dreamfusion.","author":"Tang Jiaxiang","year":"2022","unstructured":"Jiaxiang Tang. 2022. Stable-dreamfusion: Text-to-3D with Stable-diffusion. https:\/\/github.com\/ashawkey\/stable-dreamfusion."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01059"},{"key":"e_1_3_2_1_38_1","volume-title":"Score Jacobian Chaining: Lifting Pretrained 2D Diffusion Models for 3D Generation. arXiv preprint arXiv:2212.00774","author":"Wang Haochen","year":"2022","unstructured":"Haochen Wang, Xiaodan Du, Jiahao Li, Raymond A Yeh, and Greg Shakhnarovich. 2022. Score Jacobian Chaining: Lifting Pretrained 2D Diffusion Models for 3D Generation. arXiv preprint arXiv:2212.00774 (2022)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201362"},{"key":"e_1_3_2_1_40_1","unstructured":"Dejia Xu Yifan Jiang Peihao Wang Zhiwen Fan Yi Wang and Zhangyang Wang. 2022. NeuralLift-360: Lifting An In-the-wild 2D Photo to A 3D Object with 360\u00b0 Views. arXiv preprint arXiv:2211.16431."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01066"},{"key":"e_1_3_2_1_42_1","volume-title":"Adding conditional control to text-to-image diffusion models. arXiv preprint arXiv:2302.05543","author":"Zhang Lvmin","year":"2023","unstructured":"Lvmin Zhang and Maneesh Agrawala. 2023. Adding conditional control to text-to-image diffusion models. arXiv preprint arXiv:2302.05543 (2023)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Qinsheng Zhang Jiaming Song Xun Huang Yongxin Chen and Ming yu Liu. 2023. DiffCollage: Parallel Generation of Large Content with Diffusion Models. In CVPR.","DOI":"10.1109\/CVPR52729.2023.00982"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2012.24"},{"key":"e_1_3_2_1_45_1","volume-title":"CVPR","author":"Zhou Zhizhuo","unstructured":"Zhizhuo Zhou and Shubham Tulsiani. 2023. SparseFusion: Distilling View-conditioned Diffusion for 3D Reconstruction. In CVPR"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611800","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611800","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:09:57Z","timestamp":1755821397000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611800"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":45,"alternative-id":["10.1145\/3581783.3611800","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611800","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}