{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:45:52Z","timestamp":1765309552990,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFF0905104"],"award-info":[{"award-number":["2023YFF0905104"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62132012, 62402262"],"award-info":[{"award-number":["62132012, 62402262"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Young\u00a0Elite\u00a0Scientists Sponsorship Program by CAST","award":["YESS20240286"],"award-info":[{"award-number":["YESS20240286"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754832","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:56:44Z","timestamp":1761375404000},"page":"9385-9394","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Synthesizing 3D Scenes via Diffusion Model that Incorporates Indoor Scene Characteristics"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-2683-7441","authenticated-orcid":false,"given":"Liang","family":"Yue","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0353-1977","authenticated-orcid":false,"given":"Shao-Kui","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing Normal University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5287-8320","authenticated-orcid":false,"given":"Lin","family":"Yuan","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8868-9997","authenticated-orcid":false,"given":"Yi-Tao","family":"Chen","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5761-2828","authenticated-orcid":false,"given":"Zirui","family":"Zhou","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0460-1586","authenticated-orcid":false,"given":"Song-Hai","family":"Zhang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Pervasive Computing, Ministry of Education, Beijing, China and Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/NILES50944.2020.9257973"},{"key":"e_1_3_2_2_2_1","unstructured":"Mikolaj Binkowski Danica J. Sutherland Michael Arbel and Arthur Gretton. 2021. Demystifying MMD GANs. arXiv:1801.01401 [stat.ML]"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01565"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.1990.126052"},{"key":"e_1_3_2_2_5_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs.CL]","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs.CL]"},{"key":"e_1_3_2_2_6_1","first-page":"8780","volume-title":"Wortman Vaughan (Eds.)","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion Models Beat GANs on Image Synthesis. In Advances in Neural Information Processing Systems, M. Ranzato, A. Beygelzimer, Y. Dauphin, P.S. Liang, and J. Wortman Vaughan (Eds.), Vol. 34. Curran Associates, Inc., 8780-8794. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/49ad23d1ec9fa4bd8d77d02681df5cfa-Paper.pdf"},{"key":"e_1_3_2_2_7_1","unstructured":"Chuan Fang Xiaotao Hu Kunming Luo and Ping Tan. 2023. Ctrl-Room: Controllable Text-to-3D Room Meshes Generation with Layout Constraints. arXiv:2310.03602 [cs.CV]"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01075"},{"key":"e_1_3_2_2_9_1","volume-title":"3D-FUTURE: 3D Furniture shape with TextURE. arXiv preprint arXiv:2009.09633","author":"Fu Huan","year":"2020","unstructured":"Huan Fu, Rongfei Jia, Lin Gao, Mingming Gong, Binqiang Zhao, Steve Maybank, and Dacheng Tao. 2020b. 3D-FUTURE: 3D Furniture shape with TextURE. arXiv preprint arXiv:2009.09633 (2020)."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130805"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.gmod.2020.101073"},{"key":"e_1_3_2_2_12_1","unstructured":"Ian J. Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative Adversarial Networks. arXiv:1406.2661 [stat.ML]"},{"key":"e_1_3_2_2_13_1","volume-title":"Advances in Neural Information Processing Systems","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/8a1d694707eb0fefe65871369074926d-Paper.pdf"},{"key":"e_1_3_2_2_14_1","first-page":"6840","volume-title":"Lin (Eds.)","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 6840-6851. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00727"},{"key":"e_1_3_2_2_16_1","unstructured":"Bahjat Kawar Roy Ganz and Michael Elad. 2023. Enhancing Diffusion-Based Image Synthesis with Robust Classifier Guidance. arXiv:2208.08664 [cs.CV]"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1177\/027836498600500106"},{"key":"e_1_3_2_2_18_1","first-page":"1","article-title":"Grains: Generative recursive autoencoders for indoor scenes","volume":"38","author":"Li Manyi","year":"2019","unstructured":"Manyi Li, Akshay Gadi Patil, Kai Xu, Siddhartha Chaudhuri, Owais Khan, Ariel Shamir, Changhe Tu, Baoquan Chen, Daniel Cohen-Or, and Hao Zhang. 2019. Grains: Generative recursive autoencoders for indoor scenes. ACM Transactions on Graphics (TOG), Vol. 38, 2 (2019), 1-16.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"e_1_3_2_2_19_1","volume-title":"Automatic Synthesis of Virtual Wheelchair Training Scenarios. In 2020 IEEE Conference on Virtual Reality and 3D User Interfaces (VR). IEEE, 539-547","author":"Li Wanwan","year":"2020","unstructured":"Wanwan Li, Javier Talavera, Amilcar Gomez Samayoa, Jyh-Ming Lien, and Lap-Fai Yu. 2020. Automatic Synthesis of Virtual Wheelchair Training Scenarios. In 2020 IEEE Conference on Virtual Reality and 3D User Interfaces (VR). IEEE, 539-547."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-69487-0_10"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"volume-title":"GRPE: Relative Positional Encoding for Graph Transformer. In ICLR2022 Machine Learning for Drug Discovery. https:\/\/openreview.net\/forum?id=GNfAFN_p1d","author":"Park Wonpyo","key":"e_1_3_2_2_24_1","unstructured":"Wonpyo Park, Woong-Gi Chang, Donggeon Lee, Juntae Kim, and seung-won hwang. 2022. GRPE: Relative Positional Encoding for Graph Transformer. In ICLR2022 Machine Learning for Drug Discovery. https:\/\/openreview.net\/forum?id=GNfAFN_p1d"},{"key":"e_1_3_2_2_25_1","first-page":"12013","volume-title":"Wortman Vaughan (Eds.)","volume":"34","author":"Paschalidou Despoina","year":"2021","unstructured":"Despoina Paschalidou, Amlan Kar, Maria Shugrina, Karsten Kreis, Andreas Geiger, and Sanja Fidler. 2021. ATISS: Autoregressive Transformers for Indoor Scene Synthesis. In Advances in Neural Information Processing Systems, M. Ranzato, A. Beygelzimer, Y. Dauphin, P.S. Liang, and J. Wortman Vaughan (Eds.), Vol. 34. Curran Associates, Inc., 12013-12026. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/64986d86a17424eeac96b08a6d519059-Paper.pdf"},{"key":"e_1_3_2_2_26_1","unstructured":"Ngoc-Quan Pham Thanh-Le Ha Tuan-Nam Nguyen Thai-Son Nguyen Elizabeth Salesky Sebastian Stueker Jan Niehues and Alexander Waibel. 2020. Relative Positional Encoding for Speech Recognition and Direct Translation. arXiv:2005.09940 [eess.AS]"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58586-0_10"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00618"},{"key":"e_1_3_2_2_29_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"8831","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 8821-8831. https:\/\/proceedings.mlr.press\/v139\/ramesh21a.html"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00634"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_32_1","volume-title":"Weber","author":"Sadat Seyedmorteza","year":"2024","unstructured":"Seyedmorteza Sadat, Manuel Kansy, Otmar Hilliges, and Romann M. Weber. 2024. No Training, No Problem: Rethinking Classifier-Free Guidance for Diffusion Models. arXiv:2407.02687 [cs.LG] https:\/\/arxiv.org\/abs\/2407.02687"},{"key":"e_1_3_2_2_33_1","volume-title":"Garnett (Eds.)","volume":"29","author":"Salimans Tim","year":"2016","unstructured":"Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, Xi Chen, and Xi Chen. 2016. Improved Techniques for Training GANs. In Advances in Neural Information Processing Systems, D. Lee, M. Sugiyama, U. Luxburg, I. Guyon, and R. Garnett (Eds.), Vol. 29. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2016\/file\/8a3363abe792db2d8761d6403605aeb7-Paper.pdf"},{"key":"e_1_3_2_2_34_1","volume-title":"Advances in Neural Information Processing Systems","author":"Shen Yifei","year":"2024","unstructured":"Yifei Shen, Xinyang Jiang, Yifan Yang, Yezhen Wang, Dongqi Han, and Dongsheng Li. 2024. Understanding and Improving Training-free Loss-based Diffusion Guidance. In Advances in Neural Information Processing Systems, A. Globerson, L. Mackey, D. Belgrave, A. Fan, U. Paquet, J. Tomczak, and C. Zhang (Eds.), Vol. 37. Curran Associates, Inc., 108974-109002. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2024\/file\/c4edc5113b4ffd4632718558fb66b9ef-Paper-Conference.pdf"},{"key":"e_1_3_2_2_35_1","unstructured":"Rahul Shenoy Zhihong Pan Kaushik Balakrishnan Qisen Cheng Yongmoon Jeon Heejune Yang and Jaewon Kim. 2024. Gradient-Free Classifier Guidance for Diffusion Model Sampling. arXiv:2411.15393 [cs.CV] https:\/\/arxiv.org\/abs\/2411.15393"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00092"},{"key":"e_1_3_2_2_37_1","volume-title":"Garnett (Eds.)","volume":"28","author":"Sohn Kihyuk","year":"2015","unstructured":"Kihyuk Sohn, Honglak Lee, and Xinchen Yan. 2015. Learning Structured Output Representation using Deep Conditional Generative Models. In Advances in Neural Information Processing Systems, C. Cortes, N. Lawrence, D. Lee, M. Sugiyama, and R. Garnett (Eds.), Vol. 28. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2015\/file\/8d55a249e6baa5c06772297520da2051-Paper.pdf"},{"key":"e_1_3_2_2_38_1","volume-title":"Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. dtextquotesingle Alch\u00e9-Buc","author":"Song Yang","year":"2019","unstructured":"Yang Song and Stefano Ermon. 2019. Generative Modeling by Estimating Gradients of the Data Distribution. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. dtextquotesingle Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2019\/file\/3001ef257407d5a371a96dcd947c7d93-Paper.pdf"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"crossref","unstructured":"Qi Sun Hang Zhou Wengang Zhou Li Li and Houqiang Li. 2024. Forest2Seq: Revitalizing Order Prior for Sequential Indoor Scene Synthesis. arXiv:2407.05388 [cs.CV] https:\/\/arxiv.org\/abs\/2407.05388","DOI":"10.1007\/978-3-031-72698-9_15"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"crossref","unstructured":"Jiapeng Tang Yinyu Nie Lev Markhasin Angela Dai Justus Thies and Matthias Nie\u00dfner. 2023. DiffuScene: Scene Graph Denoising Diffusion Probabilistic Model for Generative Indoor Scene Synthesis. arXiv:2303.14207 [cs.CV]","DOI":"10.1109\/CVPR52733.2024.01938"},{"key":"e_1_3_2_2_41_1","volume-title":"\u0141 ukasz Kaiser, and Illia Polosukhin","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141 ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00669"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3322941"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00021"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01212"},{"key":"e_1_3_2_2_46_1","volume-title":"Fast and scalable position-based layout synthesis. arXiv preprint arXiv:1809.10526","author":"Weiss Tomer","year":"2018","unstructured":"Tomer Weiss, Alan Litteneker, Noah Duncan, Masaki Nakada, Chenfanfu Jiang, Lap-Fai Yu, and Demetri Terzopoulos. 2018. Fast and scalable position-based layout synthesis. arXiv preprint arXiv:1809.10526 (2018)."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00988"},{"key":"e_1_3_2_2_48_1","first-page":"22370","volume-title":"Zhang (Eds.)","volume":"37","author":"Ye Haotian","year":"2024","unstructured":"Haotian Ye, Haowei Lin, Jiaqi Han, Minkai Xu, Sheng Liu, Yitao Liang, Jianzhu Ma, James Zou, and Stefano Ermon. 2024. TFG: Unified Training-Free Guidance for Diffusion Models. In Advances in Neural Information Processing Systems, A. Globerson, L. Mackey, D. Belgrave, A. Fan, U. Paquet, J. Tomczak, and C. Zhang (Eds.), Vol. 37. Curran Associates, Inc., 22370-22417. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2024\/file\/2818054fc6de6dacdda0f142a3475933-Paper-Conference.pdf"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2185520.2185552"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02118"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11390-019-1929-5"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2021.3050143"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680798"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475194"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2024.3488744"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.gmod.2021.101104"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3381866"},{"key":"e_1_3_2_2_58_1","volume-title":"Rethinking Positional Encoding. CoRR","author":"Zheng Jianqiao","year":"2021","unstructured":"Jianqiao Zheng, Sameera Ramasinghe, and Simon Lucey. 2021. Rethinking Positional Encoding. CoRR, Vol. abs\/2107.02561 (2021). arXiv:2107.02561 https:\/\/arxiv.org\/abs\/2107.02561"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754832","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:41:45Z","timestamp":1765309305000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754832"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":58,"alternative-id":["10.1145\/3746027.3754832","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754832","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}