{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:55:43Z","timestamp":1781538943025,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810834","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1015-1024","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["RainbowDreamer: Taming Semantic Controls for Attribute-Consistent Text-to-3D Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8246-7177","authenticated-orcid":false,"given":"Weiyi","family":"Bu","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3607-2236","authenticated-orcid":false,"given":"Xiaodong","family":"Cun","sequence":"additional","affiliation":[{"name":"Great Bay University, Dongguan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0252-9664","authenticated-orcid":false,"given":"Rui","family":"Yin","sequence":"additional","affiliation":[{"name":"Hangzhou City University, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8269-6501","authenticated-orcid":false,"given":"Jiantao","family":"Yuan","sequence":"additional","affiliation":[{"name":"Hangzhou City University, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6629-1264","authenticated-orcid":false,"given":"Wei","family":"Qi","sequence":"additional","affiliation":[{"name":"Hangzhou City University, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00764"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657527"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Hila Chefer Yuval Alaluf Yael Vinker Lior Wolf and Daniel Cohen-Or. 2023. Attend-and-excite: Attention-based semantic guidance for text-to-image diffusion models. ACM transactions on Graphics (TOG) 42 4 (2023) 1\u201310.","DOI":"10.1145\/3592116"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"e_1_3_3_1_6_2","first-page":"128","volume-title":"European Conference on Computer Vision","author":"Chen Yongwei","year":"2024","unstructured":"Yongwei Chen, Tengfei Wang, Tong Wu, Xingang Pan, Kui Jia, and Ziwei Liu. 2024. Comboverse: Compositional 3d assets creation using spatially-aware diffusion guidance. In European Conference on Computer Vision. Springer, 128\u2013146."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27886"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02022"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02012"},{"key":"e_1_3_3_1_10_2","first-page":"29839","volume-title":"Advances in Neural Information Processing Systems","author":"Gao Ruihan","year":"2024","unstructured":"Ruihan Gao, Kangle Deng, Gengshan Yang, Wenzhen Yuan, and Jun-Yan Zhu. 2024. Tactile DreamFusion: Exploiting Tactile Sensing for 3D Generation. In Advances in Neural Information Processing Systems , A.\u00a0Globerson, L.\u00a0Mackey, D.\u00a0Belgrave, A.\u00a0Fan, U.\u00a0Paquet, J.\u00a0Tomczak, and C.\u00a0Zhang (Eds.), Vol.\u00a037. Curran Associates, Inc., 29839\u201329863."},{"key":"e_1_3_3_1_11_2","volume-title":"The Eleventh International Conference on Learning Representations","author":"Hertz Amir","year":"2023","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2023. Prompt-to-Prompt Image Editing with Cross-Attention Control. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_3_1_12_2","first-page":"6840","volume-title":"Advances in Neural Information Processing Systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Advances in Neural Information Processing Systems , H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.), Vol.\u00a033. Curran Associates, Inc., 6840\u20136851. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"e_1_3_3_1_13_2","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free diffusion guidance. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.12598 (2022)."},{"key":"e_1_3_3_1_14_2","volume-title":"International Conference on Learning Representations","author":"Hu Edward\u00a0J","year":"2022","unstructured":"Edward\u00a0J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00473"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2425"},{"key":"e_1_3_3_1_17_2","unstructured":"Heewoo Jun and Alex Nichol. 2023. Shap-e: Generating conditional 3d implicit functions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.02463 (2023)."},{"key":"e_1_3_3_1_18_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Lee Kyungmin","year":"2024","unstructured":"Kyungmin Lee, Kihyuk Sohn, and Jinwoo Shin. 2024. DreamFlow: High-quality text-to-3D generation by Approximating Probability Flow. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_19_2","unstructured":"Chenghao Li Chaoning Zhang Joseph Cho Atish Waghwase Lik-Hang Lee Francois Rameau Yang Yang Sung-Ho Bae and Choong\u00a0Seon Hong. 2023. Generative ai meets 3d: A survey on text-to-3d in aigc era. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.06131 (2023)."},{"key":"e_1_3_3_1_20_2","unstructured":"Runjia Li Junlin Han Luke Melas-Kyriazi Chunyi Sun Zhaochong An Zhongrui Gui Shuyang Sun Philip Torr and Tomas Jakab. 2024. DreamBeast: Distilling 3D Fantastical Animals with Part-Aware Knowledge Transfer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.08271 (2024)."},{"key":"e_1_3_3_1_21_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Li Weiyu","year":"2024","unstructured":"Weiyu Li, Rui Chen, Xuelin Chen, and Ping Tan. 2024. SweetDreamer: Aligning Geometric Priors in 2D diffusion for Consistent Text-to-3D. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_22_2","unstructured":"Xiaolong Li Jiawei Mo Ying Wang Chethan Parameshwara Xiaohan Fei Ashwin Swaminathan CJ Taylor Zhuowen Tu Paolo Favaro and Stefano Soatto. 2024. Grounded Compositional and Diverse Text-to-3D with Pretrained Multi-View Diffusion Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.18065 (2024)."},{"key":"e_1_3_3_1_23_2","unstructured":"Xiaoyu Li Qi Zhang Di Kang Weihao Cheng Yiming Gao Jingbo Zhang Zhihao Liang Jing Liao Yan-Pei Cao and Ying Shan. 2024. Advances in 3d generation: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.17807 (2024)."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00623"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"e_1_3_3_1_26_2","unstructured":"Yuanze Lin Ronald Clark and Philip Torr. 2024. Dreampolisher: Towards high-quality text-to-3d generation via geometric diffusion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.17237 (2024)."},{"key":"e_1_3_3_1_27_2","unstructured":"Jian Liu Xiaoshui Huang Tianyu Huang Lu Chen Yuenan Hou Shixiang Tang Ziwei Liu Wanli Ouyang Wangmeng Zuo Junjun Jiang et\u00a0al. 2024. A comprehensive survey on 3d content generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.01166 (2024)."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"e_1_3_3_1_29_2","unstructured":"Baorui Ma Haoge Deng Junsheng Zhou Yu-Shen Liu Tiejun Huang and Xinlong Wang. 2023. Geodream: Disentangling 2d and geometric priors for high-fidelity and consistent 3d generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.17971 (2023)."},{"key":"e_1_3_3_1_30_2","unstructured":"Yiwei Ma Yijun Fan Jiayi Ji Haowei Wang Xiaoshuai Sun Guannan Jiang Annan Shu and Rongrong Ji. 2023. X-dreamer: Creating high-quality 3d content by bridging the domain gap between text-to-2d and text-to-3d generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.00085 (2023)."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"e_1_3_3_1_32_2","unstructured":"Alex Nichol Heewoo Jun Prafulla Dhariwal Pamela Mishkin and Mark Chen. 2022. Point-e: A system for generating 3d point clouds from complex prompts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.08751 (2022)."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/3DV62453.2024.00026"},{"key":"e_1_3_3_1_34_2","unstructured":"Ben Poole Ajay Jain Jonathan\u00a0T Barron and Ben Mildenhall. 2022. Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2209.14988 (2022)."},{"key":"e_1_3_3_1_35_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Qian Guocheng","year":"2024","unstructured":"Guocheng Qian, Jinjie Mai, Abdullah Hamdi, Jian Ren, Aliaksandr Siarohin, Bing Li, Hsin-Ying Lee, Ivan Skorokhodov, Peter Wonka, Sergey Tulyakov, and Bernard Ghanem. 2024. Magic123: One Image to High-Quality 3D Object Generation Using Both 2D and 3D Diffusion Priors. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=0jHkUDyEO9"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00946"},{"key":"e_1_3_3_1_37_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_3_1_38_2","first-page":"3536","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Rassin Royi","year":"2023","unstructured":"Royi Rassin, Eran Hirsch, Daniel Glickman, Shauli Ravfogel, Yoav Goldberg, and Gal Chechik. 2023. Linguistic Binding in Diffusion Models: Enhancing Attribute Correspondence through Attention Map Alignment. In Advances in Neural Information Processing Systems , A.\u00a0Oh, T.\u00a0Naumann, A.\u00a0Globerson, K.\u00a0Saenko, M.\u00a0Hardt, and S.\u00a0Levine (Eds.), Vol.\u00a036. Curran Associates, Inc., 3536\u20133559. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/0b08d733a5d45a547344c4e9d88bb8bc-Paper-Conference.pdf"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_3_1_41_2","unstructured":"Seed. 2025. Introduction to Techniques Used in Seed1.6. https:\/\/seed.bytedance.com\/zh\/seed1_6."},{"key":"e_1_3_3_1_42_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Shi Yichun","year":"2024","unstructured":"Yichun Shi, Peng Wang, Jianglong Ye, Mai Long, Kejie Li, and Xiao Yang. 2024. MVDream: Multi-view Diffusion for 3D Generation. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_43_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Sun Jingxiang","year":"2024","unstructured":"Jingxiang Sun, Bo Zhang, Ruizhi Shao, Lizhen Wang, Wen Liu, Zhenda Xie, and Yebin Liu. 2024. DreamCraft3D: Hierarchical 3D Generation with Bootstrapped Diffusion Prior. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_44_2","unstructured":"Shaorong Sun Shuchao Pang Yazhou Yao and Xiaoshui Huang. 2024. Comogen: A controllable text-to-3d multi-object generation framework. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.00590 (2024)."},{"key":"e_1_3_3_1_45_2","unstructured":"Boshi Tang Jianan Wang Zhiyong Wu and Lei Zhang. 2023. Stable score distillation for high-quality 3d generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.09305 (2023)."},{"key":"e_1_3_3_1_46_2","unstructured":"Jiaxiang Tang. 2022. Stable-dreamfusion: Text-to-3D with Stable-diffusion. https:\/\/github.com\/ashawkey\/stable-dreamfusion."},{"key":"e_1_3_3_1_47_2","unstructured":"Alexander Vilesov Pradyumna Chari and Achuta Kadambi. 2023. Cg3d: Compositional generation for text-to-3d via gaussian splatting. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.17907 (2023)."},{"key":"e_1_3_3_1_48_2","unstructured":"Haofan Wang Matteo Spinelli Qixun Wang Xu Bai Zekui Qin and Anthony Chen. 2024. Instantstyle: Free lunch towards style-preserving in text-to-image generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.02733 (2024)."},{"key":"e_1_3_3_1_49_2","first-page":"20965","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Wang Hengyi","year":"2024","unstructured":"Hengyi Wang, Jingwen Wang, and Lourdes Agapito. 2024. Morpheus: Neural dynamic 360deg surface reconstruction from monocular rgb-d video. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 20965\u201320976."},{"key":"e_1_3_3_1_50_2","unstructured":"Peihao Wang Zhiwen Fan Dejia Xu Dilin Wang Sreyas Mohan Forrest Iandola Rakesh Ranjan Yilei Li Qiang Liu Zhangyang Wang et\u00a0al. 2023. Steindreamer: Variance reduction for text-to-3d score distillation via stein identity. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.00604 (2023)."},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28364"},{"key":"e_1_3_3_1_52_2","first-page":"8406","volume-title":"Advances in Neural Information Processing Systems","author":"Wang Zhengyi","year":"2023","unstructured":"Zhengyi Wang, Cheng Lu, Yikai Wang, Fan Bao, Chongxuan Li, Hang Su, and Jun Zhu. 2023. Prolificdreamer: High-fidelity and diverse text-to-3d generation with variational score distillation. In Advances in Neural Information Processing Systems , Vol.\u00a036. 8406\u20138441."},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00817"},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00944"},{"key":"e_1_3_3_1_55_2","unstructured":"Ling Yang Zixiang Zhang Junlin Han Bohan Zeng Runjia Li Philip Torr and Wentao Zhang. 2024. Semantic Score Distillation Sampling for Compositional Text-to-3D Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.09009 (2024)."},{"key":"e_1_3_3_1_56_2","unstructured":"Hu Ye Jun Zhang Sibo Liu Xiao Han and Wei Yang. 2023. Ip-adapter: Text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.06721 (2023)."},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00649"},{"key":"e_1_3_3_1_58_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Yu Xin","year":"2024","unstructured":"Xin Yu, Yuan-Chen Guo, Yangguang Li, Ding Liang, Song-Hai Zhang, and Xiaojuan Qi. 2024. Text-to-3D with Classifier Score Distillation. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_59_2","unstructured":"Bohan Zeng Shanglin Li Yutang Feng Ling Yang Hong Li Sicheng Gao Jiaming Liu Conghui He Wentao Zhang Jianzhuang Liu et\u00a0al. 2023. Ipdreamer: Appearance-controllable 3d object generation with complex image prompts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.05375 (2023)."},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_1_61_2","first-page":"70","volume-title":"European Conference on Computer Vision","author":"Zhang Yang","year":"2024","unstructured":"Yang Zhang, Teoh\u00a0Tze Tzun, Lim\u00a0Wei Hern, and Kenji Kawaguchi. 2024. Enhancing semantic fidelity in text-to-image synthesis: Attention regulation in diffusion models. In European Conference on Computer Vision. Springer, 70\u201386."},{"key":"e_1_3_3_1_62_2","unstructured":"Junwei Zhou Xueting Li Lu Qi and Ming-Hsuan Yang. 2024. Layout-your-3D: Controllable and Precise 3D Generation with 2D Blueprint. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.15391 (2024)."}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:23:10Z","timestamp":1781536990000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810834"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":61,"alternative-id":["10.1145\/3805622.3810834","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810834","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}