{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:47:53Z","timestamp":1774021673626,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","funder":[{"name":"Beijing Natural Science Foundation","award":["221013, QY24384"],"award-info":[{"award-number":["221013, QY24384"]}]},{"name":"Beijing Training Program of Innovation and Entrepreneurship for Undergraduates","award":["S202414430024"],"award-info":[{"award-number":["S202414430024"]}]},{"DOI":"10.13039\/100020595","name":"National Science and Technology Council","doi-asserted-by":"publisher","award":["111-2221-E-006-112-MY3"],"award-info":[{"award-number":["111-2221-E-006-112-MY3"]}],"id":[{"id":"10.13039\/100020595","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Deutsche Forschungsgemeinschaft (DFG, German Research Foundation)","award":["EXC 2117 ? 422037984"],"award-info":[{"award-number":["EXC 2117 ? 422037984"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730670","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["IP-Prompter: Training-Free Theme-Specific Image Generation via Dynamic Visual Prompting"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6433-2678","authenticated-orcid":false,"given":"Yuxin","family":"Zhang","sequence":"first","affiliation":[{"name":"MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2675-1650","authenticated-orcid":false,"given":"Minyan","family":"Luo","sequence":"additional","affiliation":[{"name":"MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6502-145X","authenticated-orcid":false,"given":"Weiming","family":"Dong","sequence":"additional","affiliation":[{"name":"MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2411-3594","authenticated-orcid":false,"given":"Xiao","family":"Yang","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7787-6428","authenticated-orcid":false,"given":"Haibin","family":"Huang","sequence":"additional","affiliation":[{"name":"ByteDance Inc., San Jose, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8243-9513","authenticated-orcid":false,"given":"Chongyang","family":"Ma","sequence":"additional","affiliation":[{"name":"ByteDance Inc., San Jose, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5803-2185","authenticated-orcid":false,"given":"Oliver","family":"Deussen","sequence":"additional","affiliation":[{"name":"University of Konstanz, Konstanz, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6699-2944","authenticated-orcid":false,"given":"Tong-Yee","family":"Lee","sequence":"additional","affiliation":[{"name":"National Cheng-Kung University, Tainan, Taiwan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8343-9665","authenticated-orcid":false,"given":"Changsheng","family":"Xu","sequence":"additional","affiliation":[{"name":"MAIS, Institute of Automation, Chinese Academy of Sciences, Beijing, China and School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00290"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657423"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618154"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1813"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00630"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00840"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00662"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00868"},{"key":"e_1_3_3_2_10_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Gal Rinon","year":"2023","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit\u00a0H Bermano, Gal Chechik, and Daniel Cohen-Or. 2023. An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_11_1","first-page":"322","volume-title":"European Conference on Computer Vision (ECCV)","author":"Gal Rinon","year":"2024","unstructured":"Rinon Gal, Or Lichter, Elad Richardson, Or Patashnik, Amit\u00a0H Bermano, Gal Chechik, and Daniel Cohen-Or. 2024. LCM-Lookahead for Encoder-based Text-to-Image Personalization. In European Conference on Computer Vision (ECCV). Springer, 322\u2013340."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618184"},{"key":"e_1_3_3_2_13_1","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Gu Yuchao","year":"2024","unstructured":"Yuchao Gu, Xintao Wang, Jay\u00a0Zhangjie Wu, Yujun Shi, Yunpeng Chen, Zihan Fan, Wuyou Xiao, Rui Zhao, Shuning Chang, Weijia Wu, et\u00a0al. 2024a. Mix-of-Show: Decentralized Low-Rank Adaptation for Multi-Concept Customization of Diffusion Models. In Advances in Neural Information Processing Systems (NeurIPS) , Vol.\u00a036."},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"crossref","unstructured":"Zheng Gu Shiyuan Yang Jing Liao Jing Huo and Yang Gao. 2024b. Analogist: Out-of-the-box Visual In-Context Learning with Image Diffusion Model. ACM Transactions on Graphics 43 4 Article 130 (July 2024) 15\u00a0pages.","DOI":"10.1145\/3658136"},{"key":"e_1_3_3_2_15_1","unstructured":"Yingqing He Menghan Xia Haoxin Chen Xiaodong Cun Yuan Gong Jinbo Xing Yong Zhang Xintao Wang Chao Weng Ying Shan et\u00a0al. 2023. Animate-A-Story: Storytelling with Retrieval-Augmented Video Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.06940 (2023)."},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00457"},{"key":"e_1_3_3_2_17_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Hu Edward\u00a0J","year":"2021","unstructured":"Edward\u00a0J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_18_1","unstructured":"Lianghua Huang Wei Wang Zhi-Fan Wu Huanzhang Dou Yupeng Shi Yutong Feng Chen Liang Yu Liu and Jingren Zhou. 2024a. Group diffusion transformers are unsupervised multitask learners. (2024)."},{"key":"e_1_3_3_2_19_1","unstructured":"Lianghua Huang Wei Wang Zhi-Fan Wu Yupeng Shi Huanzhang Dou Chen Liang Yutong Feng Yu Liu and Jingren Zhou. 2024b. In-context lora for diffusion transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.23775 (2024)."},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687658"},{"key":"e_1_3_3_2_21_1","unstructured":"Sangwon Jang Jaehyeong Jo Kimin Lee and Sung\u00a0Ju Hwang. 2024. Identity decoupling for multi-subject personalization of text-to-image models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.04243 (2024)."},{"key":"e_1_3_3_2_22_1","unstructured":"Jaeseok Jeong Junho Kim Yunjey Choi Gayoung Lee and Youngjung Uh. 2024. Visual Style Prompting with Swapping Self-Attention. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.12974 (2024)."},{"key":"e_1_3_3_2_23_1","unstructured":"Jiaxiu Jiang Yabo Zhang Kailai Feng Xiaohe Wu and Wangmeng Zuo. 2024. MC2: Multi-concept Guidance for Customized Multi-concept Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.05268 (2024)."},{"key":"e_1_3_3_2_24_1","volume-title":"International Conference on Machine Learning (ICML)","author":"Jin Chen","year":"2024","unstructured":"Chen Jin, Ryutaro Tanno, Amrutha Saseendran, Tom Diethe, and Philip\u00a0Alexander Teare. 2024. An Image is Worth Multiple Words: Discovering Object Level Concepts using Multi-Concept Prompt Learning. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_3_2_25_1","unstructured":"Gao Junyao Liu Yanchen Sun Yanan Tang Yinhao Zeng Yanhong Chen Kai and Zhao Cairong. 2024. StyleShot: A Snapshot on Any Style. arXiv preprint arxiv:https:\/\/arXiv.org\/abs\/2407.01414 (2024)."},{"key":"e_1_3_3_2_26_1","first-page":"253","volume-title":"European Conference on Computer Vision (ECCV)","author":"Kong Zhe","year":"2024","unstructured":"Zhe Kong, Yong Zhang, Tianyu Yang, Tao Wang, Kaihao Zhang, Bizhu Wu, Guanying Chen, Wei Liu, and Wenhan Luo. 2024. OMG: Occlusion-friendly Personalized Multi-concept Generation in Diffusion Models. In European Conference on Computer Vision (ECCV). Springer, 253\u2013270."},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"e_1_3_3_2_29_1","unstructured":"Black\u00a0Forest Labs. 2023. FLUX. https:\/\/github.com\/black-forest-labs\/flux."},{"key":"e_1_3_3_2_30_1","first-page":"110","volume-title":"European Conference on Computer Vision (ECCV)","author":"Li Wen","year":"2024","unstructured":"Wen Li, Muyuan Fang, Cheng Zou, Biao Gong, Ruobing Zheng, Meng Wang, Jingdong Chen, and Ming Yang. 2024b. StyleTokenizer: Defining Image Style by a Single Instance for Controlling Diffusion Models. In European Conference on Computer Vision (ECCV). Springer, 110\u2013126."},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00825"},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00592"},{"key":"e_1_3_3_2_33_1","volume-title":"International Conference on Machine Learning (ICML)","author":"Liu Zhiheng","year":"2023","unstructured":"Zhiheng Liu, Ruili Feng, Kai Zhu, Yifei Zhang, Kecheng Zheng, Yu Liu, Deli Zhao, Jingren Zhou, and Yang Cao. 2023. Cones: Concept Neurons in Diffusion Models for Customized Generation. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_5"},{"key":"e_1_3_3_2_35_1","unstructured":"Jiawei Mao Xiaoke Huang Yunfei Xie Yuanqi Chang Mude Hui Bingjie Xu and Yuyin Zhou. 2024. Story-Adapter: A Training-free Iterative Framework for Long Story Visualization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.06244 (2024)."},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"crossref","unstructured":"Gaurav Parmar Or Patashnik Kuan-Chieh Wang Daniil Ostashev Srinivasa Narasimhan Jun-Yan Zhu Daniel Cohen-Or and Kfir Aberman. 2025. Object-level Visual Prompts for Compositional Image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.01424 (2025).","DOI":"10.1145\/3757377.3763867"},{"key":"e_1_3_3_2_38_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Podell Dustin","year":"2023","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2023. SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"crossref","unstructured":"Senthil Purushwalkam Akash Gokul Shafiq Joty and Nikhil Naik. 2024. Bootpig: Bootstrapping zero-shot personalized image generation capabilities in pretrained diffusion models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.13974 (2024).","DOI":"10.1007\/978-3-031-91907-7_15"},{"key":"e_1_3_3_2_40_1","first-page":"8748","volume-title":"International Conference on Machine Learning (ICML)","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning (ICML). 8748\u20138763."},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"crossref","unstructured":"Elad Richardson Kfir Goldberg Yuval Alaluf and Daniel Cohen-Or. 2024. ConceptLab: Creative Concept Generation using VLM-Guided Diffusion Prior Constraints. ACM Transactions on Graphics 43 3 Article 34 (June 2024) 14\u00a0pages.","DOI":"10.1145\/3659578"},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73232-4_24"},{"key":"e_1_3_3_2_44_1","unstructured":"Chaehun Shin Jooyoung Choi Heeseung Kim and Sungroh Yoon. 2024. Large-Scale Text-to-Image Model with Inpainting is a Zero-Shot Subject-Driven Image Generator. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.15466 (2024)."},{"key":"e_1_3_3_2_45_1","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Sohn Kihyuk","year":"2024","unstructured":"Kihyuk Sohn, Lu Jiang, Jarred Barber, Kimin Lee, Nataniel Ruiz, Dilip Krishnan, Huiwen Chang, Yuanzhen Li, Irfan Essa, Michael Rubinstein, and Dilip Krishnan. 2024. StyleDrop: Text-to-Image Synthesis of Any Style. In Advances in Neural Information Processing Systems (NeurIPS) , Vol.\u00a036."},{"key":"e_1_3_3_2_46_1","unstructured":"Zhenxiong Tan Songhua Liu Xingyi Yang Qiaochu Xue and Xinchao Wang. 2024. Ominicontrol: Minimal and universal control for diffusion transformer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.15098 3 (2024)."},{"key":"e_1_3_3_2_47_1","first-page":"479","volume-title":"European Conference on Computer Vision (ECCV)","author":"Tao Ming","year":"2024","unstructured":"Ming Tao, Bing-Kun Bao, Hao Tang, Yaowei Wang, and Changsheng Xu. 2024. StoryImager: A Unified and Efficient Framework for Coherent Story Visualization and Completion. In European Conference on Computer Vision (ECCV) (Milan, Italy). Springer-Verlag, Berlin, Heidelberg, 479\u2013495."},{"key":"e_1_3_3_2_48_1","unstructured":"Kolors Team. 2024. Kolors-Character. https:\/\/huggingface.co\/spaces\/Kwai-Kolors\/Kolors-Character-With-Flux."},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"crossref","unstructured":"Yoad Tewel Omri Kaduri Rinon Gal Yoni Kasten Lior Wolf Gal Chechik and Yuval Atzmon. 2024. Training-Free Consistent Text-to-Image Generation. ACM Transactions on Graphics 43 4 Article 52 (July 2024) 18\u00a0pages.","DOI":"10.1145\/3658157"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"crossref","unstructured":"Yael Vinker Andrey Voynov Daniel Cohen-Or and Ariel Shamir. 2023. Concept Decomposition for Visual Exploration and Inspiration. ACM Transactions on Graphics 42 6 Article 241 (Dec. 2023) 13\u00a0pages.","DOI":"10.1145\/3618315"},{"key":"e_1_3_3_2_51_1","unstructured":"Haofan Wang Matteo Spinelli Qixun Wang Xu Bai Zekui Qin and Anthony Chen. 2024b. InstantStyle: Free Lunch towards Style-Preserving in Text-to-Image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.02733 (2024)."},{"key":"e_1_3_3_2_52_1","unstructured":"Qixun Wang Xu Bai Haofan Wang Zekui Qin Anthony Chen Huaxia Li Xu Tang and Yao Hu. 2024a. InstantID: Zero-shot Identity-Preserving Generation in Seconds. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.07519 (2024)."},{"key":"e_1_3_3_2_53_1","unstructured":"Wen Wang Canyu Zhao Hao Chen Zhekai Chen Kecheng Zheng and Chunhua Shen. 2024c. AutoStory: Generating Diverse Storytelling Images with Minimal Human Effort. International Journal of Computer Vision (2024) 1\u201322."},{"key":"e_1_3_3_2_54_1","unstructured":"Xuezhi Wang Jason Wei Dale Schuurmans Quoc Le Ed Chi Sharan Narang Aakanksha Chowdhery and Denny Zhou. 2022. Self-consistency improves chain of thought reasoning in language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.11171 (2022)."},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00713"},{"key":"e_1_3_3_2_57_1","unstructured":"Yu Xu Fan Tang Juan Cao Yuxin Zhang Oliver Deussen Weiming Dong Jintao Li and Tong-Yee Lee. 2024. Break-for-Make: Modular Low-Rank Adaptations for Composable Content-Style Customization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.19456 (2024)."},{"key":"e_1_3_3_2_58_1","unstructured":"Shuai Yang Yuying Ge Yang Li Yukang Chen Yixiao Ge Ying Shan and Yingcong Chen. 2024. SEED-Story: Multimodal Long Story Generation with Large Language Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.08683 (2024). https:\/\/arxiv.org\/abs\/2407.08683"},{"key":"e_1_3_3_2_59_1","unstructured":"Xianjun Yang Wei Cheng Xujiang Zhao Wenchao Yu Linda Petzold and Haifeng Chen. 2023. Dynamic prompting: A unified framework for prompt tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.02909 (2023)."},{"key":"e_1_3_3_2_60_1","unstructured":"Hu Ye Jun Zhang Sibo Liu Xiao Han and Wei Yang. 2023. IP-Adapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models. arXiv preprint arxiv:https:\/\/arXiv.org\/abs\/2308.06721 (2023)."},{"key":"e_1_3_3_2_61_1","unstructured":"Chun-Hsiao Yeh Ta-Ying Cheng He-Yen Hsieh Chuan-En Lin Yi Ma Andrew Markham Niki Trigoni Hsiang-Tsung Kung and Yubei Chen. 2024. Gen4Gen: Generative Data Pipeline for Generative Multi-Concept Composition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.15504 (2024)."},{"key":"e_1_3_3_2_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680894"},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00648"},{"key":"e_1_3_3_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_2_65_1","doi-asserted-by":"crossref","unstructured":"Yuxin Zhang Weiming Dong Fan Tang Nisha Huang Haibin Huang Chongyang Ma Tong-Yee Lee Oliver Deussen and Changsheng Xu. 2023a. ProSpect: Prompt Spectrum for Attribute-Aware Personalization of Diffusion Models. ACM Transactions on Graphics 42 6 Article 244 (dec 2023) 14\u00a0pages.","DOI":"10.1145\/3618342"},{"key":"e_1_3_3_2_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00978"},{"key":"e_1_3_3_2_67_1","unstructured":"Yang Zhang Rui Zhang Xuecheng Nie Haochen Li Jikun Chen Yifan Hao Xin Zhang Luoqi Liu and Ling Li. 2024. SPDiffusion: Semantic Protection Diffusion for Multi-concept Text-to-image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.01327 (2024)."},{"key":"e_1_3_3_2_68_1","first-page":"17773","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Zhang Yuanhan","year":"2023","unstructured":"Yuanhan Zhang, Kaiyang Zhou, and Ziwei Liu. 2023d. What makes good examples for visual in-context learning?. In Advances in Neural Information Processing Systems (NeurIPS) , Vol.\u00a036. 17773\u201317794."},{"key":"e_1_3_3_2_69_1","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","author":"Zhou Yupeng","year":"2024","unstructured":"Yupeng Zhou, Daquan Zhou, Ming-Ming Cheng, Jiashi Feng, and Qibin Hou. 2024b. StoryDiffusion: Consistent Self-Attention for Long-Range Image and Video Generation. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_70_1","unstructured":"Zhengguang Zhou Jing Li Huaxia Li Nemo Chen and Xu Tang. 2024a. StoryMaker: Towards Holistic Consistent Characters in Text-to-image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.12576 (2024)."},{"key":"e_1_3_3_2_71_1","unstructured":"Zhuofan Zong Dongzhi Jiang Bingqi Ma Guanglu Song Hao Shao Dazhong Shen Yu Liu and Hongsheng Li. 2024. EasyRef: Omni-Generalized Group Image Reference for Diffusion Models via Multimodal LLM. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.09618 (2024)."}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730670","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:54:08Z","timestamp":1774018448000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730670"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":70,"alternative-id":["10.1145\/3721238.3730670","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730670","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}