{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,21]],"date-time":"2026-07-21T14:25:55Z","timestamp":1784643955685,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1145\/3680528.3687604","type":"proceedings-article","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T08:14:37Z","timestamp":1733213677000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["PALP: Prompt Aligned Personalization of Text-to-Image Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8423-3538","authenticated-orcid":false,"given":"Moab","family":"Arar","sequence":"first","affiliation":[{"name":"Tel-Aviv University, Tel Aviv, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2997-9601","authenticated-orcid":false,"given":"Andrey","family":"Voynov","sequence":"additional","affiliation":[{"name":"Google Research, Tel Aviv, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3037-3556","authenticated-orcid":false,"given":"Amir","family":"Hertz","sequence":"additional","affiliation":[{"name":"Google Research, Tel Aviv, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7628-7525","authenticated-orcid":false,"given":"Omri","family":"Avrahami","sequence":"additional","affiliation":[{"name":"Hebrew University of Jerusalem, Jerusalem, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1797-2143","authenticated-orcid":false,"given":"Shlomi","family":"Fruchter","sequence":"additional","affiliation":[{"name":"Google Research, Tel Aviv, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5419-3915","authenticated-orcid":false,"given":"Yael","family":"Pritch","sequence":"additional","affiliation":[{"name":"Google Research, Tel Aviv, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6777-7445","authenticated-orcid":false,"given":"Daniel","family":"Cohen-Or","sequence":"additional","affiliation":[{"name":"Tel Aviv University, Tel Aviv, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7082-7845","authenticated-orcid":false,"given":"Ariel","family":"Shamir","sequence":"additional","affiliation":[{"name":"Reichman University, Herzliya, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,12,3]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"publisher","unstructured":"Yuval Alaluf Elad Richardson Gal Metzer and Daniel Cohen-Or. 2023. A Neural Space-Time Representation for Text-to-Image Personalization. CoRR abs\/2305.15391 (2023). 10.48550\/ARXIV.2305.15391 arXiv:https:\/\/arXiv.org\/abs\/2305.15391","DOI":"10.48550\/ARXIV.2305.15391"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","unstructured":"Moab Arar Rinon Gal Yuval Atzmon Gal Chechik Daniel Cohen-Or Ariel Shamir and Amit\u00a0H. Bermano. 2023. Domain-Agnostic Tuning-Encoder for Fast Personalization of Text-To-Image Models. CoRR abs\/2307.06925 (2023). 10.48550\/ARXIV.2307.06925 arXiv:https:\/\/arXiv.org\/abs\/2307.06925","DOI":"10.48550\/ARXIV.2307.06925"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"publisher","unstructured":"Omri Avrahami Kfir Aberman Ohad Fried Daniel Cohen-Or and Dani Lischinski. 2023a. Break-A-Scene: Extracting Multiple Concepts from a Single Image. CoRR abs\/2305.16311 (2023). 10.48550\/ARXIV.2305.16311 arXiv:https:\/\/arXiv.org\/abs\/2305.16311","DOI":"10.48550\/ARXIV.2305.16311"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"publisher","unstructured":"Omri Avrahami Ohad Fried and Dani Lischinski. 2023b. Blended Latent Diffusion. ACM Trans. Graph. 42 4 (2023) 149:1\u2013149:11. 10.1145\/3592450https:\/\/dl.acm.org\/doi\/10.1145\/3592450","DOI":"10.1145\/3592450"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_41"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"crossref","unstructured":"Hila Chefer Yuval Alaluf Yael Vinker Lior Wolf and Daniel Cohen-Or. 2023a. Attend-and-excite: Attention-based semantic guidance for text-to-image diffusion models. ACM Transactions on Graphics (TOG) 42 4 (2023) 1\u201310.","DOI":"10.1145\/3592116"},{"key":"e_1_3_3_2_10_1","unstructured":"Hila Chefer Oran Lang Mor Geva Volodymyr Polosukhin Assaf Shocher Michal Irani Inbar Mosseri and Lior Wolf. 2023b. The Hidden Language of Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.00966 (2023)."},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","unstructured":"Wenhu Chen Hexiang Hu Yandong Li Nataniel Ruiz Xuhui Jia Ming-Wei Chang and William\u00a0W. Cohen. 2023. Subject-driven Text-to-Image Generation via Apprenticeship Learning. CoRR abs\/2304.00186 (2023). 10.48550\/ARXIV.2304.00186 arXiv:https:\/\/arXiv.org\/abs\/2304.00186","DOI":"10.48550\/ARXIV.2304.00186"},{"key":"e_1_3_3_2_12_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"e_1_3_3_2_13_1","unstructured":"Weixi Feng Xuehai He Tsu-Jui Fu Varun Jampani Arjun Akula Pradyumna Narayana Sugato Basu Xin\u00a0Eric Wang and William\u00a0Yang Wang. 2022. Training-free structured diffusion guidance for compositional text-to-image synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.05032 (2022)."},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_6"},{"key":"e_1_3_3_2_15_1","volume-title":"The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023","author":"Gal Rinon","year":"2023","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit\u00a0Haim Bermano, Gal Chechik, and Daniel Cohen-Or. 2023a. An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net. https:\/\/openreview.net\/pdf?id=NAQvF08TcyG"},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"publisher","unstructured":"Rinon Gal Moab Arar Yuval Atzmon Amit\u00a0H. Bermano Gal Chechik and Daniel Cohen-Or. 2023b. Encoder-based Domain Tuning for Fast Personalization of Text-to-Image Models. ACM Trans. Graph. 42 4 (2023) 150:1\u2013150:13. 10.1145\/3592133https:\/\/dl.acm.org\/doi\/10.1145\/3592133","DOI":"10.1145\/3592133"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","unstructured":"Rinon Gal Or Patashnik Haggai Maron Amit\u00a0H. Bermano Gal Chechik and Daniel Cohen-Or. 2022. StyleGAN-NADA: CLIP-guided domain adaptation of image generators. ACM Trans. Graph. 41 4 (2022) 141:1\u2013141:13. 10.1145\/3528223.3530164https:\/\/dl.acm.org\/doi\/10.1145\/3528223.3530164","DOI":"10.1145\/3528223.3530164"},{"key":"e_1_3_3_2_18_1","unstructured":"Ligong Han Yinxiao Li Han Zhang Peyman Milanfar Dimitris Metaxas and Feng Yang. 2023. Svdiff: Compact parameter space for diffusion fine-tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.11305 (2023)."},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","unstructured":"Amir Hertz Kfir Aberman and Daniel Cohen-Or. 2023a. Delta Denoising Score. CoRR abs\/2304.07090 (2023). 10.48550\/ARXIV.2304.07090 arXiv:https:\/\/arXiv.org\/abs\/2304.07090","DOI":"10.48550\/ARXIV.2304.07090"},{"key":"e_1_3_3_2_20_1","volume-title":"The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023","author":"Hertz Amir","year":"2023","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2023b. Prompt-to-Prompt Image Editing with Cross-Attention Control. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net. https:\/\/openreview.net\/pdf?id=_CDixzkzeyb"},{"key":"e_1_3_3_2_21_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems 33 (2020) 6840\u20136851."},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"publisher","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-Free Diffusion Guidance. CoRR abs\/2207.12598 (2022). 10.48550\/ARXIV.2207.12598 arXiv:https:\/\/arXiv.org\/abs\/2207.12598","DOI":"10.48550\/ARXIV.2207.12598"},{"key":"e_1_3_3_2_23_1","volume-title":"The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25-29, 2022","author":"Hu Edward\u00a0J.","year":"2022","unstructured":"Edward\u00a0J. Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25-29, 2022. OpenReview.net. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","unstructured":"Ziqi Huang Tianxing Wu Yuming Jiang Kelvin C.\u00a0K. Chan and Ziwei Liu. 2023. ReVersion: Diffusion-Based Relation Inversion from Images. CoRR abs\/2303.13495 (2023). 10.48550\/ARXIV.2303.13495 arXiv:https:\/\/arXiv.org\/abs\/2303.13495","DOI":"10.48550\/ARXIV.2303.13495"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"publisher","unstructured":"Shir Iluz Yael Vinker Amir Hertz Daniel Berio Daniel Cohen-Or and Ariel Shamir. 2023. Word-As-Image for Semantic Typography. ACM Trans. Graph. 42 4 (2023) 151:1\u2013151:11. 10.1145\/3592123https:\/\/dl.acm.org\/doi\/10.1145\/3592123","DOI":"10.1145\/3592123"},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00190"},{"key":"e_1_3_3_2_27_1","unstructured":"Oren Katzir Or Patashnik Daniel Cohen-Or and Dani Lischinski. 2023. Noise-Free Score Distillation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.17590 (2023)."},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00774"},{"key":"e_1_3_3_2_32_1","unstructured":"Alex Nichol Prafulla Dhariwal Aditya Ramesh Pranav Shyam Pamela Mishkin Bob McGrew Ilya Sutskever and Mark Chen. 2021. Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2112.10741 (2021)."},{"key":"e_1_3_3_2_33_1","unstructured":"Lianyu Pang Jian Yin Haoran Xie Qiping Wang Qing Li and Xudong Mao. 2023. Cross initialization for personalized text-to-image generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.15905 (2023)."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"crossref","unstructured":"Gaurav Parmar Krishna\u00a0Kumar Singh Richard Zhang Yijun Li Jingwan Lu and Jun-Yan Zhu. 2023. Zero-shot Image-to-Image Translation. arxiv:https:\/\/arXiv.org\/abs\/2302.03027\u00a0[cs.CV]","DOI":"10.1145\/3588432.3591513"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"e_1_3_3_2_36_1","unstructured":"Quynh Phung Songwei Ge and Jia-Bin Huang. 2023. Grounded Text-to-Image Synthesis with Attention Refocusing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.05427 (2023)."},{"key":"e_1_3_3_2_37_1","volume-title":"The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023","author":"Poole Ben","year":"2023","unstructured":"Ben Poole, Ajay Jain, Jonathan\u00a0T. Barron, and Ben Mildenhall. 2023. DreamFusion: Text-to-3D using 2D Diffusion. In The Eleventh International Conference on Learning Representations, ICLR 2023, Kigali, Rwanda, May 1-5, 2023. OpenReview.net. https:\/\/openreview.net\/pdf?id=FjNys5c7VyY"},{"key":"e_1_3_3_2_38_1","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark Gretchen Krueger and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. CoRR abs\/2103.00020 (2021). arXiv:https:\/\/arXiv.org\/abs\/2103.00020https:\/\/arxiv.org\/abs\/2103.00020"},{"key":"e_1_3_3_2_39_1","unstructured":"Aditya Ramesh Mikhail Pavlov Gabriel Goh Scott Gray Chelsea Voss Alec Radford Mark Chen and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. CoRR abs\/2102.12092 (2021). arXiv:https:\/\/arXiv.org\/abs\/2102.12092https:\/\/arxiv.org\/abs\/2102.12092"},{"key":"e_1_3_3_2_40_1","unstructured":"Royi Rassin Eran Hirsch Daniel Glickman Shauli Ravfogel Yoav Goldberg and Gal Chechik. 2023. Linguistic Binding in Diffusion Models: Enhancing Attribute Correspondence through Attention Map Alignment. arxiv:https:\/\/arXiv.org\/abs\/2306.08877\u00a0[cs.CL]"},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_3_2_43_1","unstructured":"Simo Ryu. 2023. Low-rank Adaptation for Fast Text-to-Image Diffusion Fine-tuning. https:\/\/github.com\/cloneofsimo\/lora."},{"key":"e_1_3_3_2_44_1","unstructured":"Chitwan Saharia William Chan Saurabh Saxena Lala Li Jay Whang Emily Denton Seyed Kamyar\u00a0Seyed Ghasemipour Burcu\u00a0Karagol Ayan S\u00a0Sara Mahdavi Rapha\u00a0Gontijo Lopes et\u00a0al. 2022. Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.11487 (2022)."},{"key":"e_1_3_3_2_45_1","unstructured":"Christoph Schuhmann Richard Vencu Romain Beaumont Robert Kaczmarczyk Clayton Mullis Aarush Katta Theo Coombes Jenia Jitsev and Aran Komatsuzaki. 2021. Laion-400m: Open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2111.02114 (2021)."},{"key":"e_1_3_3_2_46_1","unstructured":"Eyal Segalis Dani Valevski Danny Lumen Yossi Matias and Yaniv Leviathan. 2023. A Picture is Worth a Thousand Words: Principled Recaptioning Improves Image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.16656 (2023)."},{"key":"e_1_3_3_2_47_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021. Denoising Diffusion Implicit Models. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=St1giarCHLP"},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","unstructured":"Kunpeng Song Ligong Han Bingchen Liu Dimitris\u00a0N. Metaxas and Ahmed Elgammal. 2022. Diffusion Guided Domain Adaptation of Image Generators. CoRR abs\/2212.04473 (2022). 10.48550\/ARXIV.2212.04473 arXiv:https:\/\/arXiv.org\/abs\/2212.04473","DOI":"10.48550\/ARXIV.2212.04473"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591506"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"e_1_3_3_2_51_1","unstructured":"Dani Valevski Matan Kalman Yossi Matias and Yaniv Leviathan. 2022. Unitune: Text-driven image editing by fine tuning an image generation model on a single image. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.09477 (2022)."},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"publisher","unstructured":"Dani Valevski Danny Wasserman Yossi Matias and Yaniv Leviathan. 2023. Face0: Instantaneously Conditioning a Text-to-Image Model on a Face. CoRR abs\/2306.06638 (2023). 10.48550\/ARXIV.2306.06638 arXiv:https:\/\/arXiv.org\/abs\/2306.06638","DOI":"10.48550\/ARXIV.2306.06638"},{"key":"e_1_3_3_2_53_1","doi-asserted-by":"publisher","unstructured":"Yael Vinker Andrey Voynov Daniel Cohen-Or and Ariel Shamir. 2023. Concept Decomposition for Visual Exploration and Inspiration. CoRR abs\/2305.18203 (2023). 10.48550\/ARXIV.2305.18203 arXiv:https:\/\/arXiv.org\/abs\/2305.18203","DOI":"10.48550\/ARXIV.2305.18203"},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","unstructured":"Andrey Voynov Qinghao Chu Daniel Cohen-Or and Kfir Aberman. 2023. P+: Extended Textual Conditioning in Text-to-Image Generation. CoRR abs\/2303.09522 (2023). 10.48550\/ARXIV.2303.09522 arXiv:https:\/\/arXiv.org\/abs\/2303.09522","DOI":"10.48550\/ARXIV.2303.09522"},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"publisher","unstructured":"Zhengyi Wang Cheng Lu Yikai Wang Fan Bao Chongxuan Li Hang Su and Jun Zhu. 2023. ProlificDreamer: High-Fidelity and Diverse Text-to-3D Generation with Variational Score Distillation. CoRR abs\/2305.16213 (2023). 10.48550\/ARXIV.2305.16213 arXiv:https:\/\/arXiv.org\/abs\/2305.16213","DOI":"10.48550\/ARXIV.2305.16213"},{"key":"e_1_3_3_2_56_1","unstructured":"Yuxiang Wei Yabo Zhang Zhilong Ji Jinfeng Bai Lei Zhang and Wangmeng Zuo. 2023. Elite: Encoding visual concepts into textual embeddings for customized text-to-image generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13848 (2023)."},{"key":"e_1_3_3_2_57_1","doi-asserted-by":"publisher","unstructured":"Qiucheng Wu Yujian Liu Handong Zhao Trung Bui Zhe Lin Yang Zhang and Shiyu Chang. 2023. Harnessing the Spatial-Temporal Attention of Diffusion Models for High-Fidelity Text-to-Image Synthesis. CoRR abs\/2304.03869 (2023). 10.48550\/ARXIV.2304.03869 arXiv:https:\/\/arXiv.org\/abs\/2304.03869","DOI":"10.48550\/ARXIV.2304.03869"},{"key":"e_1_3_3_2_58_1","doi-asserted-by":"publisher","unstructured":"Hu Ye Jun Zhang Sibo Liu Xiao Han and Wei Yang. 2023. IP-Adapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models. CoRR abs\/2308.06721 (2023). 10.48550\/ARXIV.2308.06721 arXiv:https:\/\/arXiv.org\/abs\/2308.06721","DOI":"10.48550\/ARXIV.2308.06721"},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"publisher","unstructured":"Yufan Zhou Ruiyi Zhang Tong Sun and Jinhui Xu. 2023. Enhancing Detail Preservation for Customized Text-to-Image Generation: A Regularization-Free Approach. CoRR abs\/2305.13579 (2023). 10.48550\/ARXIV.2305.13579 arXiv:https:\/\/arXiv.org\/abs\/2305.13579","DOI":"10.48550\/ARXIV.2305.13579"}],"event":{"name":"SA '24: SIGGRAPH Asia 2024 Conference Papers","location":"Tokyo Japan","acronym":"SA '24","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["SIGGRAPH Asia 2024 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687604","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3680528.3687604","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:26Z","timestamp":1750294706000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687604"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":58,"alternative-id":["10.1145\/3680528.3687604","10.1145\/3680528"],"URL":"https:\/\/doi.org\/10.1145\/3680528.3687604","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"2024-12-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}