{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T19:42:35Z","timestamp":1783107755740,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":105,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Packard Fellowship"},{"name":"Amazon Faculty Research Award"},{"name":"NSF IIS-2239076"},{"DOI":"10.13039\/100015597","name":"Siebel Scholars Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100015597","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1145\/3680528.3687642","type":"proceedings-article","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T08:14:37Z","timestamp":1733213677000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":23,"title":["Customizing Text-to-Image Models with a Single Image Pair"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-6289-7007","authenticated-orcid":false,"given":"Maxwell","family":"Jones","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4000-2046","authenticated-orcid":false,"given":"Sheng-Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, United States of 
America"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1799-1069","authenticated-orcid":false,"given":"Nupur","family":"Kumari","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1744-6765","authenticated-orcid":false,"given":"David","family":"Bau","sequence":"additional","affiliation":[{"name":"Northeastern University, Pittsburgh, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8504-3410","authenticated-orcid":false,"given":"Jun-Yan","family":"Zhu","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, United States of America"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,12,3]]},"reference":[{"key":"e_1_3_3_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3618322"},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618173"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618154"},{"key":"e_1_3_3_2_5_1","unstructured":"Yogesh Balaji Seungjun Nah Xun Huang Arash Vahdat Jiaming Song Karsten Kreis Miika Aittala Timo Aila Samuli Laine Bryan Catanzaro et\u00a0al. 2022. ediffi: Text-to-image diffusion models with an ensemble of expert denoisers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.01324 (2022)."},{"key":"e_1_3_3_2_6_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Bar Amir","year":"2022","unstructured":"Amir Bar, Yossi Gandelsman, Trevor Darrell, Amir Globerson, and Alexei\u00a0A. Efros. 2022. Visual Prompting via Image Inpainting. 
In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_21"},{"key":"e_1_3_3_2_8_1","unstructured":"Manuel Brack Felix Friedrich Katharina Kornmeier Linoy Tsaban Patrick Schramowski Kristian Kersting and Apolin\u00e1rio Passos. 2023. LEDITS++: Limitless Image Editing using Text-to-Image Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.16711 (2023)."},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_3_2_10_1","unstructured":"Minwoo Byeon Beomhee Park Haecheon Kim Sungjun Lee Woonhyuk Baek and Saehoon Kim. 2022. COYO-700M: Image-Text Pair Dataset. https:\/\/github.com\/kakaobrain\/coyo-dataset."},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_3_2_12_1","first-page":"4055","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Chang Huiwen","year":"2023","unstructured":"Huiwen Chang, Han Zhang, Jarred Barber, AJ Maschinot, Jos\u00e9 Lezama, Lu Jiang, Ming-Hsuan Yang, Kevin Murphy, William\u00a0T Freeman, Michael Rubinstein, et\u00a0al. 2023. Muse: Text-to-image generation via masked generative transformers. In Proceedings of the 40th International Conference on Machine Learning. 4055\u20134075."},{"key":"e_1_3_3_2_13_1","unstructured":"Tian\u00a0Qi Chen and Mark Schmidt. 2016. Fast patch-based style transfer of arbitrary style. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1612.04337 (2016)."},{"key":"e_1_3_3_2_14_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Chen Wenhu","year":"2023","unstructured":"Wenhu Chen, Hexiang Hu, Yandong Li, Nataniel Ruiz, Xuhui Jia, Ming-Wei Chang, and William\u00a0W Cohen. 2023a. Subject-driven text-to-image generation via apprenticeship learning. 
In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_15_1","unstructured":"Xi Chen Lianghua Huang Yu Liu Yujun Shen Deli Zhao and Hengshuang Zhao. 2023b. Anydoor: Zero-shot object-level image customization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.09481 (2023)."},{"key":"e_1_3_3_2_16_1","unstructured":"Giannis Daras and Alexandros\u00a0G Dimakis. 2022. Multiresolution textual inversion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.17115 (2022)."},{"key":"e_1_3_3_2_17_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Dinh Laurent","year":"2017","unstructured":"Laurent Dinh, Jascha Sohl-Dickstein, and Samy Bengio. 2017. Density estimation using real nvp. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_18_1","unstructured":"Stephanie Fu Netanel Tamir Shobhita Sundaram Lucy Chai Richard Zhang Tali Dekel and Phillip Isola. 2023. DreamSim: Learning New Dimensions of Human Visual Similarity using Synthetic Data. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.09344 (2023)."},{"key":"e_1_3_3_2_19_1","volume-title":"The Eleventh International Conference on Learning Representations","author":"Gal Rinon","year":"2022","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit\u00a0Haim Bermano, Gal Chechik, and Daniel Cohen-or. 2022a. An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"crossref","unstructured":"Rinon Gal Moab Arar Yuval Atzmon Amit\u00a0H Bermano Gal Chechik and Daniel Cohen-Or. 2023a. Designing an encoder for fast personalization of text-to-image models. 
ACM Transactions on Graphics (TOG) (2023).","DOI":"10.1145\/3592133"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"crossref","unstructured":"Rinon Gal Moab Arar Yuval Atzmon Amit\u00a0H Bermano Gal Chechik and Daniel Cohen-Or. 2023b. Encoder-based domain tuning for fast personalization of text-to-image models. ACM Transactions on Graphics (TOG) 42 4 (2023) 1\u201313.","DOI":"10.1145\/3592133"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"crossref","unstructured":"Rinon Gal Or Patashnik Haggai Maron Amit\u00a0H Bermano Gal Chechik and Daniel Cohen-Or. 2022b. Stylegan-nada: Clip-guided domain adaptation of image generators. ACM Transactions on Graphics (TOG) 41 4 (2022) 1\u201313.","DOI":"10.1145\/3528223.3530164"},{"key":"e_1_3_3_2_23_1","unstructured":"Rohit Gandikota Joanna Materzynska Tingrui Zhou Antonio Torralba and David Bau. 2023. Concept sliders: Lora adaptors for precise control in diffusion models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.12092 (2023)."},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"crossref","unstructured":"Daniel Garibi Or Patashnik Andrey Voynov Hadar Averbuch-Elor and Daniel Cohen-Or. 2024. ReNoise: Real Image Inversion Through Iterative Noising. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.14602 (2024).","DOI":"10.1007\/978-3-031-72630-9_23"},{"key":"e_1_3_3_2_25_1","unstructured":"Leon\u00a0A Gatys Alexander\u00a0S Ecker and Matthias Bethge. 2015. A neural algorithm of artistic style. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1508.06576 (2015)."},{"key":"e_1_3_3_2_26_1","unstructured":"Aaron Gokaslan A\u00a0Feder Cooper Jasmine Collins Landan Seguin Austin Jacobson Mihir Patel Jonathan Frankle Cory Stephenson and Volodymyr Kuleshov. 2023. CommonCanvas: An Open Diffusion Model Trained with Creative-Commons Images. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.16825 (2023)."},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"crossref","unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2020. Generative adversarial networks. Commun. ACM 63 11 (2020) 139\u2013144.","DOI":"10.1145\/3422622"},{"key":"e_1_3_3_2_28_1","unstructured":"Yuchao Gu Xintao Wang Jay\u00a0Zhangjie Wu Yujun Shi Yunpeng Chen Zihan Fan Wuyou Xiao Rui Zhao Shuning Chang Weijia Wu et\u00a0al. 2024. Mix-of-show: Decentralized low-rank adaptation for multi-concept customization of diffusion models. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_29_1","unstructured":"Yuwei Guo Ceyuan Yang Anyi Rao Zhengyang Liang Yaohui Wang Yu Qiao Maneesh Agrawala Dahua Lin and Bo Dai. 2023. Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.04725 (2023)."},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00673"},{"key":"e_1_3_3_2_31_1","unstructured":"Amir Hertz Andrey Voynov Shlomi Fruchter and Daniel Cohen-Or. 2023. Style aligned image generation via shared attention. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.02133 (2023)."},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"crossref","unstructured":"Aaron Hertzmann Charles\u00a0E. Jacobs Nuria Oliver Brian Curless and David\u00a0H. Salesin. 2001. Image Analogies.","DOI":"10.1145\/383259.383295"},{"key":"e_1_3_3_2_33_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_34_1","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free diffusion guidance. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.12598 (2022)."},{"key":"e_1_3_3_2_35_1","first-page":"2790","volume-title":"International Conference on Machine Learning","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin De\u00a0Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-efficient transfer learning for NLP. In International Conference on Machine Learning. PMLR, 2790\u20132799."},{"key":"e_1_3_3_2_36_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Hu Edward\u00a0J","year":"2021","unstructured":"Edward\u00a0J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.167"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.5143773"},{"key":"e_1_3_3_2_39_1","volume-title":"Eurographics Conference on Rendering Techniques","author":"Irony Revital","year":"2005","unstructured":"Revital Irony, Daniel Cohen-Or, and Dani Lischinski. 2005. Colorization by Example. In Eurographics Conference on Rendering Techniques."},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00976"},{"key":"e_1_3_3_2_42_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Karras Tero","year":"2020","unstructured":"Tero Karras, Miika Aittala, Janne Hellsten, Samuli Laine, Jaakko Lehtinen, and Timo Aila. 2020a. Training generative adversarial networks with limited data. 
In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"e_1_3_3_2_45_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Kingma Diederik\u00a0P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Max Welling. 2014. Auto-encoding variational bayes. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"e_1_3_3_2_47_1","unstructured":"Dongxu Li Junnan Li and Steven Hoi. 2024. Blip-diffusion: Pre-trained subject representation for controllable text-to-image generation and editing. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"e_1_3_3_2_49_1","unstructured":"Yanghao Li Naiyan Wang Jiaying Liu and Xiaodi Hou. 2017. Demystifying neural style transfer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1701.01036 (2017)."},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"crossref","unstructured":"Jing Liao Yuan Yao Lu Yuan Gang Hua and Sing\u00a0Bing Kang. 2017. Visual Attribute Transfer through Deep Image Analogy. ACM Trans. Graph. 36 4 Article 120 (jul 2017) 15\u00a0pages.","DOI":"10.1145\/3072959.3073683"},{"key":"e_1_3_3_2_51_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Liu Luping","year":"2022","unstructured":"Luping Liu, Yi Ren, Zhijie Lin, and Zhou Zhao. 2022b. Pseudo numerical methods for diffusion models on manifolds. 
In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_26"},{"key":"e_1_3_3_2_53_1","volume-title":"International Conference on Machine Learning (ICML)","author":"Liu Zhiheng","year":"2023","unstructured":"Zhiheng Liu, Ruili Feng, Kai Zhu, Yifei Zhang, Kecheng Zheng, Yu Liu, Deli Zhao, Jingren Zhou, and Yang Cao. 2023. Cones: Concept neurons in diffusion models for customized generation. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_3_2_54_1","volume-title":"International Conference on Learning Representations","author":"Loshchilov Ilya","year":"2018","unstructured":"Ilya Loshchilov and Frank Hutter. 2018. Decoupled Weight Decay Regularization. In International Conference on Learning Representations."},{"key":"e_1_3_3_2_55_1","unstructured":"Simian Luo Yiqin Tan Longbo Huang Jian Li and Hang Zhao. 2023. Latent Consistency Models: Synthesizing High-Resolution Images with Few-Step Inference. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.04378 (2023)."},{"key":"e_1_3_3_2_56_1","unstructured":"Jian Ma Junhao Liang Chen Chen and Haonan Lu. 2023. Subject-diffusion: Open domain personalized text-to-image generation without test-time fine-tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.11410 (2023)."},{"key":"e_1_3_3_2_57_1","unstructured":"Csaba Markus. 2019. How Six Different Artists Have Re-Interpreted Da Vinci\u2019s \u2018Mona Lisa\u2019. https:\/\/www.parkwestgallery.com\/six-different-artists-da-vinci-mona-lisa\/."},{"key":"e_1_3_3_2_58_1","unstructured":"Joanna Materzynska Josef Sivic Eli Shechtman Antonio Torralba Richard Zhang and Bryan Russell. 2023. Customizing motion in text-to-video diffusion models. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.04966 (2023)."},{"key":"e_1_3_3_2_59_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Meng Chenlin","year":"2022","unstructured":"Chenlin Meng, Yutong He, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan Zhu, and Stefano Ermon. 2022. Sdedit: Guided image synthesis and editing with stochastic differential equations. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_60_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_3_2_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555436"},{"key":"e_1_3_3_2_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01529"},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58545-7_19"},{"key":"e_1_3_3_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"e_1_3_3_2_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_3_2_66_1","unstructured":"The Phillips Collection. 2013. Van Gogh Repetitions. https:\/\/www.phillipscollection.org\/event\/2013-10-11-van-gogh-repetitions."},{"key":"e_1_3_3_2_67_1","unstructured":"Ryan Po Guandao Yang Kfir Aberman and Gordon Wetzstein. 2023. Orthogonal Adaptation for Modular Customization of Diffusion Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.02432 (2023)."},{"key":"e_1_3_3_2_68_1","unstructured":"Dustin Podell Zion English Kyle Lacey Andreas Blattmann Tim Dockhorn Jonas M\u00fcller Joe Penna and Robin Rombach. 2023. Sdxl: Improving latent diffusion models for high-resolution image synthesis. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.01952 (2023)."},{"key":"e_1_3_3_2_69_1","volume-title":"International Conference on Machine Learning (ICML)","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_3_2_70_1","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2204.06125 (2022)."},{"key":"e_1_3_3_2_71_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Reed Scott","year":"2015","unstructured":"Scott Reed, Yi Zhang, Yuting Zhang, and Honglak Lee. 2015. Deep Visual Analogy-Making. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_72_1","unstructured":"Yixuan Ren Yang Zhou Jimei Yang Jing Shi Difan Liu Feng Liu Mingi Kwon and Abhinav Shrivastava. 2024. Customize-a-video: One-shot motion customization of text-to-video diffusion models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.14780 (2024)."},{"key":"e_1_3_3_2_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_74_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_3_2_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_3_2_76_1","doi-asserted-by":"crossref","unstructured":"Nataniel Ruiz Yuanzhen Li Varun Jampani Wei Wei Tingbo Hou Yael Pritch Neal Wadhwa Michael Rubinstein and Kfir Aberman. 2023b. HyperDreamBooth: HyperNetworks for Fast Personalization of Text-to-Image Models. 
arxiv:https:\/\/arXiv.org\/abs\/2307.06949\u00a0[cs.CV]","DOI":"10.1109\/CVPR52733.2024.00624"},{"key":"e_1_3_3_2_77_1","unstructured":"Simo Ryu. 2023a. LoRA-Stable Diffusion. https:\/\/github.com\/cloneofsimo\/lora."},{"key":"e_1_3_3_2_78_1","unstructured":"Simo Ryu. 2023b. Low-rank Adaptation for Fast Text-to-Image Diffusion Fine-tuning. https:\/\/github.com\/cloneofsimo\/lora."},{"key":"e_1_3_3_2_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530757"},{"key":"e_1_3_3_2_80_1","volume-title":"NeurIPS","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily Denton, Seyed Kamyar\u00a0Seyed Ghasemipour, Burcu\u00a0Karagol Ayan, S\u00a0Sara Mahdavi, Rapha\u00a0Gontijo Lopes, et\u00a0al. 2022b. Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding. In NeurIPS."},{"key":"e_1_3_3_2_81_1","first-page":"30105","volume-title":"International conference on machine learning","author":"Sauer Axel","year":"2023","unstructured":"Axel Sauer, Tero Karras, Samuli Laine, Andreas Geiger, and Timo Aila. 2023. Stylegan-t: Unlocking the power of gans for fast large-scale text-to-image synthesis. In International conference on machine learning. PMLR, 30105\u201330118."},{"key":"e_1_3_3_2_82_1","unstructured":"Christoph Schuhmann Richard Vencu Romain Beaumont Robert Kaczmarczyk Clayton Mullis Aarush Katta Theo Coombes Jenia Jitsev and Aran Komatsuzaki. 2021. Laion-400m: Open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2111.02114 (2021)."},{"key":"e_1_3_3_2_83_1","unstructured":"Viraj Shah Nataniel Ruiz Forrester Cole Erika Lu Svetlana Lazebnik Yuanzhen Li and Varun Jampani. 2023. ZipLoRA: Any Subject in Any Style by Effectively Merging LoRAs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.13600 (2023)."},{"key":"e_1_3_3_2_84_1","unstructured":"Jing Shi Wei Xiong Zhe Lin and Hyun\u00a0Joon Jung. 2023. 
Instantbooth: Personalized text-to-image generation without test-time finetuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.03411 (2023)."},{"key":"e_1_3_3_2_85_1","volume-title":"International Conference on Machine Learning (ICML)","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International Conference on Machine Learning (ICML). https:\/\/dl.acm.org\/doi\/10.5555\/3045118.3045358"},{"key":"e_1_3_3_2_86_1","unstructured":"Kihyuk Sohn Nataniel Ruiz Kimin Lee Daniel\u00a0Castro Chin Irina Blok Huiwen Chang Jarred Barber Lu Jiang Glenn Entis Yuanzhen Li et\u00a0al. 2023. StyleDrop: Text-to-Image Generation in Any Style. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.00983 (2023)."},{"key":"e_1_3_3_2_87_1","volume-title":"International Conference on Learning Representations (ICLR)","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021a. Denoising diffusion implicit models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_2_88_1","volume-title":"ICLR","author":"Song Yang","year":"2021","unstructured":"Yang Song, Jascha Sohl-Dickstein, Diederik\u00a0P Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. 2021b. Score-based generative modeling through stochastic differential equations. In ICLR."},{"key":"e_1_3_3_2_89_1","unstructured":"Joshua Tenenbaum and William Freeman. 1996. Separating style and content. Advances in neural information processing systems 9 (1996)."},{"key":"e_1_3_3_2_90_1","doi-asserted-by":"crossref","unstructured":"Yoad Tewel Rinon Gal Gal Chechik and Yuval Atzmon. 2023. Key-Locked Rank One Editing for Text-to-Image Personalization. 
ACM Transactions on Graphics (TOG) (2023).","DOI":"10.1145\/3588432.3591506"},{"key":"e_1_3_3_2_91_1","unstructured":"Paul Upchurch Noah Snavely and Kavita Bala. 2016. From a to z: supervised transfer of style and content using deep neural network generators. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1603.02003 (2016)."},{"key":"e_1_3_3_2_92_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618249"},{"key":"e_1_3_3_2_93_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Oord Aaron Van\u00a0den","year":"2016","unstructured":"Aaron Van\u00a0den Oord, Nal Kalchbrenner, Lasse Espeholt, Oriol Vinyals, Alex Graves, et\u00a0al. 2016. Conditional image generation with pixelcnn decoders. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_94_1","unstructured":"Andrey Voynov Qinghao Chu Daniel Cohen-Or and Kfir Aberman. 2023. P + : Extended Textual Conditioning in Text-to-Image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.09522 (2023)."},{"key":"e_1_3_3_2_95_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01379"},{"key":"e_1_3_3_2_96_1","unstructured":"Sheng-Yu Wang David Bau and Jun-Yan Zhu. 2022. Rewriting Geometric Rules of a GAN. ACM SIGGRAPH (2022)."},{"key":"e_1_3_3_2_97_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00660"},{"key":"e_1_3_3_2_98_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00811"},{"key":"e_1_3_3_2_99_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"e_1_3_3_2_100_1","unstructured":"Hu Ye Jun Zhang Sibo Liu Xiao Han and Wei Yang. 2023. Ip-adapter: Text compatible image prompt adapter for text-to-image diffusion models. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.06721 (2023)."},{"key":"e_1_3_3_2_101_1","unstructured":"Jiahui Yu Yuanzhong Xu Jing\u00a0Yu Koh Thang Luong Gunjan Baid Zirui Wang Vijay Vasudevan Alexander Ku Yinfei Yang Burcu\u00a0Karagol Ayan et\u00a0al. 2022. Scaling Autoregressive Models for Content-Rich Text-to-Image Generation. Transactions on Machine Learning Research (2022)."},{"key":"e_1_3_3_2_102_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_2_103_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00584"},{"key":"e_1_3_3_2_104_1","volume-title":"Conference on Neural Information Processing Systems (NeurIPS)","author":"Zhao Shengyu","year":"2020","unstructured":"Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han. 2020. Differentiable Augmentation for Data-Efficient GAN Training. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_2_105_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"e_1_3_3_2_106_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01543"}],"event":{"name":"SA '24: SIGGRAPH Asia 2024 Conference Papers","location":"Tokyo Japan","acronym":"SA '24","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["SIGGRAPH Asia 2024 Conference 
Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687642","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3680528.3687642","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:27Z","timestamp":1750294707000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687642"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":105,"alternative-id":["10.1145\/3680528.3687642","10.1145\/3680528"],"URL":"https:\/\/doi.org\/10.1145\/3680528.3687642","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"2024-12-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}