{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T00:49:15Z","timestamp":1774658955321,"version":"3.50.1"},"publisher-location":"Cham","reference-count":99,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729829","type":"print"},{"value":"9783031729836","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72983-6_15","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:34:20Z","timestamp":1730108060000},"page":"252-271","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["MasterWeaver: Taming Editability and\u00a0Face Identity for\u00a0Personalized Text-to-Image Generation"],"prefix":"10.1007","author":[{"given":"Yuxiang","family":"Wei","sequence":"first","affiliation":[]},{"given":"Zhilong","family":"Ji","sequence":"additional","affiliation":[]},{"given":"Jinfeng","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Hongzhi","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Wangmeng","family":"Zuo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Abdal, R., Qin, Y., Wonka, P.: Image2styleGAN: how to embed images into the styleGAN latent space? In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4432\u20134441 (2019)","DOI":"10.1109\/ICCV.2019.00453"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Arar, M., et al.: Domain-agnostic tuning-encoder for fast personalization of text-to-image models. In: SIGGRAPH Asia 2023 Conference Papers, pp. 1\u201310 (2023)","DOI":"10.1145\/3610548.3618173"},{"key":"15_CR3","unstructured":"Arar, M., et al.: Palp: prompt aligned personalization of text-to-image models. arXiv preprint arXiv:2401.06105 (2024)"},{"key":"15_CR4","doi-asserted-by":"crossref","unstructured":"Avrahami, O., Aberman, K., Fried, O., Cohen-Or, D., Lischinski, D.: Break-a-scene: extracting multiple concepts from a single image. arXiv preprint arXiv:2305.16311 (2023)","DOI":"10.1145\/3610548.3618154"},{"key":"15_CR5","unstructured":"Balaji, Y., et\u00a0al.: ediffi: text-to-image diffusion models with an ensemble of expert denoisers. arXiv preprint arXiv:2211.01324 (2022)"},{"key":"15_CR6","unstructured":"Bau, D., et al.: GAN dissection: visualizing and understanding generative adversarial networks. arXiv preprint arXiv:1811.10597 (2018)"},{"key":"15_CR7","unstructured":"Betker, J., et\u00a0al.: Improving image generation with better captions. Comput. Sci. 2(3), 8 (2023). https:\/\/cdn.openai.com\/papers\/dall-e-3.pdf"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Cai, Y., Wei, Y., Ji, Z., Bai, J., Han, H., Zuo, W.: Decoupled textual embeddings for customized image generation. arXiv preprint arXiv:2312.11826 (2023)","DOI":"10.1609\/aaai.v38i2.27850"},{"key":"15_CR9","unstructured":"Chae, D., Park, N., Kim, J., Lee, K.: Instructbooth: instruction-following personalized text-to-image generation. arXiv preprint arXiv:2312.03011 (2023)"},{"key":"15_CR10","unstructured":"Chen, H., Zhang, Y., Wang, X., Duan, X., Zhou, Y., Zhu, W.: Disenbooth: disentangled parameter-efficient tuning for subject-driven text-to-image generation. arXiv preprint arXiv:2305.03374 (2023)"},{"key":"15_CR11","unstructured":"Chen, L., et\u00a0al.: Photoverse: tuning-free image customization with text-to-image diffusion models. arXiv preprint arXiv:2309.05793 (2023)"},{"key":"15_CR12","unstructured":"Chen, W., et al.: Subject-driven text-to-image generation via apprenticeship learning. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"15_CR13","unstructured":"Chen, Z., et al.: Dreamidentity: improved editability for efficient face-identity preserved image generation. arXiv preprint arXiv:2307.00300 (2023)"},{"key":"15_CR14","doi-asserted-by":"crossref","unstructured":"Choi, Y., Choi, M., Kim, M., Ha, J.W., Kim, S., Choo, J.: StarGAN: unified generative adversarial networks for multi-domain image-to-image translation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8789\u20138797 (2018)","DOI":"10.1109\/CVPR.2018.00916"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Choi, Y., Uh, Y., Yoo, J., Ha, J.W.: Stargan v2: diverse image synthesis for multiple domains. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8188\u20138197 (2020)","DOI":"10.1109\/CVPR42600.2020.00821"},{"key":"15_CR16","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR17","unstructured":"Ding, M., Zheng, W., Hong, W., Tang, J.: Cogview2: faster and better text-to-image generation via hierarchical transformers. arXiv preprint arXiv:2204.14217 (2022)"},{"key":"15_CR18","unstructured":"Dong, Z., Wei, P., Lin, L.: Dreamartist: towards controllable one-shot text-to-image generation via contrastive prompt-tuning. arXiv preprint arXiv:2211.11337 (2022)"},{"key":"15_CR19","doi-asserted-by":"crossref","unstructured":"Fei, Z., Fan, M., Huang, J.: Gradient-free textual inversion. arXiv preprint arXiv:2304.05818 (2023)","DOI":"10.1145\/3581783.3612599"},{"key":"15_CR20","unstructured":"Gal, R., et al.: An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"key":"15_CR21","doi-asserted-by":"crossref","unstructured":"Gal, R., Arar, M., Atzmon, Y., Bermano, A.H., Chechik, G., Cohen-Or, D.: Designing an encoder for fast personalization of text-to-image models. arXiv preprint arXiv:2302.12228 (2023)","DOI":"10.1145\/3592133"},{"issue":"4","key":"15_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592133","volume":"42","author":"R Gal","year":"2023","unstructured":"Gal, R., Arar, M., Atzmon, Y., Bermano, A.H., Chechik, G., Cohen-Or, D.: Encoder-based domain tuning for fast personalization of text-to-image models. ACM Trans. Graphics (TOG) 42(4), 1\u201313 (2023)","journal-title":"ACM Trans. Graphics (TOG)"},{"issue":"4","key":"15_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530164","volume":"41","author":"R Gal","year":"2022","unstructured":"Gal, R., Patashnik, O., Maron, H., Bermano, A.H., Chechik, G., Cohen-Or, D.: StyleGAN-Nada: clip-guided domain adaptation of image generators. ACM Trans. Graphics (TOG) 41(4), 1\u201313 (2022)","journal-title":"ACM Trans. Graphics (TOG)"},{"key":"15_CR24","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, vol. 27 (2014)"},{"key":"15_CR25","unstructured":"Hao, S., Han, K., Zhao, S., Wong, K.Y.K.: VICO: detail-preserving visual condition for personalized text-to-image generation. arXiv preprint arXiv:2306.00971 (2023)"},{"key":"15_CR26","doi-asserted-by":"crossref","unstructured":"Hertz, A., Aberman, K., Cohen-Or, D.: Delta denoising score. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2328\u20132337 (2023)","DOI":"10.1109\/ICCV51070.2023.00221"},{"key":"15_CR27","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR28","unstructured":"Hong, Y., Zhang, J.: Comfusion: personalized subject generation in multiple specific scenes from single image. arXiv preprint arXiv:2402.11849 (2024)"},{"key":"15_CR29","doi-asserted-by":"crossref","unstructured":"Hu, H., et\u00a0al.: Instruct-imagen: image generation with multi-modal instruction. arXiv preprint arXiv:2401.01952 (2024)","DOI":"10.1109\/CVPR52733.2024.00455"},{"key":"15_CR30","unstructured":"Hua, M., Liu, J., Ding, F., Liu, W., Wu, J., He, Q.: Dreamtuner: single image is enough for subject-driven generation. arXiv preprint arXiv:2312.13691 (2023)"},{"key":"15_CR31","doi-asserted-by":"crossref","unstructured":"Huang, T., et al.: Dreamcontrol: control-based text-to-3D generation with 3d self-prior. arXiv preprint arXiv:2312.06439 (2023)","DOI":"10.1109\/CVPR52733.2024.00513"},{"key":"15_CR32","doi-asserted-by":"crossref","unstructured":"Hyung, J., Shin, J., Choo, J.: Magicapture: high-resolution multi-concept portrait customization. arXiv preprint arXiv:2309.06895 (2023)","DOI":"10.1609\/aaai.v38i3.28020"},{"key":"15_CR33","unstructured":"Jia, X., et al.: Taming encoder for zero fine-tuning image customization with text-to-image diffusion models. arXiv preprint arXiv:2304.02642 (2023)"},{"key":"15_CR34","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4401\u20134410 (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"15_CR35","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and improving the image quality of stylegan. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8110\u20138119 (2020)","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"15_CR36","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. arXiv preprint arXiv:2212.04488 (2022)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"15_CR37","doi-asserted-by":"crossref","unstructured":"Lee, C.H., Liu, Z., Wu, L., Luo, P.: MaskGAN: towards diverse and interactive facial image manipulation. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00559"},{"key":"15_CR38","unstructured":"Lee, K., Kwak, S., Sohn, K., Shin, J.: Direct consistency optimization for compositional text-to-image personalization. arXiv preprint arXiv:2402.12004 (2024)"},{"key":"15_CR39","unstructured":"Li, D., Li, J., Hoi, S.: Blip-diffusion: pre-trained subject representation for controllable text-to-image generation and editing. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"15_CR40","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: Blip-2: bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)"},{"key":"15_CR41","doi-asserted-by":"crossref","unstructured":"Li, X., Hou, X., Loy, C.C.: When styleGAN meets stable diffusion: a w+ adapter for personalized image generation. arXiv preprint arXiv:2311.17461 (2023)","DOI":"10.1109\/CVPR52733.2024.00213"},{"key":"15_CR42","doi-asserted-by":"crossref","unstructured":"Li, Z., Cao, M., Wang, X., Qi, Z., Cheng, M.M., Shan, Y.: Photomaker: customizing realistic human photos via stacked id embedding. arXiv preprint arXiv:2312.04461 (2023)","DOI":"10.1109\/CVPR52733.2024.00825"},{"key":"15_CR43","doi-asserted-by":"crossref","unstructured":"Liang, C., Ma, F., Zhu, L., Deng, Y., Yang, Y.: CapHuman: capture your moments in parallel universes. arXiv preprint arXiv:2402.00627 (2024)","DOI":"10.1109\/CVPR52733.2024.00612"},{"key":"15_CR44","doi-asserted-by":"crossref","unstructured":"Lin, J., Zhang, Z., Wei, Y., Ren, D., Jiang, D., Zuo, W.: Improving image restoration through removing degradations in textual representations. arXiv preprint arXiv:2312.17334 (2023)","DOI":"10.1109\/CVPR52733.2024.00277"},{"key":"15_CR45","first-page":"16331","volume":"34","author":"H Ling","year":"2021","unstructured":"Ling, H., Kreis, K., Li, D., Kim, S.W., Torralba, A., Fidler, S.: EditGAN: high-precision semantic image editing. Adv. Neural. Inf. Process. Syst. 34, 16331\u201316345 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR46","doi-asserted-by":"crossref","unstructured":"Liu, M., et al.: STGAN: a unified selective transfer network for arbitrary image attribute editing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3673\u20133682 (2019)","DOI":"10.1109\/CVPR.2019.00379"},{"key":"15_CR47","doi-asserted-by":"crossref","unstructured":"Liu, R., et al.: Towards a simultaneous and granular identity-expression control in personalized face generation. arXiv preprint arXiv:2401.01207 (2024)","DOI":"10.1109\/CVPR52733.2024.00206"},{"key":"15_CR48","unstructured":"Liu, Z., et al.: Cones: concept neurons in diffusion models for customized generation. arXiv preprint arXiv:2303.05125 (2023)"},{"key":"15_CR49","unstructured":"Liu, Z., et al.: Cones 2: customizable image synthesis with multiple subjects. arXiv preprint arXiv:2305.19327 (2023)"},{"key":"15_CR50","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"15_CR51","unstructured":"Lu, J., Xie, C., Guo, H.: Object-driven one-shot fine-tuning of text-to-image diffusion with prototypical embedding. arXiv preprint arXiv:2401.15708 (2024)"},{"key":"15_CR52","doi-asserted-by":"crossref","unstructured":"Lv, Z., Wei, Y., Zuo, W., Wong, K.Y.K.: Place: adaptive layout-semantic fusion for semantic image synthesis. IEEE Conference on Computer Vision and Pattern Recognition (2024)","DOI":"10.1109\/CVPR52733.2024.00885"},{"key":"15_CR53","doi-asserted-by":"crossref","unstructured":"Lyu, Y., Lin, T., Li, F., He, D., Dong, J., Tan, T.: DeltaEdit: exploring text-free training for text-driven image manipulation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6894\u20136903 (2023)","DOI":"10.1109\/CVPR52729.2023.00666"},{"key":"15_CR54","doi-asserted-by":"crossref","unstructured":"Ma, J., Liang, J., Chen, C., Lu, H.: Subject-diffusion: open domain personalized text-to-image generation without test-time fine-tuning. arXiv preprint arXiv:2307.11410 (2023)","DOI":"10.1145\/3641519.3657469"},{"key":"15_CR55","doi-asserted-by":"crossref","unstructured":"Nam, J., Kim, H., Lee, D., Jin, S., Kim, S., Chang, S.: Dreammatcher: appearance matching self-attention for semantically-consistent text-to-image personalization. arXiv preprint arXiv:2402.09812 (2024)","DOI":"10.1109\/CVPR52733.2024.00774"},{"key":"15_CR56","unstructured":"Nichol, A., et al.: Glide: towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)"},{"key":"15_CR57","doi-asserted-by":"crossref","unstructured":"Patashnik, O., Wu, Z., Shechtman, E., Cohen-Or, D., Lischinski, D.: Styleclip: text-driven manipulation of stylegan imagery. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2085\u20132094 (2021)","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"15_CR58","unstructured":"Patel, M., Jung, S., Baral, C., Yang, Y.: $$\\lambda $$-eclipse: multi-concept personalized text-to-image diffusion models by leveraging clip latent space. arXiv preprint arXiv:2402.05195 (2024)"},{"key":"15_CR59","doi-asserted-by":"crossref","unstructured":"Peng, X., et al.: PortraitBooth: a versatile portrait model for fast identity-preserved personalization. arXiv preprint arXiv:2312.06354 (2023)","DOI":"10.1109\/CVPR52733.2024.02557"},{"key":"15_CR60","unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"15_CR61","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: DreamFusion: text-to-3D using 2D diffusion. arXiv preprint arXiv:2209.14988 (2022)"},{"key":"15_CR62","unstructured":"Purushwalkam, S., Gokul, A., Joty, S., Naik, N.: Bootpig: bootstrapping zero-shot personalized image generation capabilities in pretrained diffusion models. arXiv preprint arXiv:2401.13974 (2024)"},{"key":"15_CR63","unstructured":"Qiu, Z., et al.: Controlling text-to-image diffusion by orthogonal finetuning. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"15_CR64","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"issue":"8","key":"15_CR65","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"15_CR66","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)"},{"key":"15_CR67","doi-asserted-by":"crossref","unstructured":"Richardson, E., et al.: Encoding in style: a stylegan encoder for image-to-image translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2287\u20132296 (2021)","DOI":"10.1109\/CVPR46437.2021.00232"},{"key":"15_CR68","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"15_CR69","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. arXiv preprint arXiv:2208.12242 (2022)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"15_CR70","doi-asserted-by":"crossref","unstructured":"Ruiz, N., et al.: HyperdreamBooth: hypernetworks for fast personalization of text-to-image models. arXiv preprint arXiv:2307.06949 (2023)","DOI":"10.1109\/CVPR52733.2024.00624"},{"key":"15_CR71","doi-asserted-by":"crossref","unstructured":"Ryu, H., Lim, S., Shim, H.: Memory-efficient personalization using quantized diffusion model. arXiv preprint arXiv:2401.04339 (2024)","DOI":"10.1007\/978-3-031-72640-8_20"},{"key":"15_CR72","unstructured":"Saharia, C., et\u00a0al.: Photorealistic text-to-image diffusion models with deep language understanding. arXiv preprint arXiv:2205.11487 (2022)"},{"key":"15_CR73","doi-asserted-by":"crossref","unstructured":"Schroff, F., Kalenichenko, D., Philbin, J.: FaceNet: a unified embedding for face recognition and clustering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 815\u2013823 (2015)","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"15_CR74","first-page":"25278","volume":"35","author":"C Schuhmann","year":"2022","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models. Adv. Neural. Inf. Process. Syst. 35, 25278\u201325294 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"4","key":"15_CR75","doi-asserted-by":"publisher","first-page":"2004","DOI":"10.1109\/TPAMI.2020.3034267","volume":"44","author":"Y Shen","year":"2020","unstructured":"Shen, Y., Yang, C., Tang, X., Zhou, B.: InterFaceGAN: interpreting the disentangled face representation learned by GANs. IEEE Trans. Pattern Anal. Mach. Intell. 44(4), 2004\u20132018 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR76","doi-asserted-by":"crossref","unstructured":"Shi, J., Xiong, W., Lin, Z., Jung, H.J.: InstantBooth: personalized text-to-image generation without test-time finetuning. arXiv preprint arXiv:2304.03411 (2023)","DOI":"10.1109\/CVPR52733.2024.00816"},{"key":"15_CR77","doi-asserted-by":"crossref","unstructured":"Tewel, Y., Gal, R., Chechik, G., Atzmon, Y.: Key-locked rank one editing for text-to-image personalization. In: ACM SIGGRAPH 2023 Conference Proceedings, pp. 1\u201311 (2023)","DOI":"10.1145\/3588432.3591506"},{"key":"15_CR78","doi-asserted-by":"crossref","unstructured":"Valevski, D., Lumen, D., Matias, Y., Leviathan, Y.: Face0: instantaneously conditioning a text-to-image model on a face. In: SIGGRAPH Asia 2023 Conference Papers, pp. 1\u201310 (2023)","DOI":"10.1145\/3610548.3618249"},{"key":"15_CR79","unstructured":"Voynov, A., Chu, Q., Cohen-Or, D., Aberman, K.: $$ p+ $$: extended textual conditioning in text-to-image generation. arXiv preprint arXiv:2303.09522 (2023)"},{"key":"15_CR80","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"607","DOI":"10.1007\/978-3-030-01261-8_36","volume-title":"Computer Vision \u2013 ECCV 2018","author":"B Wang","year":"2018","unstructured":"Wang, B., Zheng, H., Liang, X., Chen, Y., Lin, L., Yang, M.: Toward characteristic-preserving image-based virtual try-on network. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11217, pp. 607\u2013623. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_36"},{"key":"15_CR81","unstructured":"Wang, Q., et al.: StableIdentity: inserting anybody into anywhere at first sight. arXiv preprint arXiv:2401.15975 (2024)"},{"key":"15_CR82","unstructured":"Wang, Q., Bai, X., Wang, H., Qin, Z., Chen, A.: InstantID: zero-shot identity-preserving generation in seconds. arXiv preprint arXiv:2401.07519 (2024)"},{"key":"15_CR83","doi-asserted-by":"crossref","unstructured":"Wang, T., Zhang, Y., Fan, Y., Wang, J., Chen, Q.: High-fidelity GAN inversion for image attribute editing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11379\u201311388 (2022)","DOI":"10.1109\/CVPR52688.2022.01109"},{"key":"15_CR84","unstructured":"Wang, Z., et al.: HiFi tuner: high-fidelity subject-driven fine-tuning for diffusion models. arXiv preprint arXiv:2312.00079 (2023)"},{"key":"15_CR85","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhang, Y., Ji, Z., Bai, J., Zhang, L., Zuo, W.: Elite: encoding visual concepts into textual embeddings for customized text-to-image generation. arXiv preprint arXiv:2302.13848 (2023)","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"15_CR86","unstructured":"Wu, Z., Yu, C., Zhu, Z., Wang, F., Bai, X.: SingleInsert: inserting new concepts from a single image into text-to-image models for flexible editing. arXiv preprint arXiv:2310.08094 (2023)"},{"key":"15_CR87","doi-asserted-by":"crossref","unstructured":"Xiao, G., Yin, T., Freeman, W.T., Durand, F., Han, S.: FastComposer: tuning-free multi-subject image generation with localized attention. arXiv preprint arXiv:2305.10431 (2023)","DOI":"10.1007\/s11263-024-02227-z"},{"key":"15_CR88","unstructured":"Yan, Y., et al.: FaceStudio: put your face everywhere in seconds. arXiv preprint arXiv:2312.02663 (2023)"},{"key":"15_CR89","unstructured":"Yang, Y., Wang, R., Qian, Z., Zhu, Y., Wu, Y.: Diffusion in diffusion: cyclic one-way diffusion for text-vision-conditioned generation. arXiv preprint arXiv:2306.08247 (2023)"},{"key":"15_CR90","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: IP-adapter: text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)"},{"key":"15_CR91","unstructured":"Yuan, G., et al.: Inserting anybody in diffusion models via celeb basis. arXiv preprint arXiv:2306.00926 (2023)"},{"key":"15_CR92","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"15_CR93","unstructured":"Zhang, X.L., et al.: Compositional inversion for stable diffusion models. arXiv preprint arXiv:2312.08048 (2023)"},{"key":"15_CR94","unstructured":"Zhang, Y., Wei, Y., Jiang, D., Zhang, X., Zuo, W., Tian, Q.: ControlVideo: training-free controllable text-to-video generation. arXiv preprint arXiv:2305.13077 (2023)"},{"key":"15_CR95","unstructured":"Zhang, Y., et al.: VideoElevator: elevating video generation quality with versatile text-to-image diffusion models. arXiv preprint arXiv:2403.05438 (2024)"},{"key":"15_CR96","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et\u00a0al.: SSR-encoder: Encoding selective subject representation for subject-driven generation. arXiv preprint arXiv:2312.16272 (2023)","DOI":"10.1109\/CVPR52733.2024.00771"},{"key":"15_CR97","unstructured":"Zhao, R., Zhu, M., Dong, S., Wang, N., Gao, X.: Catversion: concatenating embeddings for diffusion-based text-to-image personalization. arXiv preprint arXiv:2311.14631 (2023)"},{"key":"15_CR98","doi-asserted-by":"crossref","unstructured":"Zheng, Y., et al.: General facial representation learning in a visual-linguistic manner. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18697\u201318709 (2022)","DOI":"10.1109\/CVPR52688.2022.01814"},{"key":"15_CR99","unstructured":"Zhou, Y., Zhang, R., Sun, T., Xu, J.: Enhancing detail preservation for customized text-to-image generation: a regularization-free approach. arXiv preprint arXiv:2305.13579 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72983-6_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T10:35:21Z","timestamp":1732962921000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72983-6_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"ISBN":["9783031729829","9783031729836"],"references-count":99,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72983-6_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"29 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}