{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:29:28Z","timestamp":1772119768126,"version":"3.50.1"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732416","type":"print"},{"value":"9783031732423","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73242-3_16","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:15:43Z","timestamp":1730106943000},"page":"279-296","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Infinite-ID: Identity-Preserved Personalization via\u00a0ID-Semantics Decoupling Paradigm"],"prefix":"10.1007","author":[{"given":"Yi","family":"Wu","sequence":"first","affiliation":[]},{"given":"Ziqiang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Heliang","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Chaoyue","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Bin","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"16_CR1","unstructured":"Achlioptas, P., Benetatos, A., Fostiropoulos, I., Skourtis, D.: Stellar: systematic evaluation of human-centric personalized text-to-image methods. arXiv preprint arXiv:2312.06116 (2023)"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Avrahami, O., Aberman, K., Fried, O., Cohen-Or, D., Lischinski, D.: Break-a-scene: extracting multiple concepts from a single image. arXiv preprint arXiv:2305.16311 (2023)","DOI":"10.1145\/3610548.3618154"},{"key":"16_CR3","unstructured":"Betker, J., et\u00a0al.: Improving image generation with better captions. Comput. Sci. 2, 3 (2023). https:\/\/cdn.openai.com\/papers\/dall-e-3.pdf"},{"key":"16_CR4","unstructured":"Biden, J.R.: Executive order on the safe, secure, and trustworthy development and use of artificial intelligence (2023)"},{"key":"16_CR5","doi-asserted-by":"crossref","unstructured":"Cao, M., Wang, X., Qi, Z., Shan, Y., Qie, X., Zheng, Y.: MasaCtrl: tuning-free mutual self-attention control for consistent image synthesis and editing. arXiv preprint arXiv:2304.08465 (2023)","DOI":"10.1109\/ICCV51070.2023.02062"},{"key":"16_CR6","unstructured":"Chen, L., et\u00a0al.: PhotoVerse: tuning-free image customization with text-to-image diffusion models. arXiv preprint arXiv:2309.05793 (2023)"},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., Huang, L., Liu, Y., Shen, Y., Zhao, D., Zhao, H.: AnyDoor: zero-shot object-level image customization. arXiv preprint arXiv:2307.09481 (2023)","DOI":"10.1109\/CVPR52733.2024.00630"},{"key":"16_CR8","unstructured":"Gal, R., et al.: An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"issue":"4","key":"16_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592133","volume":"42","author":"R Gal","year":"2023","unstructured":"Gal, R., Arar, M., Atzmon, Y., Bermano, A.H., Chechik, G., Cohen-Or, D.: Encoder-based domain tuning for fast personalization of text-to-image models. ACM Trans. Graph. (TOG) 42(4), 1\u201313 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"16_CR10","doi-asserted-by":"crossref","unstructured":"Hertz, A., Voynov, A., Fruchter, S., Cohen-Or, D.: Style aligned image generation via shared attention. arXiv preprint arXiv:2312.02133 (2023)","DOI":"10.1109\/CVPR52733.2024.00457"},{"key":"16_CR11","doi-asserted-by":"publisher","unstructured":"Huang, X., Belongie, S.: Arbitrary style transfer in real-time with adaptive instance normalization. In: 2017 IEEE International Conference on Computer Vision (ICCV) (2017). https:\/\/doi.org\/10.1109\/iccv.2017.167, http:\/\/dx.doi.org\/10.1109\/iccv.2017.167","DOI":"10.1109\/iccv.2017.167"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Inoue, N., Kikuchi, K., Simo-Serra, E., Otani, M., Yamaguchi, K.: LayoutDM: discrete diffusion model for controllable layout generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10167\u201310176 (2023)","DOI":"10.1109\/CVPR52729.2023.00980"},{"key":"16_CR13","doi-asserted-by":"crossref","unstructured":"Kawar, B., et al.: Imagic: text-based real image editing with diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6007\u20136017 (2023)","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"16_CR14","doi-asserted-by":"crossref","unstructured":"Khachatryan, L., et al.: Text2video-zero: text-to-image diffusion models are zero-shot video generators. arXiv preprint arXiv:2303.13439 (2023)","DOI":"10.1109\/ICCV51070.2023.01462"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"16_CR16","unstructured":"Li, D., Li, J., Hoi, S.C.: BLIP-diffusion: pre-trained subject representation for controllable text-to-image generation and editing. arXiv preprint arXiv:2305.14720 (2023)"},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Li, Z., Cao, M., Wang, X., Qi, Z., Cheng, M.M., Shan, Y.: PhotoMaker: customizing realistic human photos via stacked id embedding. arXiv preprint arXiv:2312.04461 (2023)","DOI":"10.1109\/CVPR52733.2024.00825"},{"issue":"11","key":"16_CR18","first-page":"1","volume":"55","author":"Z Li","year":"2023","unstructured":"Li, Z., et al.: A systematic survey of regularization and normalization in GANs. ACM Comput. Surv. 55(11), 1\u201337 (2023)","journal-title":"ACM Comput. Surv."},{"key":"16_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"598","DOI":"10.1007\/978-3-031-19784-0_35","volume-title":"Computer Vision - ECCV 2022","author":"Z Li","year":"2022","unstructured":"Li, Z., Wang, C., Zheng, H., Zhang, J., Li, B.: FakeCLR: exploring contrastive learning for solving latent discontinuity in data-efficient GANs. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13675, pp. 598\u2013615. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19784-0_35"},{"issue":"1","key":"16_CR20","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1109\/TETCI.2022.3193373","volume":"7","author":"Z Li","year":"2022","unstructured":"Li, Z., Xia, P., Tao, R., Niu, H., Li, B.: A new perspective on stabilizing GANs training: direct adversarial training. IEEE Trans. Emerg. Top. Comput. Intell. 7(1), 178\u2013189 (2022)","journal-title":"IEEE Trans. Emerg. Top. Comput. Intell."},{"key":"16_CR21","doi-asserted-by":"crossref","unstructured":"Ma, J., Liang, J., Chen, C., Lu, H.: Subject-diffusion: open domain personalized text-to-image generation without test-time fine-tuning. arXiv preprint arXiv:2307.11410 (2023)","DOI":"10.1145\/3641519.3657469"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Peng, X., et al.: PortraitBooth: a versatile portrait model for fast identity-preserved personalization. arXiv preprint arXiv:2312.06354 (2023)","DOI":"10.1109\/CVPR52733.2024.02557"},{"key":"16_CR23","unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"16_CR24","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"16_CR25","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. Cornell University - arXiv (2021)"},{"issue":"1","key":"16_CR26","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(1), 5485\u20135551 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"16_CR27","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"16_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015, Part III. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"16_CR29","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"16_CR30","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR31","doi-asserted-by":"crossref","unstructured":"Schroff, F., Kalenichenko, D., Philbin, J.: FaceNet: a unified embedding for face recognition and clustering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 815\u2013823 (2015)","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"16_CR32","first-page":"25278","volume":"35","author":"C Schuhmann","year":"2022","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models. Adv. Neural. Inf. Process. Syst. 35, 25278\u201325294 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"16_CR33","unstructured":"Schuhmann, C., et al.: LAION-400M: open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:2111.02114 (2021)"},{"key":"16_CR34","unstructured":"Shi, R., et al.: Zero123++: a single image to consistent multi-view diffusion base model. arXiv preprint arXiv:2310.15110 (2023)"},{"key":"16_CR35","unstructured":"Wang, Q., Bai, X., Wang, H., Qin, Z., Chen, A.: InstantID: zero-shot identity-preserving generation in seconds. arXiv preprint arXiv:2401.07519 (2024)"},{"key":"16_CR36","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhang, Y., Ji, Z., Bai, J., Zhang, L., Zuo, W.: ELITE: encoding visual concepts into textual embeddings for customized text-to-image generation. arXiv preprint arXiv:2302.13848 (2023)","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"16_CR37","doi-asserted-by":"crossref","unstructured":"Wu, J.Z., et al.: Tune-a-video: one-shot tuning of image diffusion models for text-to-video generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7623\u20137633 (2023)","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"16_CR38","unstructured":"Wu, Y., et al.: Domain re-modulation for few-shot generative domain adaptation. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"16_CR39","doi-asserted-by":"crossref","unstructured":"Xiao, G., Yin, T., Freeman, W.T., Durand, F., Han, S.: FastComposer: tuning-free multi-subject image generation with localized attention. arXiv preprint arXiv:2305.10431 (2023)","DOI":"10.1007\/s11263-024-02227-z"},{"key":"16_CR40","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: IP-adapter: text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)"},{"key":"16_CR41","unstructured":"Yuan, G., et al.: Inserting anybody in diffusion models via celeb basis. In: Oh, A., Naumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems, vol.\u00a036, pp. 72958\u201372982. Curran Associates, Inc. (2023). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/e6d37cc5723e810b793c834bcb6647cf-Paper-Conference.pdf"},{"key":"16_CR42","unstructured":"Zhai, X., et\u00a0al.: A large-scale study of representation learning with the visual task adaptation benchmark. arXiv preprint arXiv:1910.04867 (2019)"},{"key":"16_CR43","unstructured":"Zhang, C., Zhang, C., Zhang, M., Kweon, I.S.: Text-to-image diffusion model in generative AI: a survey. arXiv preprint arXiv:2303.07909 (2023)"},{"issue":"10","key":"16_CR44","doi-asserted-by":"publisher","first-page":"1499","DOI":"10.1109\/LSP.2016.2603342","volume":"23","author":"K Zhang","year":"2016","unstructured":"Zhang, K., Zhang, Z., Li, Z., Qiao, Y.: Joint face detection and alignment using multitask cascaded convolutional networks. IEEE Signal Process. Lett. 23(10), 1499\u20131503 (2016)","journal-title":"IEEE Signal Process. Lett."},{"key":"16_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"16_CR46","doi-asserted-by":"crossref","unstructured":"Zheng, Y., et al.: General facial representation learning in a visual-linguistic manner. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18697\u201318709 (2022)","DOI":"10.1109\/CVPR52688.2022.01814"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73242-3_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T10:25:35Z","timestamp":1732962335000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73242-3_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"ISBN":["9783031732416","9783031732423"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73242-3_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"29 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}