{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T08:54:05Z","timestamp":1767084845969,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031733895"},{"type":"electronic","value":"9783031733901"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73390-1_7","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T16:24:01Z","timestamp":1730305441000},"page":"110-126","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["StyleTokenizer: Defining Image Style by\u00a0a\u00a0Single Instance for Controlling Diffusion Models"],"prefix":"10.1007","author":[{"given":"Wen","family":"Li","sequence":"first","affiliation":[]},{"given":"Muyuan","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Cheng","family":"Zou","sequence":"additional","affiliation":[]},{"given":"Biao","family":"Gong","sequence":"additional","affiliation":[]},{"given":"Ruobing","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Meng","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jingdong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Ming","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"7_CR1","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: Instructpix2pix: learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18392\u201318402 (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"7_CR2","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR3","doi-asserted-by":"publisher","unstructured":"Cali\u0144ski, T., Harabasz, J.: A dendrite method for cluster analysis. Commun. Stat. 3(1), 1\u201327 (1974). https:\/\/doi.org\/10.1080\/03610927408827101","DOI":"10.1080\/03610927408827101"},{"key":"7_CR4","doi-asserted-by":"crossref","unstructured":"Choi, Y., Choi, M., Kim, M., Ha, J.W., Kim, S., Choo, J.: StarGAN: unified generative adversarial networks for multi-domain image-to-image translation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8789\u20138797 (2018)","DOI":"10.1109\/CVPR.2018.00916"},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Choi, Y., Uh, Y., Yoo, J., Ha, J.W.: StarGAN V2: diverse image synthesis for multiple domains. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8188\u20138197 (2020)","DOI":"10.1109\/CVPR42600.2020.00821"},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Deng, Y., et al.: StyTr2: image style transfer with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11326\u201311336 (2022)","DOI":"10.1109\/CVPR52688.2022.01104"},{"key":"7_CR7","unstructured":"Gal, R., et al.: An image is worth one word: personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Gatys, L.A., Ecker, A.S., Bethge, M.: A neural algorithm of artistic style. arXiv preprint arXiv:1508.06576 (2015)","DOI":"10.1167\/16.12.326"},{"key":"7_CR9","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"7_CR10","unstructured":"Hu, E.J., et al.: LoRa: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"7_CR11","unstructured":"Huang, L., Chen, D., Liu, Y., Shen, Y., Zhao, D., Zhou, J.: Composer: creative and controllable image synthesis with composable conditions. arXiv preprint arXiv:2302.09778 (2023)"},{"key":"7_CR12","unstructured":"Huang, N., et al.: DiffStyler: controllable dual diffusion for text-driven image stylization. arXiv preprint arXiv:2211.10682 (2022)"},{"key":"7_CR13","doi-asserted-by":"crossref","unstructured":"Huang, X., Belongie, S.: Arbitrary style transfer in real-time with adaptive instance normalization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1501\u20131510 (2017)","DOI":"10.1109\/ICCV.2017.167"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and improving the image quality of styleGAN. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8110\u20138119 (2020)","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"7_CR15","unstructured":"Laion.ai: Laion-aesthetics. https:\/\/laion.ai\/blog\/laion-aesthetics\/"},{"key":"7_CR16","unstructured":"Li, D., Li, J., Hoi, S.C.: Blip-diffusion: pre-trained subject representation for controllable text-to-image generation and editing. arXiv preprint arXiv:2305.14720 (2023)"},{"key":"7_CR17","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning (2023)"},{"key":"7_CR18","first-page":"29710","volume":"34","author":"M Liu","year":"2021","unstructured":"Liu, M., Li, Q., Qin, Z., Zhang, G., Wan, P., Zheng, W.: BlendGAN: implicitly GAN blending for arbitrary stylized face generation. Adv. Neural. Inf. Process. Syst. 34, 29710\u201329722 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR19","unstructured":"MidJourney: Midjourney. https:\/\/www.midjourney.com\/"},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Mou, C., et al.: T2I-adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"7_CR21","unstructured":"OpenAI: Dall-e 3. https:\/\/openai.com\/dall-e-3"},{"key":"7_CR22","unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"7_CR23","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"7_CR24","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with CLIP latents. arXiv preprint arXiv:2204.06125. 1(2), 3 (2022)"},{"key":"7_CR25","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. In: International Conference on Machine Learning, pp. 8821\u20138831. PMLR (2021)"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"7_CR27","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/0377-0427(87)90125-7","volume":"20","author":"PJ Rousseeuw","year":"1987","unstructured":"Rousseeuw, P.J.: Silhouettes: a graphical aid to the interpretation and validation of cluster analysis. Comput. Appl. Math. 20, 53\u201365 (1987). https:\/\/doi.org\/10.1016\/0377-0427(87)90125-7","journal-title":"Comput. Appl. Math."},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"7_CR29","doi-asserted-by":"crossref","unstructured":"Ruta, D., et al.: ALADIN: all layer adaptive instance normalization for fine-grained style similarity. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11926\u201311935 (2021)","DOI":"10.1109\/ICCV48922.2021.01171"},{"key":"7_CR30","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR31","unstructured":"Sauer, A., Karras, T., Laine, S., Geiger, A., Aila, T.: StyleGAN-T: unlocking the power of GANs for fast large-scale text-to-image synthesis. arXiv preprint arXiv:2301.09515 (2023)"},{"key":"7_CR32","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"7_CR33","unstructured":"Voynov, A., Chu, Q., Cohen-Or, D., Aberman, K.: $$ p+ $$: extended textual conditioning in text-to-image generation. arXiv preprint arXiv:2303.09522 (2023)"},{"key":"7_CR34","unstructured":"Wang, H., Wang, Q., Bai, X., Qin, Z., Chen, A.: InstantStyle: free lunch towards style-preserving in text-to-image generation. arXiv preprint arXiv:2404.02733 (2024)"},{"key":"7_CR35","unstructured":"Wang, W., et al.: CogVLM: visual expert for pretrained language models (2023)"},{"key":"7_CR36","unstructured":"Wang, Z., et al.: StyleAdapter: a single-pass LoRA-free model for stylized image generation. arXiv preprint arXiv:2309.01770 (2023)"},{"key":"7_CR37","doi-asserted-by":"crossref","unstructured":"Wu, Y., Nakashima, Y., Garcia, N.: Not only generative art: stable diffusion for content-style disentanglement in art analysis. In: Proceedings of the 2023 ACM International Conference on Multimedia Retrieval, pp. 199\u2013208 (2023)","DOI":"10.1145\/3591106.3592262"},{"key":"7_CR38","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: IP-adapter: text compatible image prompt adapter for text-to-image diffusion models (2023)"},{"key":"7_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"7_CR40","unstructured":"Zhang, S., et al.: I2VGen-XL: high-quality image-to-video synthesis via cascaded diffusion models (2023)"},{"key":"7_CR41","unstructured":"Zhang, Y., et al.: Prospect: expanded conditioning for the personalization of attribute-aware image generation. arXiv preprint arXiv:2305.16225 (2023)"},{"key":"7_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Inversion-based style transfer with diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10146\u201310156 (2023)","DOI":"10.1109\/CVPR52729.2023.00978"},{"key":"7_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Domain enhanced arbitrary image style transfer via contrastive learning. In: ACM SIGGRAPH 2022 Conference Proceedings, pp.\u00a01\u20138 (2022)","DOI":"10.1145\/3528233.3530736"},{"key":"7_CR44","doi-asserted-by":"crossref","unstructured":"Zhu, J.Y., Park, T., Isola, P., Efros, A.A.: Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2223\u20132232 (2017)","DOI":"10.1109\/ICCV.2017.244"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73390-1_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T16:29:31Z","timestamp":1730305771000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73390-1_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031733895","9783031733901"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73390-1_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}