{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T07:29:21Z","timestamp":1742974161220,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":28,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819620708"},{"type":"electronic","value":"9789819620715"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-2071-5_2","type":"book-chapter","created":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T15:33:50Z","timestamp":1735745630000},"page":"16-29","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Skin-Adapter: Fine-Grained Skin-Color Preservation for\u00a0Text-to-Image Generation"],"prefix":"10.1007","author":[{"given":"Zhuowei","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mengqi","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nan","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhendong","family":"Mao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,1,2]]},"reference":[{"key":"2_CR1","doi-asserted-by":"crossref","unstructured":"Afifi, M., Brubaker, M.A., Brown, M.S.: HistoGAN: controlling colors of GAN-generated and real images via color histograms. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7941\u20137950 (2021)","DOI":"10.1109\/CVPR46437.2021.00785"},{"key":"2_CR2","first-page":"23716","volume":"35","author":"JB Alayrac","year":"2022","unstructured":"Alayrac, J.B., et al.: Flamingo: a visual language model for few-shot learning. Adv. Neural. Inf. Process. Syst. 35, 23716\u201323736 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR3","unstructured":"Chen, J., et\u00a0al.: Pixart\u2013$$\\alpha $$: Fast training of diffusion transformer for photorealistic text-to-image synthesis. arXiv preprint arXiv:2310.00426 (2023)"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Z., Fang, S., Liu, W., He, Q., Huang, M., Mao, Z.: DreamIdentity: enhanced editability for efficient face-identity preserved image generation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 1281\u20131289 (2024)","DOI":"10.1609\/aaai.v38i2.27891"},{"key":"2_CR5","unstructured":"Clark, K., Vicol, P., Swersky, K., Fleet, D.J.: Directly fine-tuning diffusion models on differentiable rewards. arXiv preprint arXiv:2309.17400 (2023)"},{"key":"2_CR6","unstructured":"Esser, P., et\u00a0al.: Scaling rectified flow transformers for high-resolution image synthesis. In: Forty-first International Conference on Machine Learning (2024)"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Ge, S., Park, T., Zhu, J.Y., Huang, J.B.: Expressive text-to-image generation with rich text. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7545\u20137556 (2023)","DOI":"10.1109\/ICCV51070.2023.00694"},{"key":"2_CR8","unstructured":"Guo, Z., Wu, Y., Chen, Z., Chen, L., He, Q.: PuLID: pure and lightning id customization via contrastive alignment. arXiv preprint arXiv:2404.16022 (2024)"},{"key":"2_CR9","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR10","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"2_CR11","unstructured":"Huang, L., Chen, D., Liu, Y., Shen, Y., Zhao, D., Zhou, J.: Composer: creative and controllable image synthesis with composable conditions. In: International Conference on Machine Learning, pp. 13753\u201313773. PMLR (2023)"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Huang, M., Mao, Z., Liu, M., He, Q., Zhang, Y.: RealCustom: narrowing real text word for real-time open-domain text-to-image customization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7476\u20137485 (2024)","DOI":"10.1109\/CVPR52733.2024.00714"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Li, Z., Cao, M., Wang, X., Qi, Z., Cheng, M.M., Shan, Y.: PhotoMaker: customizing realistic human photos via stacked id embedding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8640\u20138650 (2024)","DOI":"10.1109\/CVPR52733.2024.00825"},{"key":"2_CR14","unstructured":"Liu, L., Fu, Q., Hou, F., He, Y.: Flexible portrait image editing with fine-grained control. arXiv preprint arXiv:2204.01318 (2022)"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Mou, C., et al.: T2I-Adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 4296\u20134304 (2024)","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"2_CR16","unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Qi, T., et al.: DEADiff: an efficient stylization diffusion model with disentangled representations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8693\u20138702 (2024)","DOI":"10.1109\/CVPR52733.2024.00830"},{"key":"2_CR18","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"2_CR21","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR22","first-page":"25278","volume":"35","author":"C Schuhmann","year":"2022","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models. Adv. Neural. Inf. Process. Syst. 35, 25278\u201325294 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR23","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"2_CR24","unstructured":"Wang, H., Wang, Q., Bai, X., Qin, Z., Chen, A.: InstantStyle: free lunch towards style-preserving in text-to-image generation. arXiv preprint arXiv:2404.02733 (2024)"},{"key":"2_CR25","unstructured":"Wang, Q., Bai, X., Wang, H., Qin, Z., Chen, A.: InstantID: zero-shot identity-preserving generation in seconds. arXiv preprint arXiv:2401.07519 (2024)"},{"key":"2_CR26","unstructured":"Xu, J., et al.: ImagereWard: learning and evaluating human preferences for text-to-image generation. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"2_CR27","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: IP-adapter: text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-2071-5_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T16:01:29Z","timestamp":1735747289000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-2071-5_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819620708","9789819620715"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-2071-5_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nara","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 January 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2025.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}