{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T20:22:15Z","timestamp":1769458935715,"version":"3.49.0"},"publisher-location":"Cham","reference-count":78,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031918377","type":"print"},{"value":"9783031918384","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91838-4_6","type":"book-chapter","created":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T18:34:23Z","timestamp":1748198063000},"page":"88-106","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["MM2Latent: Text-to-Facial Image Generation and\u00a0Editing in\u00a0GANs with\u00a0Multimodal Assistance"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-0430-0660","authenticated-orcid":false,"given":"Debin","family":"Meng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2036-9089","authenticated-orcid":false,"given":"Christos","family":"Tzelepis","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3913-4738","authenticated-orcid":false,"given":"Ioannis","family":"Patras","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1803-5338","authenticated-orcid":false,"given":"Georgios","family":"Tzimiropoulos","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Afifi, M., Brubaker, M.A., Brown, M.S.: Histogan: controlling colors of GAN-generated and real images via color histograms. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7937\u20137946 (2020). https:\/\/api.semanticscholar.org\/CorpusID:227151819","DOI":"10.1109\/CVPR46437.2021.00785"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Barattin, S., Tzelepis, C., Patras, I., Sebe, N.: Attribute-preserving face dataset anonymization via latent code optimization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8001\u20138010 (2023)","DOI":"10.1109\/CVPR52729.2023.00773"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Bounareli, S., Tzelepis, C., Argyriou, V., Patras, I., Tzimiropoulos, G.: Hyperreenact: one-shot reenactment via jointly learning to refine and retarget faces. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7149\u20137159 (2023)","DOI":"10.1109\/ICCV51070.2023.00657"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Bounareli, S., Tzelepis, C., Argyriou, V., Patras, I., Tzimiropoulos, G.: Stylemask: disentangling the style space of stylegan2 for neural face reenactment. In: 2023 IEEE 17th International Conference on Automatic Face and Gesture Recognition (FG), pp.\u00a01\u20138. IEEE (2023)","DOI":"10.1109\/FG57933.2023.10042744"},{"key":"6_CR5","unstructured":"Bounareli, S., Tzelepis, C., Argyriou, V., Patras, I., Tzimiropoulos, G.: Diffusionact: controllable diffusion autoencoder for one-shot face reenactment. arXiv preprint arXiv:2403.17217 (2024)"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Bounareli, S., Tzelepis, C., Argyriou, V., Patras, I., Tzimiropoulos, G.: One-shot neural face reenactment via finding directions in GAN\u2019s latent space. Int. J. Comput. Vis. 1\u201331 (2024)","DOI":"10.1007\/s11263-024-02018-6"},{"key":"6_CR7","unstructured":"Bradski, G.: The OpenCV library. Dr. Dobb\u2019s J. Softw. Tools (2000)"},{"key":"6_CR8","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)"},{"issue":"1","key":"6_CR9","first-page":"1","volume":"41","author":"A Chen","year":"2022","unstructured":"Chen, A., Liu, R., Xie, L., Chen, Z., Su, H., Yu, J.: Sofgan: a portrait image generator with dynamic styling. ACM Trans. Graph. (TOG) 41(1), 1\u201326 (2022)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Chen, Q., Koltun, V.: Photographic image synthesis with cascaded refinement networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1511\u20131520 (2017)","DOI":"10.1109\/ICCV.2017.168"},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"Chen, S.Y., Su, W., Gao, L., Xia, S., Fu, H.: Deepfacedrawing: deep generation of face images from sketches. ACM Trans. Graph. (TOG) 39(4), 72-1 (2020)","DOI":"10.1145\/3386569.3392386"},{"key":"6_CR12","unstructured":"Chen, X., Qing, L., He, X., Luo, X., Xu, Y.: Ftgan: a fully-trained generative adversarial networks for text to face generation. arXiv preprint arXiv:1904.05729 (2019)"},{"issue":"1","key":"6_CR13","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/MSP.2017.2765202","volume":"35","author":"A Creswell","year":"2018","unstructured":"Creswell, A., White, T., Dumoulin, V., Arulkumaran, K., Sengupta, B., Bharath, A.A.: Generative adversarial networks: an overview. IEEE Signal Process. Mag. 35(1), 53\u201365 (2018)","journal-title":"IEEE Signal Process. Mag."},{"key":"6_CR14","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"D\u2019Inc\u00e0, M., Tzelepis, C., Patras, I., Sebe, N.: Improving fairness using vision-language driven image augmentation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 4695\u20134704 (2024)","DOI":"10.1109\/WACV57701.2024.00463"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Dong, W., Xue, S., Duan, X., Han, S.: Prompt tuning inversion for text-driven image editing using diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7430\u20137440 (2023)","DOI":"10.1109\/ICCV51070.2023.00683"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Du, X., et al.: Pixelface+: towards controllable face generation and manipulation with text descriptions and segmentation masks. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 4666\u20134677 (2023)","DOI":"10.1145\/3581783.3612067"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"Feng, Y., Feng, H., Black, M.J., Bolkart, T.: Learning an animatable detailed 3D face model from in-the-wild images. ACM Trans. Graph. (TOG) 40, 1 \u2013 13 (2020). https:\/\/api.semanticscholar.org\/CorpusID:236094976","DOI":"10.1145\/3450626.3459936"},{"issue":"11","key":"6_CR19","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"6_CR20","doi-asserted-by":"crossref","unstructured":"Goodfellow, I.J., et al.: Generative adversarial networks. Commun. ACM 63, 139\u2013144 (2014). https:\/\/api.semanticscholar.org\/CorpusID:1033682","DOI":"10.1145\/3422622"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"He, J., et al.: Pixelfolder: an efficient progressive pixel synthesis network for image generation. In: European Conference on Computer Vision, pp. 643\u2013660. Springer (2022)","DOI":"10.1007\/978-3-031-19781-9_37"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"He, Z., Kan, M., Shan, S.: Eigengan: layer-wise eigen-learning for GANs. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 14388\u201314397 (2021). https:\/\/api.semanticscholar.org\/CorpusID:233394581","DOI":"10.1109\/ICCV48922.2021.01414"},{"key":"6_CR23","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR24","doi-asserted-by":"crossref","unstructured":"Hou, X., Zhang, X., Li, Y., Shen, L.: Textface: text-to-style mapping based face generation and manipulation. IEEE Trans. Multimedia (2022)","DOI":"10.1109\/TMM.2022.3160360"},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Huang, Z., Chan, K.C., Jiang, Y., Liu, Z.: Collaborative diffusion for multi-modal face generation and editing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6080\u20136090 (2023)","DOI":"10.1109\/CVPR52729.2023.00589"},{"key":"6_CR26","doi-asserted-by":"crossref","unstructured":"Jayasumana, S., Ramalingam, S., Veit, A., Glasner, D., Chakrabarti, A., Kumar, S.: Rethinking fid: towards a better evaluation metric for image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9307\u20139315 (2024)","DOI":"10.1109\/CVPR52733.2024.00889"},{"key":"6_CR27","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Huang, Z., Pan, X., Loy, C.C., Liu, Z.: Talk-to-edit: fine-grained facial editing via dialog. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 13779\u201313788 (2021). https:\/\/api.semanticscholar.org\/CorpusID:237453495","DOI":"10.1109\/ICCV48922.2021.01354"},{"key":"6_CR28","unstructured":"Karras, T., Aila, T., Laine, S., Lehtinen, J.: Progressive growing of GANs for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196 (2017)"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4401\u20134410 (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"6_CR30","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and improving the image quality of stylegan. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8110\u20138119 (2020)","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Kawar, B., et al.: Imagic: text-based real image editing with diffusion models. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6007\u20136017 (2022). https:\/\/api.semanticscholar.org\/CorpusID:252918469","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Lee, C.H., Liu, Z., Wu, L., Luo, P.: Maskgan: towards diverse and interactive facial image manipulation. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00559"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Liang, J., Zeng, H., Zhang, L.: High-resolution photorealistic image translation in real-time: a laplacian pyramid translation network. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9387\u20139395 (2021). https:\/\/api.semanticscholar.org\/CorpusID:233356339","DOI":"10.1109\/CVPR46437.2021.00927"},{"key":"6_CR34","doi-asserted-by":"crossref","unstructured":"Lin, J., Zhang, R., Ganz, F., Han, S., Zhu, J.Y.: Anycost GANs for interactive image synthesis and editing. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 14981\u201314991 (2021). https:\/\/api.semanticscholar.org\/CorpusID:232110729","DOI":"10.1109\/CVPR46437.2021.01474"},{"key":"6_CR35","unstructured":"Liu, N., Li, S., Du, Y., Torralba, A., Tenenbaum, J.B.: Compositional visual generation with composable diffusion models. arXiv abs\/2206.01714 (2022). https:\/\/api.semanticscholar.org\/CorpusID:249375227"},{"key":"6_CR36","doi-asserted-by":"crossref","unstructured":"Liu, R., Ge, Y., Choi, C.L., Wang, X., Li, H.: Divco: diverse conditional image synthesis via contrastive generative adversarial network. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 16372\u201316381 (2021). https:\/\/api.semanticscholar.org\/CorpusID:232232950","DOI":"10.1109\/CVPR46437.2021.01611"},{"key":"6_CR37","unstructured":"Liu, X., Yin, G., Shao, J., Wang, X., et\u00a0al.: Learning to predict layout-to-image conditional convolutions for semantic image synthesis. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"6_CR38","unstructured":"Mirza, M., Osindero, S.: Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784 (2014)"},{"key":"6_CR39","doi-asserted-by":"crossref","unstructured":"Mokady, R., Hertz, A., Aberman, K., Pritch, Y., Cohen-Or, D.: Null-text inversion for editing real images using guided diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6038\u20136047 (2023)","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"6_CR40","doi-asserted-by":"crossref","unstructured":"Nair, N.G., Bandara, W.G.C., Patel, V.M.: Unite and conquer: plug & play multi-modal synthesis using diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6070\u20136079 (2023)","DOI":"10.1109\/CVPR52729.2023.00588"},{"key":"6_CR41","doi-asserted-by":"crossref","unstructured":"Nasir, O.R., Jha, S.K., Grover, M.S., Yu, Y., Kumar, A., Shah, R.R.: Text2facegan: face generation from fine grained textual descriptions. In: 2019 IEEE Fifth International Conference on Multimedia Big Data (BigMM), pp. 58\u201367. IEEE (2019)","DOI":"10.1109\/BigMM.2019.00-42"},{"key":"6_CR42","unstructured":"Oldfield, J., Tzelepis, C., Panagakis, Y., Nicolaou, A., Patras, I., et\u00a0al.: Panda: unsupervised learning of parts and appearances in the feature maps of GANs (2023)"},{"key":"6_CR43","first-page":"2700","volume":"36","author":"J Oldfield","year":"2023","unstructured":"Oldfield, J., Tzelepis, C., Panagakis, Y., Nicolaou, M., Patras, I.: Parts of speech-grounded subspaces in vision-language models. Adv. Neural. Inf. Process. Syst. 36, 2700\u20132724 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR44","doi-asserted-by":"crossref","unstructured":"Oldfield, J., Tzelepis, C., Panagakis, Y., Nicolaou, M.A., Patras, I.: Bilinear models of parts and appearances in generative adversarial networks. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3415506"},{"key":"6_CR45","doi-asserted-by":"crossref","unstructured":"Patashnik, O., Wu, Z., Shechtman, E., Cohen-Or, D., Lischinski, D.: Styleclip: text-driven manipulation of stylegan imagery. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2085\u20132094 (2021)","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"6_CR46","doi-asserted-by":"crossref","unstructured":"Patashnik, O., Wu, Z., Shechtman, E., Cohen-Or, D., Lischinski, D.: Styleclip: text-driven manipulation of stylegan imagery. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 2065\u20132074 (2021). https:\/\/api.semanticscholar.org\/CorpusID:232428282","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"6_CR47","doi-asserted-by":"crossref","unstructured":"Peng, J., et al.: Learning dynamic prior knowledge for text-to-face pixel synthesis. In: Proceedings of the 30th ACM International Conference on Multimedia (2022). https:\/\/api.semanticscholar.org\/CorpusID:252782093","DOI":"10.1145\/3503161.3547818"},{"key":"6_CR48","doi-asserted-by":"crossref","unstructured":"Peng, J., et al.: Towards open-ended text-to-face generation, combination and manipulation. In: Proceedings of the 30th ACM International Conference on Multimedia (2022). https:\/\/api.semanticscholar.org\/CorpusID:252782570","DOI":"10.1145\/3503161.3547758"},{"key":"6_CR49","doi-asserted-by":"publisher","first-page":"4356","DOI":"10.1109\/TMM.2021.3116416","volume":"24","author":"J Peng","year":"2021","unstructured":"Peng, J., et al.: Knowledge-driven generative adversarial network for text-to-image synthesis. IEEE Trans. Multimedia 24, 4356\u20134366 (2021)","journal-title":"IEEE Trans. Multimedia"},{"key":"6_CR50","unstructured":"Pinkney, J.N.M., Li, C.: clip2latent: text driven sampling of a pre-trained stylegan using denoising diffusion and clip. In: British Machine Vision Conference (2022). https:\/\/api.semanticscholar.org\/CorpusID:252715743"},{"key":"6_CR51","doi-asserted-by":"crossref","unstructured":"Qi, X., Chen, Q., Jia, J., Koltun, V.: Semi-parametric image synthesis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8808\u20138816 (2018)","DOI":"10.1109\/CVPR.2018.00918"},{"key":"6_CR52","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning (2021). https:\/\/api.semanticscholar.org\/CorpusID:231591445"},{"key":"6_CR53","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. In: International Conference on Machine Learning (2021)"},{"key":"6_CR54","doi-asserted-by":"crossref","unstructured":"Richardson, E., et al.: Encoding in style: a stylegan encoder for image-to-image translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2287\u20132296 (2021)","DOI":"10.1109\/CVPR46437.2021.00232"},{"key":"6_CR55","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10674\u201310685 (2021). https:\/\/api.semanticscholar.org\/CorpusID:245335280","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"6_CR56","doi-asserted-by":"crossref","unstructured":"Sar\u0131ta\u015f, E., Ekenel, H.K.: Analyzing the feature extractor networks for face image synthesis. In: 2024 IEEE 18th International Conference on Automatic Face and Gesture Recognition (FG), pp.\u00a01\u20135. IEEE (2024)","DOI":"10.1109\/FG59268.2024.10581922"},{"key":"6_CR57","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"6_CR58","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv abs\/2010.02502 (2020). https:\/\/api.semanticscholar.org\/CorpusID:222140788"},{"key":"6_CR59","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"6_CR60","doi-asserted-by":"crossref","unstructured":"Sun, J., Li, Q., Wang, W., Zhao, J., Sun, Z.: Multi-caption text-to-face synthesis: dataset and algorithm. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 2290\u20132298 (2021)","DOI":"10.1145\/3474085.3475391"},{"key":"6_CR61","doi-asserted-by":"crossref","unstructured":"Sun, J., Li, Q., Wang, W., Zhao, J., Sun, Z.: Multi-caption text-to-face synthesis: dataset and algorithm. In: Proceedings of the 29th ACM International Conference on Multimedia (2021). https:\/\/api.semanticscholar.org\/CorpusID:237953270","DOI":"10.1145\/3474085.3475391"},{"key":"6_CR62","unstructured":"Sushko, V., Sch\u00f6nfeld, E., Zhang, D., Gall, J., Schiele, B., Khoreva, A.: You only need adversarial supervision for semantic image synthesis. arXiv preprint arXiv:2012.04781 (2020)"},{"key":"6_CR63","unstructured":"Tao, M., et al.: Deep fusion generative adversarial networks for text-to-image synthesis. arXiv preprint arXiv:2008.05865 (2020)"},{"key":"6_CR64","doi-asserted-by":"crossref","unstructured":"Tewari, A., et al.: Stylerig: rigging stylegan for 3D control over portrait images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6142\u20136151 (2020)","DOI":"10.1109\/CVPR42600.2020.00618"},{"issue":"4","key":"6_CR65","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3450626.3459838","volume":"40","author":"O Tov","year":"2021","unstructured":"Tov, O., Alaluf, Y., Nitzan, Y., Patashnik, O., Cohen-Or, D.: Designing an encoder for stylegan image manipulation. ACM Trans. Graph. (TOG) 40(4), 1\u201314 (2021)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"6_CR66","unstructured":"Tzelepis, C., Oldfield, J., Tzimiropoulos, G., Patras, I.: Contraclip: interpretable GAN generation driven by pairs of contrasting sentences. arXiv preprint arXiv:2206.02104 (2022)"},{"key":"6_CR67","doi-asserted-by":"crossref","unstructured":"Tzelepis, C., Tzimiropoulos, G., Patras, I.: Warpedganspace: finding non-linear RBF paths in GAN latent space. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6393\u20136402 (2021)","DOI":"10.1109\/ICCV48922.2021.00633"},{"key":"6_CR68","doi-asserted-by":"crossref","unstructured":"Wang, T., Zhang, T., Lovell, B.: Faces a la carte: text-to-face generation via attribute disentanglement. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3380\u20133388 (2021)","DOI":"10.1109\/WACV48630.2021.00342"},{"key":"6_CR69","doi-asserted-by":"crossref","unstructured":"Wang, T.C., Liu, M.Y., Zhu, J.Y., Tao, A., Kautz, J., Catanzaro, B.: High-resolution image synthesis and semantic manipulation with conditional GANs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8798\u20138807 (2018)","DOI":"10.1109\/CVPR.2018.00917"},{"key":"6_CR70","doi-asserted-by":"crossref","unstructured":"Wang, Y., Qi, L., Chen, Y.C., Zhang, X., Jia, J.: Image synthesis via semantic composition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13749\u201313758 (2021)","DOI":"10.1109\/ICCV48922.2021.01349"},{"key":"6_CR71","doi-asserted-by":"crossref","unstructured":"Xia, W., Yang, Y., Xue, J.H., Wu, B.: Tedigan: text-guided diverse face image generation and manipulation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2256\u20132265 (2021)","DOI":"10.1109\/CVPR46437.2021.00229"},{"key":"6_CR72","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"6_CR73","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: 2023 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3813\u20133824 (2023). https:\/\/api.semanticscholar.org\/CorpusID:256827727","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"6_CR74","doi-asserted-by":"crossref","unstructured":"Zheng, Y., et al.: General facial representation learning in a visual-linguistic manner. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18697\u201318709 (2022)","DOI":"10.1109\/CVPR52688.2022.01814"},{"key":"6_CR75","doi-asserted-by":"crossref","unstructured":"Zhou, Y., et al.: Towards language-free training for text-to-image generation. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 17886\u201317896 (2021). https:\/\/api.semanticscholar.org\/CorpusID:244714549","DOI":"10.1109\/CVPR52688.2022.01738"},{"key":"6_CR76","doi-asserted-by":"crossref","unstructured":"Zhu, M., Pan, P., Chen, W., Yang, Y.: DM-GAN: dynamic memory generative adversarial networks for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5802\u20135810 (2019)","DOI":"10.1109\/CVPR.2019.00595"},{"key":"6_CR77","doi-asserted-by":"crossref","unstructured":"Zhu, P., Abdal, R., Qin, Y., Wonka, P.: Sean: image synthesis with semantic region-adaptive normalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5104\u20135113 (2020)","DOI":"10.1109\/CVPR42600.2020.00515"},{"key":"6_CR78","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Xu, Z., You, A., Bai, X.: Semantically multi-modal image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5467\u20135476 (2020)","DOI":"10.1109\/CVPR42600.2020.00551"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91838-4_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T18:34:45Z","timestamp":1748198085000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91838-4_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031918377","9783031918384"],"references-count":78,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91838-4_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}