{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,18]],"date-time":"2025-04-18T04:11:37Z","timestamp":1744949497912,"version":"3.40.4"},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s11263-024-02308-z","type":"journal-article","created":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T07:29:20Z","timestamp":1733815760000},"page":"2721-2751","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["On Mitigating Stability-Plasticity Dilemma in CLIP-guided Image Morphing via Geodesic Distillation Loss"],"prefix":"10.1007","volume":"133","author":[{"given":"Yeongtak","family":"Oh","sequence":"first","affiliation":[]},{"given":"Saehyung","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Uiwon","family":"Hwang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2367-197X","authenticated-orcid":false,"given":"Sungroh","family":"Yoon","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,10]]},"reference":[{"key":"2308_CR1","first-page":"29414","volume":"35","author":"A Alanov","year":"2022","unstructured":"Alanov, A., Titov, V., & Vetrov, D. P. (2022). Hyperdomainnet: Universal domain adaptation for generative adversarial networks. Advances in Neural Information Processing Systems, 35, 29414\u201329426.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2308_CR2","doi-asserted-by":"crossref","unstructured":"Bar-Tal, O., Ofri-Amar, D., Fridman, R., Kasten, Y., & Dekel, T. (2022). Text2live: Text-driven layered image and video editing. In Computer vision\u2013ECCV 2022: 17th European conference, Tel Aviv, Israel, October 23\u201327, 2022, proceedings, part XV (pp. 707\u2013723). Springer.","DOI":"10.1007\/978-3-031-19784-0_41"},{"key":"2308_CR3","unstructured":"Bendokat, T., Zimmermann, R., & Absil, P. -A. (2020). A Grassmann manifold handbook: Basic geometry and computational aspects. arXiv preprint arXiv:2011.13699"},{"key":"2308_CR4","doi-asserted-by":"crossref","unstructured":"Chefer, H., Benaim, S., Paiss, R., & Wolf, L. (2022). Image-based clip-guided essence transfer. In Computer vision\u2013ECCV 2022: 17th European conference, Tel Aviv, Israel, October 23\u201327, 2022, proceedings, part XIII (pp. 695\u2013711). Springer.","DOI":"10.1007\/978-3-031-19778-9_40"},{"key":"2308_CR5","unstructured":"Chen, J., Yu, J., Ge, C., Yao, L., Xie, E., Wu, Y., Wang, Z., Kwok, J., Luo, P., Lu, H., & Li, Z. (2023). Pixart-alpha: Fast training of diffusion transformer for photorealistic text-to-image synthesis. arXiv preprint arXiv:2310.00426"},{"key":"2308_CR6","doi-asserted-by":"crossref","unstructured":"Cherti, M., Beaumont, R., Wightman, R., Wortsman, M., Ilharco, G., Gordon, C., Schuhmann, C., Schmidt, L., & Jitsev, J. (2022). Reproducible scaling laws for contrastive language-image learning. arXiv preprint arXiv:2212.07143","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"2308_CR7","doi-asserted-by":"crossref","unstructured":"Cherti, M., Beaumont, R., Wightman, R., Wortsman, M., Ilharco, G., Gordon, C., Schuhmann, C., Schmidt, L., & Jitsev, J. (2023). Reproducible scaling laws for contrastive language-image learning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 2818\u20132829).","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"2308_CR8","doi-asserted-by":"crossref","unstructured":"Choi, Y., Uh, Y., Yoo, J., & Ha, J. -W. (2020). Stargan v2: Diverse image synthesis for multiple domains. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8188\u20138197).","DOI":"10.1109\/CVPR42600.2020.00821"},{"key":"2308_CR9","doi-asserted-by":"crossref","unstructured":"Crowson, K., Biderman, S., Kornis, D., Stander, D., Hallahan, E., Castricato, L., & Raff, E. (2022). Vqgan-clip: Open domain image generation and editing with natural language guidance. In Computer vision\u2013ECCV 2022: 17th European conference, Tel Aviv, Israel, October 23\u201327, 2022, proceedings, part XXXVII (pp. 88\u2013105). Springer.","DOI":"10.1007\/978-3-031-19836-6_6"},{"key":"2308_CR10","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. -J., Li, K., & Fei-Fei, L. (2009). Imagenet: A large-scale hierarchical image database. In 2009 IEEE conference on computer vision and pattern recognition (pp. 248\u2013255). IEEE","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2308_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Guo, J., Xue, N., & Zafeiriou, S. (2019). Arcface: Additive angular margin loss for deep face recognition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4690\u20134699).","DOI":"10.1109\/CVPR.2019.00482"},{"key":"2308_CR12","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., & Houlsby N. (2020). An image is worth 16 $$times$$ 16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"issue":"2","key":"2308_CR13","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1137\/S0895479895290954","volume":"20","author":"A Edelman","year":"1998","unstructured":"Edelman, A., Arias, T. A., & Smith, S. T. (1998). The geometry of algorithms with orthogonality constraints. SIAM Journal on Matrix Analysis and Applications, 20(2), 303\u2013353.","journal-title":"SIAM Journal on Matrix Analysis and Applications"},{"key":"2308_CR14","unstructured":"Eyring, L., Karthik, S., Roth, K., Dosovitskiy, A., & Akata, Z. (2024). Reno: Enhancing one-step text-to-image models through reward-based noise optimization. arXiv preprint arXiv:2406.04312"},{"key":"2308_CR15","unstructured":"Fan, Y., Watkins, O., Du, Y., Liu, H., Ryu, M., Boutilier, C., Abbeel, P., Ghavamzadeh, M., Lee, K., & Lee, K. (2024). Reinforcement learning for fine-tuning text-to-image diffusion models. In Advances in neural information processing systems (vol. 36)."},{"key":"2308_CR16","unstructured":"Fang, A., Ilharco, G., Wortsman, M., Wan, Y., Shankar, V., Dave, A., & Schmidt, L. (2022). Data determines distributional robustness in contrastive language image pre-training (clip). In International conference on machine learning (pp. 6216\u20136234). PMLR."},{"key":"2308_CR17","first-page":"5207","volume":"35","author":"K Frans","year":"2022","unstructured":"Frans, K., Soros, L., & Witkowski, O. (2022). Clipdraw: Exploring text-to-drawing synthesis through language-image encoders. Advances in Neural Information Processing Systems, 35, 5207\u20135218.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"4","key":"2308_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530164","volume":"41","author":"R Gal","year":"2022","unstructured":"Gal, R., Patashnik, O., Maron, H., Bermano, A. H., Chechik, G., & Cohen-Or, D. (2022). Stylegan-nada: Clip-guided domain adaptation of image generators. ACM Transactions on Graphics (TOG), 41(4), 1\u201313.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2308_CR19","unstructured":"Ghiasi, A., Kazemi, H., Reich, S., Zhu, C., Goldblum, M., & Goldstein, T. (2022). Plug-in inversion: Model-agnostic inversion for vision with data augmentations. In International conference on machine learning (pp. 7484\u20137512). PMLR."},{"key":"2308_CR20","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1007\/s11263-015-0833-x","volume":"114","author":"M Harandi","year":"2015","unstructured":"Harandi, M., Hartley, R., Shen, C., Lovell, B., & Sanderson, C. (2015). Extrinsic methods for coding and dictionary learning on Grassmann manifolds. International Journal of Computer Vision, 114, 113\u2013136.","journal-title":"International Journal of Computer Vision"},{"key":"2308_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"key":"2308_CR22","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, 33, 6840\u20136851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2308_CR23","doi-asserted-by":"crossref","unstructured":"Hou, S., Pan, X., Loy, C. C., Wang, Z., & Lin, D. (2019). Learning a unified classifier incrementally via rebalancing. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 831\u2013839).","DOI":"10.1109\/CVPR.2019.00092"},{"key":"2308_CR24","unstructured":"Huang, N., Zhang, Y., Tang, F., Ma, C., Huang, H., Zhang, Y., Dong, W., & Xu, C. (2022). Diffstyler: Controllable dual diffusion for text-driven image stylization. arXiv preprint arXiv:2211.10682"},{"key":"2308_CR25","first-page":"78723","volume":"36","author":"K Huang","year":"2023","unstructured":"Huang, K., Sun, K., Xie, E., Li, Z., & Liu, X. (2023). T2i-compbench: A comprehensive benchmark for open-world compositional text-to-image generation. Advances in Neural Information Processing Systems, 36, 78723\u201378747.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2308_CR26","doi-asserted-by":"crossref","unstructured":"Jayasumana, S., Ramalingam, S., Veit, A., Glasner, D., Chakrabarti, A., & Kumar, S. (2024). Rethinking fid: Towards a better evaluation metric for image generation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 9307\u20139315).","DOI":"10.1109\/CVPR52733.2024.00889"},{"key":"2308_CR27","unstructured":"Jia, C., Yang, Y., Xia, Y., Chen, Y. -T., Parekh, Z., Pham, H., Le, Q., Sung, Y. -H., Li, Z., & Duerig, T. (2021). Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning (pp. 4904\u20134916). PMLR."},{"key":"2308_CR28","first-page":"12104","volume":"33","author":"T Karras","year":"2020","unstructured":"Karras, T., Aittala, M., Hellsten, J., Laine, S., Lehtinen, J., & Aila, T. (2020). Training generative adversarial networks with limited data. Advances in Neural Information Processing Systems, 33, 12104\u201312114.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2308_CR29","doi-asserted-by":"crossref","unstructured":"Kim, G., Kwon, T., & Ye, J. C. (2022). Diffusionclip: Text-guided diffusion models for robust image manipulation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 2426\u20132435)","DOI":"10.1109\/CVPR52688.2022.00246"},{"issue":"13","key":"2308_CR30","doi-asserted-by":"publisher","first-page":"3521","DOI":"10.1073\/pnas.1611835114","volume":"114","author":"J Kirkpatrick","year":"2017","unstructured":"Kirkpatrick, J., Pascanu, R., Rabinowitz, N., Veness, J., Desjardins, G., Rusu, A. A., Milan, K., Quan, J., Ramalho, T., Grabska-Barwinska, A., et al. (2017). Overcoming catastrophic forgetting in neural networks. Proceedings of the National Academy of Sciences, 114(13), 3521\u20133526.","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"2308_CR31","first-page":"36652","volume":"36","author":"Y Kirstain","year":"2023","unstructured":"Kirstain, Y., Polyak, A., Singer, U., Matiana, S., Penna, J., & Levy, O. (2023). Pick-a-pic: An open dataset of user preferences for text-to-image generation. Advances in Neural Information Processing Systems, 36, 36652\u201336663.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2308_CR32","doi-asserted-by":"crossref","unstructured":"Krause, J., Stark, M., Deng, J., & Fei-Fei, L. (2013). 3D object representations for fine-grained categorization. In 4th International IEEE workshop on 3D representation and recognition (3dRR-13), Sydney, Australia.","DOI":"10.1109\/ICCVW.2013.77"},{"key":"2308_CR33","doi-asserted-by":"crossref","unstructured":"Kwon, G., & Ye, J. C. (2022) Clipstyler: Image style transfer with a single text condition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 18062\u201318071).","DOI":"10.1109\/CVPR52688.2022.01753"},{"issue":"1","key":"2308_CR34","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1002\/(SICI)1099-1778(199601)7:1<3::AID-VIS131>3.0.CO;2-U","volume":"7","author":"S-Y Lee","year":"1996","unstructured":"Lee, S.-Y., Chwa, K.-Y., Hahn, J., & Shin, S. Y. (1996). Image morphing using deformation techniques. The Journal of Visualization and Computer Animation, 7(1), 3\u201323.","journal-title":"The Journal of Visualization and Computer Animation"},{"key":"2308_CR35","unstructured":"Li, J., Li, D., Xiong, C., & Hoi, S. (2022). Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International conference on machine learning (pp. 12888\u201312900). PMLR."},{"key":"2308_CR36","unstructured":"Li, J., Savarese, S., & Hoi, S. C. (2022). Masked unsupervised self-training for zero-shot image classification. arXiv preprint arXiv:2206.02967"},{"key":"2308_CR37","doi-asserted-by":"crossref","unstructured":"Li, M., Yang, T., Kuang, H., Wu, J., Wang, Z., Xiao, X., & Chen, C. (2024). Controlnet++: Improving conditional controls with efficient consistency feedback. arXiv preprint arXiv:2404.07987","DOI":"10.1007\/978-3-031-72667-5_8"},{"key":"2308_CR38","unstructured":"Liang, W., Zhang, Y., Kwon, Y., Yeung, S., & Zou, J. (2022). Mind the gap: Understanding the modality gap in multi-modal contrastive representation learning. arXiv preprint arXiv:2203.02053"},{"issue":"12","key":"2308_CR39","doi-asserted-by":"publisher","first-page":"2935","DOI":"10.1109\/TPAMI.2017.2773081","volume":"40","author":"Z Li","year":"2017","unstructured":"Li, Z., & Hoiem, D. (2017). Learning without forgetting. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(12), 2935\u20132947.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2308_CR40","doi-asserted-by":"crossref","unstructured":"Nitzan, Y., Gharbi, M., Zhang, R., Park, T., Zhu, J. -Y., Cohen-Or, D., & Shechtman, E. (2023). Domain expansion of image generators. arXiv preprint arXiv:2301.05225","DOI":"10.1109\/CVPR52729.2023.01529"},{"key":"2308_CR41","unstructured":"Oord, A., Li, Y., & Vinyals, O. (2018). Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748"},{"key":"2308_CR42","doi-asserted-by":"crossref","unstructured":"Patashnik, O., Wu, Z., Shechtman, E., Cohen-Or, D., & Lischinski, D. (2021) Styleclip: Text-driven manipulation of stylegan imagery. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 2085\u20132094).","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"2308_CR43","unstructured":"Podell, D., English, Z., Lacey, K., Blattmann, A., Dockhorn, T., M\u00fcller, J., Penna, J., & Rombach, R. (2023). Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952"},{"key":"2308_CR44","unstructured":"Radford, A., Kim, J. W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., Krueger, G., & Sutskever, I. (2021). Learning transferable visual models from natural language supervision. In International conference on machine learning (pp. 8748\u20138763). PMLR."},{"key":"2308_CR45","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., & Chen, M. (2022). Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125"},{"key":"2308_CR46","doi-asserted-by":"crossref","unstructured":"Rebuffi, S. -A., Kolesnikov, A., Sperl, G., & Lampert, C. H. (2017). icarl: Incremental classifier and representation learning. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2001\u20132010).","DOI":"10.1109\/CVPR.2017.587"},{"key":"2308_CR47","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., & Ommer, B. (2022). High-resolution image synthesis with latent diffusion models. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 10684\u201310695).","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2308_CR48","unstructured":"Sauer, A., Karras, T., Laine, S., Geiger, A., & Aila, T. (2023). Stylegan-t: Unlocking the power of GANs for fast large-scale text-to-image synthesis. arXiv preprint arXiv:2301.09515"},{"key":"2308_CR49","unstructured":"Schuhmann, C., Beaumont, R., Vencu, R., Gordon, C., Wightman, R., Cherti, M., Coombes, T., Katta, A., Mullis, C., Wortsman, M., Schramowski, P., Kundurthy, S., Crowson, K., Schmidt, L., Kaczmarczyk, R., & Jitsev J. (2022). Laion-5b: An open large-scale dataset for training next generation image-text models. arXiv preprint arXiv:2210.08402"},{"key":"2308_CR50","doi-asserted-by":"crossref","unstructured":"Simon, C., Koniusz, P., & Harandi, M. (2021). On learning the geodesic path for incremental learning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 1591\u20131600).","DOI":"10.1109\/CVPR46437.2021.00164"},{"key":"2308_CR51","doi-asserted-by":"crossref","unstructured":"Song, Y., Shao, X., Chen, K., Zhang, W., Li, M., & Jing, Z. (2022). Clipvg: Text-guided image manipulation using differentiable vector graphics. arXiv preprint arXiv:2212.02122","DOI":"10.1609\/aaai.v37i2.25326"},{"key":"2308_CR52","doi-asserted-by":"crossref","unstructured":"Tevet, G., Gordon, B., Hertz, A., Bermano, A. H., & Cohen-Or, D. (2022). Motionclip: Exposing human motion generation to clip space. In Computer Vision\u2013ECCV 2022: 17th European conference, Tel Aviv, Israel, October 23\u201327, 2022, proceedings, part XXII (pp. 358\u2013374). Springer.","DOI":"10.1007\/978-3-031-20047-2_21"},{"key":"2308_CR53","first-page":"24261","volume":"34","author":"IO Tolstikhin","year":"2021","unstructured":"Tolstikhin, I. O., Houlsby, N., Kolesnikov, A., Beyer, L., Zhai, X., Unterthiner, T., Yung, J., Steiner, A., Keysers, D., Uszkoreit, J., et al. (2021). Mlp-mixer: An all-mlp architecture for vision. Advances in Neural Information Processing Systems, 34, 24261\u201324272.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"4","key":"2308_CR54","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3450626.3459838","volume":"40","author":"O Tov","year":"2021","unstructured":"Tov, O., Alaluf, Y., Nitzan, Y., Patashnik, O., & Cohen-Or, D. (2021). Designing an encoder for Stylegan image manipulation. ACM Transactions on Graphics (TOG), 40(4), 1\u201314.","journal-title":"ACM Transactions on Graphics (TOG)"},{"issue":"1","key":"2308_CR55","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1137\/0713009","volume":"13","author":"CF Van Loan","year":"1976","unstructured":"Van Loan, C. F. (1976). Generalizing the singular value decomposition. SIAM Journal on Numerical Analysis, 13(1), 76\u201383.","journal-title":"SIAM Journal on Numerical Analysis"},{"key":"2308_CR56","doi-asserted-by":"crossref","unstructured":"Wallace, B., Dang, M., Rafailov, R., Zhou, L., Lou, A., Purushwalkam, S., Ermon, S., Xiong, C., Joty, S., & Naik, N. (2024). Diffusion model alignment using direct preference optimization. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8228\u20138238).","DOI":"10.1109\/CVPR52733.2024.00786"},{"key":"2308_CR57","unstructured":"Wu, X., Hao, Y., Sun, K., Chen, Y., Zhu, F., Zhao, R., & Li, H. (2023). Human preference score v2: A solid benchmark for evaluating human preferences of text-to-image synthesis. arXiv preprint arXiv:2306.09341"},{"key":"2308_CR58","doi-asserted-by":"crossref","unstructured":"Yin, H., Molchanov, P., Alvarez, J. M., Li, Z., Mallya, A., Hoiem, D., Jha, N. K., & Kautz, J. (2020) Dreaming to distill: Data-free knowledge transfer via deepinversion. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8715\u20138724).","DOI":"10.1109\/CVPR42600.2020.00874"},{"key":"2308_CR59","unstructured":"Yu, F., Seff, A., Zhang, Y., Song, S., Funkhouser, T., & Xiao, J. (2015). Lsun: Construction of a large-scale image dataset using deep learning with humans in the loop. arXiv preprint arXiv:1506.03365"},{"key":"2308_CR60","doi-asserted-by":"crossref","unstructured":"Zhang, L., & Agrawala, M. (2023). Adding conditional control to text-to-image diffusion models. arXiv preprint arXiv:2302.05543","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"2308_CR61","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A. A., Shechtman, E., & Wang, O. (2018). The unreasonable effectiveness of deep features as a perceptual metric. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 586\u2013595).","DOI":"10.1109\/CVPR.2018.00068"},{"key":"2308_CR62","first-page":"7559","volume":"33","author":"S Zhao","year":"2020","unstructured":"Zhao, S., Liu, Z., Lin, J., Zhu, J.-Y., & Han, S. (2020). Differentiable augmentation for data-efficient GAN training. Advances in Neural Information Processing Systems, 33, 7559\u20137570.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2308_CR63","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C. C., & Liu, Z. (2022). Conditional prompt learning for vision-language models. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 16816\u201316825).","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"2308_CR64","doi-asserted-by":"crossref","unstructured":"Zhou, C., Zhong, F., & Oztireli, C. (2022). Clip-pae: Projection-augmentation embedding to extract relevant features for a disentangled, interpretable, and controllable text-guided image manipulation. arXiv preprint arXiv:2210.03919","DOI":"10.1145\/3588432.3591532"},{"key":"2308_CR65","doi-asserted-by":"crossref","unstructured":"Zhu, J. -Y., Kr\u00e4henb\u00fchl, P., Shechtman, E., & Efros, A. A. (2016). Generative visual manipulation on the natural image manifold. In Computer vision\u2013ECCV 2016: 14th European conference, Amsterdam, The Netherlands, October 11\u201314, 2016, proceedings, part V (vol. 14, pp. 597\u2013613). Springer.","DOI":"10.1007\/978-3-319-46454-1_36"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02308-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02308-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02308-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,17]],"date-time":"2025-04-17T06:01:28Z","timestamp":1744869688000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02308-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,10]]},"references-count":65,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["2308"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02308-z","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2024,12,10]]},"assertion":[{"value":"12 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 November 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors claim no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"All procedures performed in studies involving human participants were in accordance with the ethical standards of the institutional and\/or national research committee and with the 1964 Helsinki Declaration and its later amendments or comparable ethical standards.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"Informed consent was obtained from all individual participants involved in the study.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed Consent"}}]}}