{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T16:44:10Z","timestamp":1769273050618,"version":"3.49.0"},"reference-count":110,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T00:00:00Z","timestamp":1752192000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T00:00:00Z","timestamp":1752192000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20B2062"],"award-info":[{"award-number":["U20B2062"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62227801"],"award-info":[{"award-number":["62227801"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2022ZD0117900"],"award-info":[{"award-number":["2022ZD0117900"]}],"id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2022ZD0116305"],"award-info":[{"award-number":["2022ZD0116305"]}],"id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput 
Vis"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s11263-025-02498-0","type":"journal-article","created":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T17:05:03Z","timestamp":1752253503000},"page":"7012-7036","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["DomainStudio: Fine-Tuning Diffusion Models for Domain-Driven Image Generation Using Limited Data"],"prefix":"10.1007","volume":"133","author":[{"given":"Jingyuan","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Huimin","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Jiansheng","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Yuan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,11]]},"reference":[{"key":"2498_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, N., Lee, J., Lee, C., Kim, K., Kim, D., Nam, S.-H., & Hong, K. (2024) Dreamstyler: Paint by style inversion with text-to-image diffusion models. In: AAAI","DOI":"10.1609\/aaai.v38i2.27824"},{"key":"2498_CR2","unstructured":"Balaji, Y., Nah, S., Huang, X., Vahdat, A., Song, J., Kreis, K., Aittala, M., Aila, T., Laine, S., & Catanzaro, B., et al. (2022) ediffi: Text-to-image diffusion models with an ensemble of expert denoisers. arXiv preprint arXiv:2211.01324"},{"key":"2498_CR3","unstructured":"Bar-Tal, O., Yariv, L., Lipman, Y., & Dekel, T. (2023) Multidiffusion: Fusing diffusion paths for controlled image generation. In: International Conference on Machine Learning"},{"key":"2498_CR4","unstructured":"Brock, A., Donahue, J., & Simonyan, K. (2019) Large scale GAN training for high fidelity natural image synthesis. In: ICLR"},{"key":"2498_CR5","doi-asserted-by":"crossref","unstructured":"Cai, M., Zhang, H., Huang, H., Geng, Q., Li, Y., & Huang, G. (2021) Frequency domain image translation: More photo-realistic, better identity-preserving. 
In: ICCV, pp. 13930\u201313940","DOI":"10.1109\/ICCV48922.2021.01367"},{"key":"2498_CR6","unstructured":"Chang, H., Zhang, H., Barber, J., Maschinot, A., Lezama, J., Jiang, L., Yang, M.-H., Murphy, K., Freeman, W.T., & Rubinstein, M., et al. (2023) Muse: Text-to-image generation via masked generative transformers. arXiv preprint arXiv:2301.00704"},{"key":"2498_CR7","doi-asserted-by":"crossref","unstructured":"Chang, H., Zhang, H., Jiang, L., Liu, C., & Freeman, W.T. (2022) Maskgit: Masked generative image transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11315\u201311325","DOI":"10.1109\/CVPR52688.2022.01103"},{"key":"2498_CR8","doi-asserted-by":"crossref","unstructured":"Chen, M., Laina, I., & Vedaldi, A. (2024) Training-free layout control with cross-attention guidance. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5343\u20135353","DOI":"10.1109\/WACV57701.2024.00526"},{"key":"2498_CR9","unstructured":"Cheng, J., Liang, X., Shi, X., He, T., Xiao, T., & Li, M. (2023) Layoutdiffuse: Adapting foundational diffusion models for layout-to-image generation. arXiv preprint arXiv:2302.08908"},{"key":"2498_CR10","doi-asserted-by":"crossref","unstructured":"Chong, M.J., & Forsyth, D. (2022) Jojogan: One shot face stylization. In: European Conference on Computer Vision, pp. 128\u2013152. Springer","DOI":"10.1007\/978-3-031-19787-1_8"},{"key":"2498_CR11","doi-asserted-by":"crossref","unstructured":"Crowson, K., Biderman, S., Kornis, D., Stander, D., Hallahan, E., Castricato, L., & Raff, E. (2022) Vqgan-clip: Open domain image generation and editing with natural language guidance. In: Proceedings of the European Conference on Computer Vision, pp. 88\u2013105. 
Springer","DOI":"10.1007\/978-3-031-19836-6_6"},{"issue":"5","key":"2498_CR12","doi-asserted-by":"publisher","first-page":"961","DOI":"10.1109\/18.57199","volume":"36","author":"I Daubechies","year":"1990","unstructured":"Daubechies, I. (1990). The wavelet transform, time-frequency localization and signal analysis. IEEE Transactions on Information Theory, 36(5), 961\u20131005.","journal-title":"IEEE Transactions on Information Theory"},{"key":"2498_CR13","doi-asserted-by":"crossref","unstructured":"Deng, Y., Tang, F., Dong, W., Ma, C., Pan, X., Wang, L., & Xu, C. (2022) Stytr2: Image style transfer with transformers. In: CVPR, pp. 11326\u201311336","DOI":"10.1109\/CVPR52688.2022.01104"},{"key":"2498_CR14","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., & Nichol, A. (2021). Diffusion models beat gans on image synthesis. Advances in Neural Information Processing Systems, 34, 8780\u20138794.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR15","first-page":"19822","volume":"34","author":"M Ding","year":"2021","unstructured":"Ding, M., Yang, Z., Hong, W., Zheng, W., Zhou, C., Yin, D., Lin, J., Zou, X., Shao, Z., & Yang, H. (2021). Cogview: Mastering text-to-image generation via transformers. Advances in Neural Information Processing Systems, 34, 19822\u201319835.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR16","doi-asserted-by":"crossref","unstructured":"Everaert, M. N., Bocchio, M., Arpa, S., S\u00fcsstrunk, S., & Achanta, R. (2023) Diffusion in style. In: ICCV, pp. 2251\u20132261","DOI":"10.1109\/ICCV51070.2023.00214"},{"key":"2498_CR17","doi-asserted-by":"crossref","unstructured":"Frenkel, Y., Vinker, Y., Shamir, A., & Cohen-Or, D. (2024) Implicit style-content separation using b-lora. In: European Conference on Computer Vision, pp. 181\u2013198. 
Springer","DOI":"10.1007\/978-3-031-72684-2_11"},{"key":"2498_CR18","doi-asserted-by":"crossref","unstructured":"Gafni, O., Polyak, A., Ashual, O., Sheynin, S., Parikh, D., & Taigman, Y. (2022) Make-a-scene: Scene-based text-to-image generation with human priors. In: Proceedings of the European Conference on Computer Vision, pp. 89\u2013106. Springer","DOI":"10.1007\/978-3-031-19784-0_6"},{"key":"2498_CR19","unstructured":"Gal, R., Alaluf, Y., Atzmon, Y., Patashnik, O., Bermano, A. H., Chechik, G., & Cohen-or, D. (2022) An image is worth one word: Personalizing text-to-image generation using textual inversion. In: The Eleventh International Conference on Learning Representations"},{"key":"2498_CR20","unstructured":"Gal, R., Alaluf, Y., Atzmon, Y., Patashnik, O., Bermano, A. H., Chechik, G., & Cohen-or, D. (2023) An image is worth one word: Personalizing text-to-image generation using textual inversion. In: The Eleventh International Conference on Learning Representations"},{"issue":"4","key":"2498_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3450626.3459836","volume":"40","author":"R Gal","year":"2021","unstructured":"Gal, R., Hochberg, D. C., Bermano, A., & Cohen-Or, D. (2021). Swagan: A style-based wavelet-driven generative model. ACM Transactions on Graphics (TOG), 40(4), 1\u201311.","journal-title":"ACM Transactions on Graphics (TOG)"},{"issue":"4","key":"2498_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530164","volume":"41","author":"R Gal","year":"2022","unstructured":"Gal, R., Patashnik, O., Maron, H., Bermano, A. H., Chechik, G., & Cohen-Or, D. (2022). Stylegan-nada: Clip-guided domain adaptation of image generators. ACM Transactions on Graphics (TOG), 41(4), 1\u201313.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2498_CR23","doi-asserted-by":"crossref","unstructured":"Gatys, L.A., Ecker, A.S., & Bethge, M. (2015) A neural algorithm of artistic style. 
arXiv preprint arXiv:1508.06576","DOI":"10.1167\/16.12.326"},{"key":"2498_CR24","doi-asserted-by":"crossref","unstructured":"Ghiasi, G., Lee, H., Kudlur, M., Dumoulin, V., & Shlens, J. (2017) Exploring the structure of a real-time, arbitrary neural artistic stylization network. arXiv preprint arXiv:1705.06830","DOI":"10.5244\/C.31.114"},{"key":"2498_CR25","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., & Bengio, Y. (2014) Generative adversarial nets. Advances in Neural Information Processing Systems 27"},{"key":"2498_CR26","unstructured":"Gu, Y., Wang, X., Wu, J. Z., Shi, Y., Chen, Y., Fan, Z., Xiao, W., Zhao, R., Chang, S., & Wu, W., et al. (2024) Mix-of-show: Decentralized low-rank adaptation for multi-concept customization of diffusion models. Advances in Neural Information Processing Systems 36"},{"key":"2498_CR27","doi-asserted-by":"crossref","unstructured":"Hertz, A., Voynov, A., Fruchter, S., & Cohen-Or, D. (2024) Style aligned image generation via shared attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4775\u20134785","DOI":"10.1109\/CVPR52733.2024.00457"},{"key":"2498_CR28","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., & Hochreiter, S. (2017) Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in Neural Information Processing Systems 30"},{"issue":"3","key":"2498_CR29","doi-asserted-by":"publisher","first-page":"1552","DOI":"10.1109\/TPAMI.2020.3021209","volume":"44","author":"T Hinz","year":"2020","unstructured":"Hinz, T., Heinrich, S., & Wermter, S. (2020). Semantic object accuracy for generative text-to-image synthesis. 
IEEE transactions on pattern analysis and machine intelligence, 44(3), 1552\u20131565.","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"2498_CR30","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, 33, 6840\u20136851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR31","unstructured":"Hu, E.J., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S., Wang, L., & Chen, W. (2021) Lora: Low-rank adaptation of large language models. In: ICLR"},{"key":"2498_CR32","doi-asserted-by":"crossref","unstructured":"Hu, T., Zhang, J., Liu, L., Yi, R., Kou, S., Zhu, H., Chen, X., Wang, Y., Wang, C., & Ma, L. (2023) Phasic content fusing diffusion model with directional distribution consistency for few-shot model adaption. In: ICCV, pp. 2406\u20132415","DOI":"10.1109\/ICCV51070.2023.00228"},{"key":"2498_CR33","first-page":"2154","volume":"35","author":"J Huang","year":"2022","unstructured":"Huang, J., Cui, K., Guan, D., Xiao, A., Zhan, F., Lu, S., Liao, S., & Xing, E. (2022). Masked generative adversarial networks are data-efficient generation learners. Advances in Neural Information Processing Systems, 35, 2154\u20132167.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR34","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., & Aila, T. (2019) A style-based generator architecture for generative adversarial networks. In: CVPR, pp. 4401\u20134410","DOI":"10.1109\/CVPR.2019.00453"},{"key":"2498_CR35","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., & Aila, T. (2020) Analyzing and improving the image quality of stylegan. In: CVPR, pp. 
8110\u20138119","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"2498_CR36","first-page":"12104","volume":"33","author":"T Karras","year":"2020","unstructured":"Karras, T., Aittala, M., Hellsten, J., Laine, S., Lehtinen, J., & Aila, T. (2020). Training generative adversarial networks with limited data. Advances in Neural Information Processing Systems, 33, 12104\u201312114.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR37","first-page":"852","volume":"34","author":"T Karras","year":"2021","unstructured":"Karras, T., Aittala, M., Laine, S., H\u00e4rk\u00f6nen, E., Hellsten, J., Lehtinen, J., & Aila, T. (2021). Alias-free generative adversarial networks. Advances in Neural Information Processing Systems, 34, 852\u2013863.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR38","doi-asserted-by":"crossref","unstructured":"Kim, G., Kwon, T., & Ye, J. C. (2022) Diffusionclip: Text-guided diffusion models for robust image manipulation. In: CVPR, pp. 2426\u20132435","DOI":"10.1109\/CVPR52688.2022.00246"},{"key":"2498_CR39","unstructured":"Kingma, D. P., & Welling, M. (2013) Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114"},{"key":"2498_CR40","first-page":"21696","volume":"34","author":"D Kingma","year":"2021","unstructured":"Kingma, D., Salimans, T., Poole, B., & Ho, J. (2021). Variational diffusion models. Advances in Neural Information Processing Systems, 34, 21696\u201321707.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR41","unstructured":"Krizhevsky, A., & Hinton, G., et al. (2009) Learning multiple layers of features from tiny images. Master\u2019s thesis, University of Toronto"},{"key":"2498_CR42","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., & Zhu, J.-Y. (2023) Multi-concept customization of text-to-image diffusion. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"2498_CR43","doi-asserted-by":"crossref","unstructured":"Kwon, G., & Ye, J.C. (2023) One-shot adaptation of gan in just one clip. TPAMI","DOI":"10.1109\/TPAMI.2023.3283551"},{"key":"2498_CR44","unstructured":"Li, J., Li, D., Xiong, C., & Hoi, S. (2022) Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In: International Conference on Machine Learning, pp. 12888\u201312900. PMLR"},{"key":"2498_CR45","unstructured":"Li, Y., Liu, H., Wen, Y., & Lee, Y. J. (2023) Generate anything anywhere in any scene. arXiv preprint arXiv:2306.17154"},{"key":"2498_CR46","doi-asserted-by":"crossref","unstructured":"Li, Y., Liu, H., Wu, Q., Mu, F., Yang, J., Gao, J., Li, C., & Lee, Y.J. (2023) Gligen: Open-set grounded text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22511\u201322521","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"2498_CR47","unstructured":"Li, B., Qi, X., Lukasiewicz, T., & Torr, P. (2019) Controllable text-to-image generation. Advances in Neural Information Processing Systems 32"},{"key":"2498_CR48","doi-asserted-by":"crossref","unstructured":"Li, W., Zhang, P., Zhang, L., Huang, Q., He, X., Lyu, S., & Gao, J. (2019) Object-driven text-to-image synthesis via adversarial training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12174\u201312182","DOI":"10.1109\/CVPR.2019.01245"},{"key":"2498_CR49","first-page":"15885","volume":"33","author":"Y Li","year":"2020","unstructured":"Li, Y., Zhang, R., Lu, J., & Shechtman, E. (2020). Few-shot image generation with elastic weight consolidation. 
Advances in Neural Information Processing Systems, 33, 15885\u201315896.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR50","doi-asserted-by":"crossref","unstructured":"Liang, H., Zhang, W., Li, W., Yu, J., & Xu, L. (2024) Intergen: Diffusion-based multi-human motion generation under complex interactions. International Journal of Computer Vision, 1\u201321","DOI":"10.1007\/s11263-024-02042-6"},{"key":"2498_CR51","doi-asserted-by":"crossref","unstructured":"Lugmayr, A., Danelljan, M., Romero, A., Yu, F., Timofte, R., & Van Gool, L. (2022) Repaint: Inpainting using denoising diffusion probabilistic models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11461\u201311471","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"2498_CR52","doi-asserted-by":"crossref","unstructured":"Ma, J., Liang, J., Chen, C., & Lu, H. (2024) Subject-diffusion: Open domain personalized text-to-image generation without test-time fine-tuning. In: ACM SIGGRAPH 2024 Conference Papers, pp. 1\u201312","DOI":"10.1145\/3641519.3657469"},{"key":"2498_CR53","unstructured":"Mo, S., Cho, M., & Shin, J. (2020) Freeze the discriminator: A simple baseline for fine-tuning gans. In: CVPR AI for Content Creation Workshop"},{"key":"2498_CR54","unstructured":"Moon, S.-J., Kim, C., & Park, G.-M. (2022) Wagi: wavelet-based gan inversion for preserving high-frequency image details. arXiv preprint arXiv:2210.09655"},{"key":"2498_CR55","unstructured":"Nichol, A. Q., & Dhariwal, P. (2021) Improved denoising diffusion probabilistic models. In: ICML, pp. 8162\u20138171. PMLR"},{"key":"2498_CR56","doi-asserted-by":"crossref","unstructured":"Noguchi, A., & Harada, T. (2019) Image generation from small datasets via batch statistics adaptation. In: ICCV, pp. 2750\u20132758","DOI":"10.1109\/ICCV.2019.00284"},{"key":"2498_CR57","doi-asserted-by":"crossref","unstructured":"Ojha, U., Li, Y., Lu, J., Efros, A. A., Lee, Y. 
J., Shechtman, E., & Zhang, R. (2021) Few-shot image generation via cross-domain correspondence. In: CVPR, pp. 10743\u201310752","DOI":"10.1109\/CVPR46437.2021.01060"},{"key":"2498_CR58","unstructured":"Oord, A., Kalchbrenner, N., Espeholt, L., Vinyals, O., & Graves, A., et al. (2016) Conditional image generation with pixelcnn decoders. Advances in Neural Information Processing Systems 29"},{"key":"2498_CR59","doi-asserted-by":"crossref","unstructured":"Park, J., & Kim, Y. (2022) Styleformer: Transformer based generative adversarial networks with style vector. In: CVPR, pp. 8983\u20138992","DOI":"10.1109\/CVPR52688.2022.00878"},{"key":"2498_CR60","doi-asserted-by":"crossref","unstructured":"Phung, H., Dao, Q., & Tran, A. (2023) Wavelet diffusion models are fast and scalable image generators. In: CVPR, pp. 10199\u201310208","DOI":"10.1109\/CVPR52729.2023.00983"},{"key":"2498_CR61","unstructured":"Podell, D., English, Z., Lacey, K., Blattmann, A., Dockhorn, T., M\u00fcller, J., Penna, J., & Rombach, R. (2024) Sdxl: Improving latent diffusion models for high-resolution image synthesis. In: International Conference on Learning Representations"},{"key":"2498_CR62","unstructured":"Qiao, T., Zhang, J., Xu, D., & Tao, D. (2019) Learn, imagine and create: Text-to-image generation from prior knowledge. Advances in neural information processing systems 32"},{"key":"2498_CR63","unstructured":"Radford, A., Kim, J. W., & Hallacy, C. (2021) Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR"},{"key":"2498_CR64","doi-asserted-by":"crossref","unstructured":"Raj, A., Kaza, S., Poole, B., Niemeyer, M., Ruiz, N., Mildenhall, B., Zada, S., Aberman, K., Rubinstein, M., & Barron, J. (2023) Dreambooth3d: Subject-driven text-to-3d generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
2349\u20132359","DOI":"10.1109\/ICCV51070.2023.00223"},{"key":"2498_CR65","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., & Chen, M. (2022) Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125"},{"key":"2498_CR66","unstructured":"Ramesh, A., Pavlov, M., Goh, G., Gray, S., Voss, C., Radford, A., Chen, M., & Sutskever, I. (2021) Zero-shot text-to-image generation. In: International Conference on Machine Learning, pp. 8821\u20138831. PMLR"},{"key":"2498_CR67","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., & Ommer, B. (2022) High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2498_CR68","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., & Aberman, K. (2023) Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: CVPR, pp. 22500\u201322510","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"2498_CR69","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., & Aberman, K. (2023) Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"2498_CR70","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Wei, W., Hou, T., Pritch, Y., Wadhwa, N., Rubinstein, M., & Aberman, K. (2024) Hyperdreambooth: Hypernetworks for fast personalization of text-to-image models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
6527\u20136536","DOI":"10.1109\/CVPR52733.2024.00624"},{"key":"2498_CR71","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E. L., Ghasemipour, K., Gontijo Lopes, R., Karagol Ayan, B., & Salimans, T. (2022). Photorealistic text-to-image diffusion models with deep language understanding. Advances in Neural Information Processing Systems, 35, 36479\u201336494.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"4","key":"2498_CR72","first-page":"4713","volume":"45","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Ho, J., Chan, W., Salimans, T., Fleet, D. J., & Norouzi, M. (2022). Image super-resolution via iterative refinement. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(4), 4713\u20134726.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2498_CR73","first-page":"25278","volume":"35","author":"C Schuhmann","year":"2022","unstructured":"Schuhmann, C., Beaumont, R., Vencu, R., Gordon, C., Wightman, R., Cherti, M., Coombes, T., Katta, A., Mullis, C., & Wortsman, M. (2022). Laion-5b: An open large-scale dataset for training next generation image-text models. Advances in Neural Information Processing Systems, 35, 25278\u201325294.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR74","first-page":"18126","volume":"34","author":"K Schwarz","year":"2021","unstructured":"Schwarz, K., Liao, Y., & Geiger, A. (2021). On the frequency bias of generative models. Advances in Neural Information Processing Systems, 34, 18126\u201318136.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR75","doi-asserted-by":"crossref","unstructured":"Shah, V., Ruiz, N., Cole, F., Lu, E., Lazebnik, S., Li, Y., & Jampani, V. (2024) Ziplora: Any subject in any style by effectively merging loras. 
In: European Conference on Computer Vision, pp. 422\u2013438. Springer","DOI":"10.1007\/978-3-031-73232-4_24"},{"key":"2498_CR76","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., & Ganguli, S. (2015) Deep unsupervised learning using nonequilibrium thermodynamics. In: ICLR, pp. 2256\u20132265"},{"key":"2498_CR77","unstructured":"Sohn, K., Ruiz, N., Lee, K., Chin, D. C., Blok, I., Chang, H., Barber, J., Jiang, L., Entis, G., & Li, Y. (2023) Styledrop: text-to-image generation in any style. In: Proceedings of the 37th International Conference on Neural Information Processing Systems, pp. 66860\u201366889"},{"key":"2498_CR78","unstructured":"Sohn, K., Shaw, A., Hao, Y., Zhang, H., Polania, L., Chang, H., Jiang, L., & Essa, I. (2023) Learning disentangled prompts for compositional image synthesis. arXiv preprint arXiv:2306.00763"},{"key":"2498_CR79","unstructured":"Tao, M., Tang, H., Wu, S., Sebe, N., Jing, X.-Y., Wu, F., & Bao, B. (2020) Df-gan: Deep fusion generative adversarial networks for text-to-image synthesis. arXiv preprint arXiv:2008.05865"},{"key":"2498_CR80","first-page":"1882","volume":"30","author":"N-T Tran","year":"2021","unstructured":"Tran, N.-T., Tran, V.-H., Nguyen, N.-B., Nguyen, T.-K., & Cheung, N.-M. (2021). On data augmentation for gan training. IEEE TIP, 30, 1882\u20131897.","journal-title":"IEEE TIP"},{"key":"2498_CR81","doi-asserted-by":"crossref","unstructured":"Tumanyan, N., Bar-Tal, O., Bagon, S., & Dekel, T. (2022) Splicing vit features for semantic appearance transfer. In: CVPR, pp. 10748\u201310757","DOI":"10.1109\/CVPR52688.2022.01048"},{"key":"2498_CR82","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, \u0141., & Polosukhin, I. (2017) Attention is all you need. Advances in neural information processing systems 30"},{"key":"2498_CR83","doi-asserted-by":"crossref","unstructured":"Wang, Y., Gonzalez-Garcia, A., Berga, D., Herranz, L., Khan, F. S., & Weijer, J.v.d. 
(2020) Minegan: Effective knowledge transfer from gans to target domains with few images. In: CVPR, pp. 9332\u20139341","DOI":"10.1109\/CVPR42600.2020.00935"},{"key":"2498_CR84","unstructured":"Wang, H., Spinelli, M., Wang, Q., Bai, X., Qin, Z., & Chen, A. (2024) Instantstyle: Free lunch towards style-preserving in text-to-image generation. arXiv preprint arXiv:2404.02733"},{"key":"2498_CR85","doi-asserted-by":"crossref","unstructured":"Wang, Y., Wu, C., Herranz, L., Weijer, J., Gonzalez-Garcia, A., & Raducanu, B. (2018) Transferring gans: Generating images from limited data. In: ECCV, pp. 218\u2013234","DOI":"10.1007\/978-3-030-01231-1_14"},{"key":"2498_CR86","doi-asserted-by":"crossref","unstructured":"Wang, J., Yue, Z., Zhou, S., Chan, K. C., & Loy, C. C. (2024) Exploiting diffusion prior for real-world image super-resolution. International Journal of Computer Vision, 1\u201321","DOI":"10.1007\/s11263-024-02168-7"},{"key":"2498_CR87","first-page":"33387","volume":"35","author":"Z Wang","year":"2022","unstructured":"Wang, Z., Chi, Z., & Zhang, Y. (2022). Fregan: exploiting frequency components for training gans under limited data. Advances in Neural Information Processing Systems, 35, 33387\u201333399.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2498_CR88","doi-asserted-by":"crossref","unstructured":"Xiao, J., Li, L., Wang, C., Zha, Z.-J., & Huang, Q. (2022) Few shot generative model adaption via relaxed spatial structural alignment. In: CVPR, pp. 11204\u201311213","DOI":"10.1109\/CVPR52688.2022.01092"},{"key":"2498_CR89","doi-asserted-by":"crossref","unstructured":"Xie, J., Li, Y., Huang, Y., Liu, H., Zhang, W., Zheng, Y., & Shou, M.Z. (2023) Boxdiff: Text-to-image synthesis with training-free box-constrained diffusion. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
7452\u20137461","DOI":"10.1109\/ICCV51070.2023.00685"},{"key":"2498_CR90","doi-asserted-by":"crossref","unstructured":"Xie, S., Zhang, Z., Lin, Z., Hinz, T., & Zhang, K. (2023) Smartbrush: Text and shape guided object inpainting with diffusion model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22428\u201322437","DOI":"10.1109\/CVPR52729.2023.02148"},{"key":"2498_CR91","unstructured":"Xu, Y., Tang, F., Cao, J., Zhang, Y., Deussen, O., Dong, W., Li, J., & Lee, T.-Y. (2024) Break-for-make: Modular low-rank adaptations for composable content-style customization. arXiv preprint arXiv:2403.19456"},{"key":"2498_CR92","unstructured":"Xu, Y., Wang, Z., Xiao, J., Liu, W., & Chen, L. (2024) Freetuner: Any subject in any style with training-free diffusion. arXiv preprint arXiv:2405.14201"},{"key":"2498_CR93","doi-asserted-by":"crossref","unstructured":"Xue, H., Huang, Z., Sun, Q., Song, L., & Zhang, W. (2023) Freestyle layout-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14256\u201314266","DOI":"10.1109\/CVPR52729.2023.01370"},{"key":"2498_CR94","doi-asserted-by":"crossref","unstructured":"Yang, C., Shen, Y., Zhang, Z., Xu, Y., Zhu, J., Wu, Z., & Zhou, B. (2023) One-shot generative domain adaptation. In: ICCV, pp. 7733\u20137742","DOI":"10.1109\/ICCV51070.2023.00711"},{"key":"2498_CR95","doi-asserted-by":"crossref","unstructured":"Yang, M., Wang, Z., Chi, Z., & Feng, W. (2022) Wavegan: Frequency-aware gan for high-fidelity few-shot image generation. In: ECCV, pp. 1\u201317. Springer","DOI":"10.1007\/978-3-031-19784-0_1"},{"key":"2498_CR96","unstructured":"Yu, F., Seff, A., Zhang, Y., Song, S., Funkhouser, T., & Xiao, J. (2015) Lsun: Construction of a large-scale image dataset using deep learning with humans in the loop. 
arXiv preprint arXiv:1506.03365"},{"key":"2498_CR97","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Huang, N., Tang, F., Huang, H., Ma, C., Dong, W., & Xu, C. (2023) Inversion-based style transfer with diffusion models. In: CVPR, pp. 10146\u201310156","DOI":"10.1109\/CVPR52729.2023.00978"},{"key":"2498_CR98","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A. A., Shechtman, E., & Wang, O. (2018) The unreasonable effectiveness of deep features as a perceptual metric. In: CVPR, pp. 586\u2013595","DOI":"10.1109\/CVPR.2018.00068"},{"key":"2498_CR99","unstructured":"Zhang, Z., Liu, Y., Han, C., Guo, T., Yao, T., & Mei, T. (2022) Generalized one-shot domain adaption of generative adversarial networks. Advances in Neural Information Processing Systems"},{"key":"2498_CR100","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., & Agrawala, M. (2023) Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"2498_CR101","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Xie, Y., & Yang, L. (2018) Photographic text-to-image synthesis with a hierarchically-nested adversarial network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6199\u20136208","DOI":"10.1109\/CVPR.2018.00649"},{"key":"2498_CR102","unstructured":"Zhang, Y., Yao, M., Wei, Y., Ji, Z., Bai, J., & Zuo, W. (2022) Towards diverse and faithful one-shot adaption of generative adversarial networks. Advances in Neural Information Processing Systems"},{"key":"2498_CR103","unstructured":"Zhao, Y., Chandrasegaran, K., Abdollahzadeh, M., & Cheung, N.-M. (2022) Few-shot image generation via adaptation-aware kernel modulation. Advances in Neural Information Processing Systems"},{"key":"2498_CR104","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Ding, H., Huang, H., & Cheung, N. -M. 
(2022) A closer look at few-shot image generation. In: CVPR, pp. 9140\u20139150","DOI":"10.1109\/CVPR52688.2022.00893"},{"key":"2498_CR105","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Du, C., Abdollahzadeh, M., Pang, T., Lin, M., YAN, S., & Cheung, N.-M. (2023) Exploring incompatible knowledge transfer in few-shot image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","DOI":"10.1109\/CVPR52729.2023.00713"},{"key":"2498_CR106","unstructured":"Zhao, Z., Zhang, Z., Chen, T., Singh, S., & Zhang, H. (2020) Image augmentations for gan training. arXiv preprint arXiv:2006.02595"},{"key":"2498_CR107","unstructured":"Zhao, S., Liu, Z., Lin, J., Zhu, J.-Y., & Han, S. (2020). Differentiable augmentation for data-efficient gan training. Advances in Neural Information Processing Systems, 33, 7559\u20137570."},{"key":"2498_CR108","doi-asserted-by":"crossref","unstructured":"Zheng, G., Zhou, X., Li, X., Qi, Z., Shan, Y., & Li, X. (2023) Layoutdiffusion: Controllable diffusion model for layout-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22490\u201322499","DOI":"10.1109\/CVPR52729.2023.02154"},{"key":"2498_CR109","unstructured":"Zhu, P., Abdal, R., Femiani, J., & Wonka, P. (2021) Mind the gap: Domain gap control for single shot domain adaptation for generative adversarial networks. In: ICLR"},{"key":"2498_CR110","unstructured":"Zhu, J., Li, S., Liu, Y., Huang, P., Shan, J., Ma, H., & Yuan, J. (2024) Odgen: Domain-specific object detection data generation with diffusion models. 
In: Proceedings of the 38th International Conference on Neural Information Processing Systems"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02498-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02498-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02498-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T08:54:40Z","timestamp":1760086480000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02498-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,11]]},"references-count":110,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["2498"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02498-0","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,11]]},"assertion":[{"value":"7 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}