{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T04:36:07Z","timestamp":1780374967959,"version":"3.54.1"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2025,4,13]],"date-time":"2025-04-13T00:00:00Z","timestamp":1744502400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,13]],"date-time":"2025-04-13T00:00:00Z","timestamp":1744502400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Scientific Research Platforms and Projects of Guangdong Provincial Education Department","award":["2022ZDZX1012"],"award-info":[{"award-number":["2022ZDZX1012"]}]},{"name":"Scientific Research Platforms and Projects of Guangdong Provincial Education Department","award":["2022ZDZX1012"],"award-info":[{"award-number":["2022ZDZX1012"]}]},{"name":"Scientific Research Platforms and Projects of Guangdong Provincial Education Department","award":["2022ZDZX1012"],"award-info":[{"award-number":["2022ZDZX1012"]}]},{"name":"Scientific Research Platforms and Projects of Guangdong Provincial Education Department","award":["2022ZDZX1012"],"award-info":[{"award-number":["2022ZDZX1012"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1007\/s00371-025-03884-y","type":"journal-article","created":{"date-parts":[[2025,4,13]],"date-time":"2025-04-13T11:38:56Z","timestamp":1744544336000},"page":"8553-8567","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Bidirectional feature modulation fusion for fashion design using latent diffusion 
models"],"prefix":"10.1007","volume":"41","author":[{"given":"Zibin","family":"Lu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jianhua","family":"Guo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shaopeng","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhixiang","family":"Yin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,4,13]]},"reference":[{"key":"3884_CR1","volume-title":"3D Fashion Design: Technique, Design and Visualization","author":"T Makryniotis","year":"2015","unstructured":"Makryniotis, T.: 3D Fashion Design: Technique, Design and Visualization. Batsford Books, London (2015)"},{"key":"3884_CR2","unstructured":"Shelly, G.B., Cashman, T.J., Starks, J.: Adobe Photoshop CS4: Comprehensive Concepts and Techniques. Course Technology\/Cengage Learning (2010)"},{"key":"3884_CR3","doi-asserted-by":"publisher","first-page":"2323","DOI":"10.1109\/TMM.2022.3146010","volume":"25","author":"H Yan","year":"2023","unstructured":"Yan, H., Zhang, H., Liu, L., Zhou, D., Xu, X., Zhang, Z., Yan, S.: Toward intelligent design: an ai-based fashion designer using generative adversarial networks aided by sketch and rendering generators. IEEE Trans. Multimedia 25, 2323\u20132338 (2023). https:\/\/doi.org\/10.1109\/TMM.2022.3146010","journal-title":"IEEE Trans. Multimedia"},{"key":"3884_CR4","doi-asserted-by":"publisher","unstructured":"Shi, J., Zhang, H., Zhou, D., Zhang, Z.: Toward intelligent interactive design: a generation framework based on cross-domain fashion elements. In: Proceedings of the 31st ACM International Conference on Multimedia. MM \u201923, pp. 7152\u20137163. Association for Computing Machinery, New York, NY, USA (2023). 
https:\/\/doi.org\/10.1145\/3581783.3612376","DOI":"10.1145\/3581783.3612376"},{"key":"3884_CR5","doi-asserted-by":"publisher","unstructured":"Yang, J., Guo, F., Chen, S., Li, J., Yang, J.: Industrial style transfer with large-scale geometric warping and content preservation. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7824\u20137833 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.00768","DOI":"10.1109\/CVPR52688.2022.00768"},{"key":"3884_CR6","doi-asserted-by":"crossref","unstructured":"Jiang, S., Fu, Y.R.: Fashion style generator. In: International Joint Conference on Artificial Intelligence (2017). https:\/\/api.semanticscholar.org\/CorpusID:33649558","DOI":"10.24963\/ijcai.2017\/520"},{"issue":"9","key":"3884_CR7","doi-asserted-by":"publisher","first-page":"4538","DOI":"10.1109\/TNNLS.2021.3057892","volume":"33","author":"S Jiang","year":"2022","unstructured":"Jiang, S., Li, J., Fu, Y.: Deep learning for fashion style generation. IEEE Trans. Neural Netw. Learn. Syst. 33(9), 4538\u20134550 (2022). https:\/\/doi.org\/10.1109\/TNNLS.2021.3057892","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"3884_CR8","doi-asserted-by":"crossref","unstructured":"Xian, W., Sangkloy, P., Agrawal, V., Raj, A., Lu, J., Fang, C., Yu, F., Hays, J.: Texturegan: Controlling deep image synthesis with texture patches. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00882"},{"key":"3884_CR9","doi-asserted-by":"crossref","unstructured":"AlBahar, B., Huang, J.-B.: Guided image-to-image translation with bi-directional feature transformation. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00911"},{"key":"3884_CR10","doi-asserted-by":"publisher","unstructured":"Yan, H., Zhang, H., Shi, J., Ma, J., Xu, X.: Toward intelligent fashion design: A texture and shape disentangled generative adversarial network. ACM Trans. Multimedia Comput. Commun. Appl. 19(3) (2023). https:\/\/doi.org\/10.1145\/3567596","DOI":"10.1145\/3567596"},{"issue":"5","key":"3884_CR11","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1016\/j.vrih.2022.08.004","volume":"4","author":"L Istead","year":"2022","unstructured":"Istead, L., Istead, J., Pocol, A., Kaplan, C.S.: A simple, stroke-based method for gesture drawing. Virtual Reality Intell. Hardware 4(5), 381\u2013392 (2022)","journal-title":"Virtual Reality Intell. Hardware"},{"issue":"11","key":"3884_CR12","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"3884_CR13","doi-asserted-by":"crossref","unstructured":"Isola, P., Zhu, J.-Y., Zhou, T., Efros, A.A.: Image-to-image translation with conditional adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.632"},{"key":"3884_CR14","doi-asserted-by":"crossref","unstructured":"Wang, T.-C., Liu, M.-Y., Zhu, J.-Y., Tao, A., Kautz, J., Catanzaro, B.: High-resolution image synthesis and semantic manipulation with conditional gans. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00917"},{"key":"3884_CR15","doi-asserted-by":"crossref","unstructured":"Park, T., Liu, M.-Y., Wang, T.-C., Zhu, J.-Y.: Semantic image synthesis with spatially-adaptive normalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00244"},{"issue":"10","key":"3884_CR16","doi-asserted-by":"publisher","first-page":"5346","DOI":"10.1109\/TNNLS.2021.3070463","volume":"33","author":"P Li","year":"2021","unstructured":"Li, P., Sheng, B., Chen, C.P.: Face sketch synthesis using regularized broad learning system. IEEE Trans. Neural Netw. Learn. Syst. 33(10), 5346\u20135360 (2021)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"3884_CR17","doi-asserted-by":"crossref","unstructured":"Zhu, J.-Y., Park, T., Isola, P., Efros, A.A.: Unpaired image-to-image translation using cycle-consistent adversarial networks. In: 2017 IEEE International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.244"},{"key":"3884_CR18","doi-asserted-by":"crossref","unstructured":"Huang, X., Liu, M.-Y., Belongie, S., Kautz, J.: Multimodal unsupervised image-to-image translation. In: Proceedings of the European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01219-9_11"},{"key":"3884_CR19","doi-asserted-by":"crossref","unstructured":"Choi, Y., Uh, Y., Yoo, J., Ha, J.-W.: Stargan v2: Diverse image synthesis for multiple domains. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00821"},{"issue":"12","key":"3884_CR20","doi-asserted-by":"publisher","first-page":"3216","DOI":"10.1109\/TVCG.2018.2866090","volume":"25","author":"B Sheng","year":"2018","unstructured":"Sheng, B., Li, P., Gao, C., Ma, K.-L.: Deep neural representation guided face sketch synthesis. 
IEEE Trans. Visualiz. Comput. Graphics 25(12), 3216\u20133230 (2018)","journal-title":"IEEE Trans. Visualiz. Comput. Graphics"},{"key":"3884_CR21","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M.F., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol. 33, pp. 6840\u20136851. Curran Associates, Inc., (2020). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"3884_CR22","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=St1giarCHLP"},{"key":"3884_CR23","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 139, pp. 8162\u20138171. PMLR (2021). https:\/\/proceedings.mlr.press\/v139\/nichol21a.html"},{"key":"3884_CR24","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"3884_CR25","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv:2204.06125 (2022)"},{"key":"3884_CR26","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E.L., Ghasemipour, K., Gontijo\u00a0Lopes, R., Karagol\u00a0Ayan, B., Salimans, T., Ho, J., Fleet, D.J., Norouzi, M.: Photorealistic text-to-image diffusion models with deep language understanding. 
In: Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A. (eds.) Advances in Neural Information Processing Systems, vol. 35, pp. 36479\u201336494. Curran Associates, Inc., (2022). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/ec795aeadae0b7d230fa35cbaf04c041-Paper-Conference.pdf"},{"key":"3884_CR27","unstructured":"Nichol, A., Dhariwal, P., Ramesh, A., Shyam, P., Mishkin, P., McGrew, B., Sutskever, I., Chen, M.: Glide: Towards photorealistic image generation and editing with text-guided diffusion models. In: International Conference on Machine Learning (2021). https:\/\/api.semanticscholar.org\/CorpusID:245335086"},{"key":"3884_CR28","doi-asserted-by":"publisher","unstructured":"Saharia, C., Chan, W., Chang, H., Lee, C., Ho, J., Salimans, T., Fleet, D., Norouzi, M.: Palette: Image-to-image diffusion models. In: ACM SIGGRAPH 2022 Conference Proceedings. SIGGRAPH \u201922. Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3528233.3530757","DOI":"10.1145\/3528233.3530757"},{"key":"3884_CR29","unstructured":"Kwon, G., Ye, J.C.: Diffusion-based image translation using disentangled style and content representation. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=Nayau9fwXU"},{"key":"3884_CR30","doi-asserted-by":"crossref","unstructured":"Cho, H., Lee, J., Chang, S., Jeong, Y.: One-shot structure-aware stylized image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8302\u20138311 (2024)","DOI":"10.1109\/CVPR52733.2024.00793"},{"key":"3884_CR31","doi-asserted-by":"publisher","unstructured":"Xia, M., Zhou, Y., Yi, R., Liu, Y.-J., Wang, W.: A diffusion model translator for efficient image-to-image translation. IEEE Trans. Pattern Anal. Mach. Intell., pp. 1\u201312 (2024). 
https:\/\/doi.org\/10.1109\/TPAMI.2024.3435448","DOI":"10.1109\/TPAMI.2024.3435448"},{"key":"3884_CR32","doi-asserted-by":"publisher","first-page":"3962","DOI":"10.1109\/TMM.2023.3318297","volume":"26","author":"S Cao","year":"2024","unstructured":"Cao, S., Chai, W., Hao, S., Zhang, Y., Chen, H., Wang, G.: Difffashion: reference-based fashion design with structure-aware transfer by diffusion models. IEEE Trans. Multimedia 26, 3962\u20133975 (2024). https:\/\/doi.org\/10.1109\/TMM.2023.3318297","journal-title":"IEEE Trans. Multimedia"},{"key":"3884_CR33","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An image is worth 16x16 words: transformers for image recognition at scale. ICLR (2021)"},{"key":"3884_CR34","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. In: Ranzato, M., Beygelzimer, A., Dauphin, Y., Liang, P.S., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems, vol. 34, pp. 8780\u20138794. Curran Associates, Inc., (2021). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2021\/file\/49ad23d1ec9fa4bd8d77d02681df5cfa-Paper.pdf"},{"key":"3884_CR35","doi-asserted-by":"crossref","unstructured":"Cheng, S.-I., Chen, Y.-J., Chiu, W.-C., Tseng, H.-Y., Lee, H.-Y.: Adaptively-realistic image generation from stroke and sketch with diffusion model. In: IEEE Winter Conference on Applications of Computer Vision (WACV) (2023)","DOI":"10.1109\/WACV56688.2023.00404"},{"issue":"3","key":"3884_CR36","doi-asserted-by":"publisher","first-page":"2248","DOI":"10.1002\/cav.2248","volume":"35","author":"W Zhao","year":"2024","unstructured":"Zhao, W., Zhu, J., Huang, J., Li, P., Sheng, B.: Gan-based multi-decomposition photo cartoonization. Comput. Animat. Virtual Worlds 35(3), 2248 (2024)","journal-title":"Comput. Animat. 
Virtual Worlds"},{"key":"3884_CR37","doi-asserted-by":"crossref","unstructured":"Ledig, C., Theis, L., Huszar, F., Caballero, J., Cunningham, A., Acosta, A., Aitken, A., Tejani, A., Totz, J., Wang, Z., Shi, W.: Photo-realistic single image super-resolution using a generative adversarial network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.19"},{"key":"3884_CR38","doi-asserted-by":"crossref","unstructured":"Chen, J., Zhang, X., Ma, L., Yang, B., Zhang, K.: Cs-viton: a realistic virtual try-on network based on clothing region alignment and spm. Vis. Comput., pp. 1\u201315 (2024)","DOI":"10.1007\/s00371-024-03347-w"},{"key":"3884_CR39","doi-asserted-by":"crossref","unstructured":"Tong, S., Liu, H., Guo, R., Wang, W., Liu, D.: Context-aware enhanced virtual try-on network with fabric adaptive registration. Vis. Comput., pp. 1\u201317 (2024)","DOI":"10.1007\/s00371-024-03432-0"},{"issue":"8","key":"3884_CR40","doi-asserted-by":"publisher","first-page":"3347","DOI":"10.1007\/s00371-023-02999-4","volume":"39","author":"X Hu","year":"2023","unstructured":"Hu, X., Zheng, C., Huang, J., Luo, R., Liu, J., Peng, T.: Cloth texture preserving image-based 3d virtual try-on. Vis. Comput. 39(8), 3347\u20133357 (2023)","journal-title":"Vis. Comput."},{"key":"3884_CR41","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1016\/j.optlaseng.2019.06.020","volume":"122","author":"Y He","year":"2019","unstructured":"He, Y., Song, K., Dong, H., Yan, Y.: Semi-supervised defect classification of steel surface based on multi-training and generative adversarial network. Opt. Lasers Eng. 122, 294\u2013302 (2019)","journal-title":"Opt. 
Lasers Eng."},{"issue":"7","key":"3884_CR42","doi-asserted-by":"publisher","first-page":"9725","DOI":"10.1109\/TII.2024.3383513","volume":"20","author":"K Song","year":"2024","unstructured":"Song, K., Feng, H., Cao, T., Cui, W., Yan, Y.: Mfanet: Multifeature aggregation network for cross-granularity few-shot seamless steel tubes surface defect segmentation. IEEE Trans. Ind. Informat. 20(7), 9725\u20139735 (2024). https:\/\/doi.org\/10.1109\/TII.2024.3383513","journal-title":"IEEE Trans. Ind. Informat."},{"key":"3884_CR43","unstructured":"Liu, M.-Y., Breuel, T., Kautz, J.: Unsupervised image-to-image translation networks. In: Guyon, I., Luxburg, U.V., Bengio, S., Wallach, H., Fergus, R., Vishwanathan, S., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 30. Curran Associates, Inc., (2017). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/dc6a6489640ca02b0d42dabeb8e46bb7-Paper.pdf"},{"key":"3884_CR44","doi-asserted-by":"crossref","unstructured":"Choi, Y., Choi, M., Kim, M., Ha, J.-W., Kim, S., Choo, J.: Stargan: Unified generative adversarial networks for multi-domain image-to-image translation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00916"},{"key":"3884_CR45","unstructured":"Kim, J., Kim, M., Kang, H., Lee, K.H.: U-gat-it: Unsupervised generative attentional networks with adaptive layer-instance normalization for image-to-image translation. In: International Conference on Learning Representations (2020). https:\/\/openreview.net\/forum?id=BJlZ5ySKPH"},{"key":"3884_CR46","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: Bach, F., Blei, D. (eds.) Proceedings of the 32nd International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 37, pp. 2256\u20132265. PMLR, Lille, France (2015). 
https:\/\/proceedings.mlr.press\/v37\/sohl-dickstein15.html"},{"key":"3884_CR47","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=PxTIG12RRHS"},{"key":"3884_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"3884_CR49","doi-asserted-by":"publisher","unstructured":"Tumanyan, N., Bar-Tal, O., Bagon, S., Dekel, T.: Splicing vit features for semantic appearance transfer. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10738\u201310747 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01048","DOI":"10.1109\/CVPR52688.2022.01048"},{"key":"3884_CR50","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. In: NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications (2021). https:\/\/openreview.net\/forum?id=qw8AKxfYbI"},{"key":"3884_CR51","doi-asserted-by":"publisher","unstructured":"Sun, Z., Zhou, Y., He, H., Mok, P.Y.: Sgdiff: A style guided diffusion model for fashion synthesis. In: Proceedings of the 31st ACM International Conference on Multimedia. MM \u201923, pp. 8433\u20138442. Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3581783.3613806","DOI":"10.1145\/3581783.3613806"},{"key":"3884_CR52","doi-asserted-by":"crossref","unstructured":"Baldrati, A., Morelli, D., Cartella, G., Cornia, M., Bertini, M., Cucchiara, R.: Multimodal garment designer: Human-centric latent diffusion models for fashion image editing. 
2023 IEEE\/CVF International Conference on Computer Vision (ICCV), 23336\u201323345 (2023)","DOI":"10.1109\/ICCV51070.2023.02138"},{"key":"3884_CR53","doi-asserted-by":"crossref","unstructured":"Baldrati, A., Morelli, D., Cornia, M., Bertini, M., Cucchiara, R.: Multimodal-conditioned latent diffusion models for fashion image editing. arXiv preprint arXiv:2403.14828 (2024)","DOI":"10.1109\/ICCV51070.2023.02138"},{"key":"3884_CR54","unstructured":"Wang, X., Cheng, Z.-Q., Wang, J., Peng, X.: DPDEdit: Detail-Preserved Diffusion Models for Multimodal Fashion Image Editing (2024). arXiv:2409.01086"},{"key":"3884_CR55","unstructured":"Kingma, D.P.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"3884_CR56","doi-asserted-by":"crossref","unstructured":"Xie, S., Tu, Z.: Holistically-nested edge detection. In: Proceedings of IEEE International Conference on Computer Vision (2015)","DOI":"10.1109\/ICCV.2015.164"},{"key":"3884_CR57","doi-asserted-by":"publisher","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: Instructpix2pix: Learning to follow image editing instructions. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18392\u201318402 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.01764","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"3884_CR58","doi-asserted-by":"crossref","unstructured":"Vasileva, M.I., Plummer, B.A., Dusad, K., Rajpal, S., Kumar, R., Forsyth, D.: Learning type-aware embeddings for fashion compatibility. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01270-0_24"},{"key":"3884_CR59","doi-asserted-by":"crossref","unstructured":"Zou, X., Pang, K., Zhang, W., Wong, W.: How good is aesthetic ability of a fashion model? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
21200\u201321209 (2022)","DOI":"10.1109\/CVPR52688.2022.02052"},{"key":"3884_CR60","doi-asserted-by":"crossref","unstructured":"Qin, X., Zhang, Z., Huang, C., Gao, C., Dehghan, M., Jagersand, M.: Basnet: Boundary-aware salient object detection. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00766"},{"key":"3884_CR61","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. In: Guyon, I., Luxburg, U.V., Bengio, S., Wallach, H., Fergus, R., Vishwanathan, S., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 30. Curran Associates, Inc., (2017). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/8a1d694707eb0fefe65871369074926d-Paper.pdf"},{"key":"3884_CR62","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"3884_CR63","doi-asserted-by":"crossref","unstructured":"Gatys, L.A.: A neural algorithm of artistic style. arXiv preprint arXiv:1508.06576 (2015)","DOI":"10.1167\/16.12.326"},{"key":"3884_CR64","doi-asserted-by":"crossref","unstructured":"Afifi, M., Brubaker, M.A., Brown, M.S.: Histogan: Controlling colors of gan-generated and real images via color histograms. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
7941\u20137950 (2021)","DOI":"10.1109\/CVPR46437.2021.00785"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-03884-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-03884-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-03884-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T10:56:58Z","timestamp":1757156218000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-03884-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,13]]},"references-count":64,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["3884"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-03884-y","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,13]]},"assertion":[{"value":"11 March 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 April 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This 
work does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}}]}}