{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T09:27:49Z","timestamp":1780392469431,"version":"3.54.1"},"reference-count":147,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U21B2024"],"award-info":[{"award-number":["U21B2024"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61902277"],"award-info":[{"award-number":["61902277"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s11263-024-02305-2","type":"journal-article","created":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T18:03:19Z","timestamp":1733853799000},"page":"2692-2720","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["Image-Based Virtual Try-On: A Survey"],"prefix":"10.1007","volume":"133","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5864-0276","authenticated-orcid":false,"given":"Dan","family":"Song","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xuanpu","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Juan","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weizhi","family":"Nie","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ruofeng","family":"Tong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mohan","family":"Kankanhalli","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"An-An","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,12,10]]},"reference":[{"key":"2305_CR1","unstructured":"Adhikari, S., Bhusal, B., Ghimire, P., et al. (2023) VTON-IT: Virtual try-on using image translation. arXiv preprint arXiv:2310.04558"},{"key":"2305_CR2","doi-asserted-by":"crossref","unstructured":"Alisha, A., Amaldev, C., Aysha Dilna, D., et al. (2022). Photo-realistic virtual try-on with enhanced warping module. In Sentimental analysis and deep learning: Proceedings of ICSADL 2021, pp. 851\u2013862.","DOI":"10.1007\/978-981-16-5157-1_66"},{"key":"2305_CR3","doi-asserted-by":"crossref","unstructured":"Ayush, K., Jandial, S., Chopra, A., et al. (2019). Powering virtual try-on via auxiliary human segmentation learning. In Proceedings of the IEEE\/CVF international conference on computer vision workshops, pp. 3193\u20133196.","DOI":"10.1109\/ICCVW.2019.00397"},{"key":"2305_CR4","doi-asserted-by":"crossref","unstructured":"Ayush, K., Jandial, S., Chopra, A., et al. (2019). Robust cloth warping via multi-scale patch adversarial loss for virtual try-on framework. In Proceedings of the IEEE\/CVF international conference on computer vision workshops, pp. 1279\u20131281.","DOI":"10.1109\/ICCVW.2019.00161"},{"key":"2305_CR5","doi-asserted-by":"crossref","unstructured":"Bai, S., Zhou, H., Li, Z., et al. (2022). Single stage virtual try-on via deformable attention flows. In European conference on computer vision, pp. 409\u2013425.","DOI":"10.1007\/978-3-031-19784-0_24"},{"key":"2305_CR6","doi-asserted-by":"crossref","unstructured":"Baldrati, A., Morelli, D., Cartella, G., et al. (2023). Multimodal garment designer: Human-centric latent diffusion models for fashion image editing. In Proceedings of the IEEE\/CVF international conference on computer vision, Paris, France, October 1\u20136, 2023. IEEE, pp. 23336\u201323345.","DOI":"10.1109\/ICCV51070.2023.02138"},{"issue":"4","key":"2305_CR7","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1109\/34.993558","volume":"24","author":"S Belongie","year":"2002","unstructured":"Belongie, S., Malik, J., & Puzicha, J. (2002). Shape matching and object recognition using shape contexts. IEEE Transactions on Pattern Analysis and Machine Intelligence, 24(4), 509\u2013522.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"5","key":"2305_CR8","doi-asserted-by":"publisher","first-page":"1712","DOI":"10.1007\/s11263-020-01424-w","volume":"129","author":"Y Benny","year":"2021","unstructured":"Benny, Y., Galanti, T., Benaim, S., et al. (2021). Evaluation metrics for conditional image generation. International Journal of Computer Vision, 129(5), 1712\u20131731.","journal-title":"International Journal of Computer Vision"},{"key":"2305_CR9","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S. E., et al. (2017). Realtime multi-person 2d pose estimation using part affinity fields. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7291\u20137299.","DOI":"10.1109\/CVPR.2017.143"},{"issue":"4","key":"2305_CR10","doi-asserted-by":"publisher","first-page":"46:1","DOI":"10.1145\/2766943","volume":"34","author":"C Cao","year":"2015","unstructured":"Cao, C., Bradley, D., Zhou, K., et al. (2015). Real-time high-fidelity facial performance capture. ACM Transactions on Graphics, 34(4), 46:1-46:9.","journal-title":"ACM Transactions on Graphics"},{"issue":"4","key":"2305_CR11","doi-asserted-by":"publisher","first-page":"41:1","DOI":"10.1145\/2461912.2462012","volume":"32","author":"C Cao","year":"2013","unstructured":"Cao, C., Weng, Y., Lin, S., et al. (2013). 3D shape regression for real-time facial animation. ACM Transactions on Graphics, 32(4), 41:1-41:10.","journal-title":"ACM Transactions on Graphics"},{"issue":"3","key":"2305_CR12","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1109\/TVCG.2013.249","volume":"20","author":"C Cao","year":"2014","unstructured":"Cao, C., Weng, Y., Zhou, S., et al. (2014). Facewarehouse: A 3d facial expression database for visual computing. IEEE Transactions on Visualization and Computer Graphics, 20(3), 413\u2013425.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"issue":"4","key":"2305_CR13","doi-asserted-by":"publisher","first-page":"126:1","DOI":"10.1145\/2897824.2925873","volume":"35","author":"C Cao","year":"2016","unstructured":"Cao, C., Wu, H., Weng, Y., et al. (2016). Real-time facial animation with image-based dynamic avatars. ACM Transactions on Graphics, 35(4), 126:1-126:12.","journal-title":"ACM Transactions on Graphics"},{"key":"2305_CR14","doi-asserted-by":"crossref","unstructured":"Chang, Y., Peng, T., Yu, F., et al. (2022). VTNCT: An image-based virtual try-on network by combining feature with pixel transformation. The Visual Computer, 1\u201314.","DOI":"10.1007\/s00371-022-02480-8"},{"key":"2305_CR15","doi-asserted-by":"crossref","unstructured":"Chen, C. Y., Chen, Y. C., Shuai, H. H., et al. (2023). Size does matter: Size-aware virtual try-on via clothing-oriented transformation try-on network. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 7513\u20137522.","DOI":"10.1109\/ICCV51070.2023.00691"},{"issue":"4","key":"2305_CR16","doi-asserted-by":"publisher","first-page":"88:1","DOI":"10.1145\/2461912.2461941","volume":"32","author":"Z Chen","year":"2013","unstructured":"Chen, Z., Feng, R., & Wang, H. (2013). Modeling friction and air effects between cloth and deformable bodies. ACM Transactions on Graphics, 32(4), 88:1-88:8.","journal-title":"ACM Transactions on Graphics"},{"key":"2305_CR17","doi-asserted-by":"crossref","unstructured":"Choi, S., Park, S., Lee, M., et al. (2021). Viton-hd: High-resolution virtual try-on via misalignment-aware normalization. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 14131\u201314140.","DOI":"10.1109\/CVPR46437.2021.01391"},{"key":"2305_CR18","doi-asserted-by":"publisher","first-page":"104568","DOI":"10.1016\/j.imavis.2022.104568","volume":"127","author":"Z Chong","year":"2022","unstructured":"Chong, Z., & Mo, L. (2022). ST-VTON: Self-supervised vision transformer for image-based virtual try-on. Image and Vision Computing, 127, 104568.","journal-title":"Image and Vision Computing"},{"key":"2305_CR19","doi-asserted-by":"crossref","unstructured":"Chopra, A., Jain, R., Hemani, M., et al. (2021). Zflow: Gated appearance flow-based virtual try-on with 3d priors. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 5433\u20135442.","DOI":"10.1109\/ICCV48922.2021.00538"},{"key":"2305_CR20","doi-asserted-by":"crossref","unstructured":"Cordier, F., Lee, W., Seo, H., et al. (2001). From 2D photos of yourself to virtual try-on dress on the web. In People and computers XV\u2014interaction without frontiers: Joint proceedings of HCI 2001 and IHM 2001, pp. 31\u201346.","DOI":"10.1007\/978-1-4471-0353-0_3"},{"key":"2305_CR21","unstructured":"Cui, A., Mahajan, J., Shah, V., et al. (2023). Street tryon: Learning in-the-wild virtual try-on from unpaired person images. arXiv preprint arXiv:2311.16094"},{"key":"2305_CR22","doi-asserted-by":"crossref","unstructured":"Cui, A., McKee, D., & Lazebnik, S. (2021). Dressing in order: Recurrent person image generation for pose transfer, virtual try-on and outfit editing. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 14638\u201314647.","DOI":"10.1109\/ICCV48922.2021.01437"},{"key":"2305_CR23","doi-asserted-by":"crossref","unstructured":"Dong, H., Liang, X., Shen, X., et al. (2019). Towards multi-pose guided virtual try-on network. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 9026\u20139035.","DOI":"10.1109\/ICCV.2019.00912"},{"key":"2305_CR24","doi-asserted-by":"crossref","unstructured":"Duchon, J. (1977). Splines minimizing rotation-invariant semi-norms in Sobolev spaces. In Constructive theory of functions of several variables, pp. 85\u2013100.","DOI":"10.1007\/BFb0086566"},{"issue":"4","key":"2305_CR25","doi-asserted-by":"publisher","first-page":"649","DOI":"10.1007\/s41095-021-0264-2","volume":"8","author":"C Du","year":"2022","unstructured":"Du, C., Yu, F., Jiang, M., et al. (2022). High fidelity virtual try-on network via semantic adaptation and distributed componentization. Computational Visual Media, 8(4), 649\u2013663.","journal-title":"Computational Visual Media"},{"key":"2305_CR26","doi-asserted-by":"crossref","unstructured":"Fele, B., Lampe, A., Peer, P., et al. (2022). C-vton: Context-driven image-based virtual try-on network. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp. 3144\u20133153.","DOI":"10.1109\/WACV51458.2022.00226"},{"key":"2305_CR27","doi-asserted-by":"crossref","unstructured":"Feng, R., Ma, C., Shen, C., et al. (2022). Weakly supervised high-fidelity clothing model generation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 3440\u20133449.","DOI":"10.1109\/CVPR52688.2022.00343"},{"key":"2305_CR28","doi-asserted-by":"crossref","unstructured":"Fincato, M., Landi, F., Cornia, M., et al. (2021). VITON-GT: An image-based virtual try-on model with geometric transformations. In 2020 25th international conference on pattern recognition (ICPR), pp. 7669\u20137676.","DOI":"10.1109\/ICPR48806.2021.9412052"},{"key":"2305_CR29","doi-asserted-by":"crossref","unstructured":"Fu, J., Li, S., Jiang, Y., et al. (2022). Stylegan-human: A data-centric odyssey of human generation. In European conference on computer vision, pp. 1\u201319.","DOI":"10.1007\/978-3-031-19787-1_1"},{"key":"2305_CR30","doi-asserted-by":"crossref","unstructured":"Gao, X., Liu, Z., Feng, Z., et al. (2021). Shape controllable virtual try-on for underwear models. In Proceedings of the 29th ACM international conference on multimedia, pp. 563\u2013572.","DOI":"10.1145\/3474085.3475210"},{"key":"2305_CR31","doi-asserted-by":"crossref","unstructured":"Ge, C., Song, Y., Ge, Y., et al. (2021a). Disentangled cycle consistency for highly-realistic virtual try-on. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 16928\u201316937.","DOI":"10.1109\/CVPR46437.2021.01665"},{"key":"2305_CR32","doi-asserted-by":"crossref","unstructured":"Ge, Y., Song, Y., Zhang, R., et al. (2021b). Parser-free virtual try-on via distilling appearance flows. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 8485\u20138493.","DOI":"10.1109\/CVPR46437.2021.00838"},{"issue":"14","key":"2305_CR33","doi-asserted-by":"publisher","first-page":"19967","DOI":"10.1007\/s11042-022-12802-6","volume":"81","author":"H Ghodhbani","year":"2022","unstructured":"Ghodhbani, H., Neji, M., Razzak, I., et al. (2022). You can try without visiting: A comprehensive survey on virtually try-on outfits. Multimedia Tools and Applications, 81(14), 19967\u201319998.","journal-title":"Multimedia Tools and Applications"},{"key":"2305_CR34","doi-asserted-by":"crossref","unstructured":"Gong, K., Liang, X., Li, Y., et al. (2018). Instance-level human parsing via part grouping network. In Proceedings of the European conference on computer vision (ECCV), pp. 770\u2013785.","DOI":"10.1007\/978-3-030-01225-0_47"},{"key":"2305_CR35","doi-asserted-by":"crossref","unstructured":"Gou, J., Sun, S., Zhang, J., et al. (2023). Taming the power of diffusion models for high-quality virtual try-on with appearance flow. In Mei, T., Cucchiara, R., El-Saddik, A., et al. (Eds.), pp. 7599\u20137607. MM. ACM: ACM.","DOI":"10.1145\/3581783.3612255"},{"key":"2305_CR36","doi-asserted-by":"crossref","unstructured":"G\u00fcler, R. A., Neverova, N., & Kokkinos, I. (2018). Densepose: Dense human pose estimation in the wild. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7297\u20137306.","DOI":"10.1109\/CVPR.2018.00762"},{"issue":"2","key":"2305_CR37","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1007\/s11263-021-01550-z","volume":"130","author":"C Guo","year":"2022","unstructured":"Guo, C., Zuo, X., Wang, S., et al. (2022). Action2video: Generating videos of human 3d actions. International Journal of Computer Vision, 130(2), 285\u2013315.","journal-title":"International Journal of Computer Vision"},{"key":"2305_CR38","doi-asserted-by":"crossref","unstructured":"Han, X., Hu, X., Huang, W., et al. (2019). Clothflow: A flow-based model for clothed person generation. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 10471\u201310480.","DOI":"10.1109\/ICCV.2019.01057"},{"key":"2305_CR39","doi-asserted-by":"crossref","unstructured":"Han, X., Wu, Z., Wu, Z., et al. (2018). Viton: An image-based virtual try-on network. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7543\u20137552.","DOI":"10.1109\/CVPR.2018.00787"},{"key":"2305_CR40","doi-asserted-by":"publisher","first-page":"91603","DOI":"10.1109\/ACCESS.2020.2993574","volume":"8","author":"MF Hashmi","year":"2020","unstructured":"Hashmi, M. F., Ashish, B. K. K., Keskar, A. G., et al. (2020). Fashionfit: Analysis of mapping 3d pose and neural body fit for custom virtual try-on. IEEE Access, 8, 91603\u201391615.","journal-title":"IEEE Access"},{"key":"2305_CR41","doi-asserted-by":"crossref","unstructured":"He, S., Song, Y. Z., & Xiang, T. (2022). Style-based global appearance flow for virtual try-on. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 3470\u20133479.","DOI":"10.1109\/CVPR52688.2022.00346"},{"key":"2305_CR42","first-page":"6626","volume":"30","author":"M Heusel","year":"2017","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., et al. (2017). Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in Neural Information Processing Systems, 30, 6626\u20136637.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"9","key":"2305_CR43","doi-asserted-by":"publisher","first-page":"2589","DOI":"10.1109\/TVCG.2017.2755646","volume":"24","author":"X He","year":"2018","unstructured":"He, X., Wang, H., & Wu, E. (2018). Projective peridynamics for modeling versatile elastoplastic materials. IEEE Transactions on Visualization and Computer Graphics, 24(9), 2589\u20132599.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"2305_CR44","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, 33, 6840\u20136851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2305_CR45","unstructured":"Honda, S. (2019). Viton-gan: Virtual try-on image generator trained with adversarial loss. Eurographics, 9\u201310."},{"key":"2305_CR46","doi-asserted-by":"crossref","unstructured":"Huang, Q., Zhang, Z., Lu, T., et al. (2021). Cross-category virtual try-on technology research based on PF-AFN. In Proceedings of the 2021 5th international conference on video and image processing, pp. 162\u2013169.","DOI":"10.1145\/3511176.3511201"},{"key":"2305_CR47","first-page":"32736","volume":"35","author":"Z Huang","year":"2022","unstructured":"Huang, Z., Li, H., Xie, Z., et al. (2022). Towards hard-pose virtual try-on via 3d-aware global correspondence learning. Advances in Neural Information Processing Systems, 35, 32736\u201332748.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2305_CR48","doi-asserted-by":"crossref","unstructured":"Issenhuth, T., Mary, J., & Calauzenes, C. (2020). Do not mask what you do not need to mask: a parser-free virtual try-on. In Computer vision\u2013ECCV 2020: 16th European conference, pp. 619\u2013635.","DOI":"10.1007\/978-3-030-58565-5_37"},{"key":"2305_CR49","first-page":"2017","volume":"28","author":"M Jaderberg","year":"2015","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., et al. (2015). Spatial transformer networks. Advances in Neural Information Processing Systems, 28, 2017\u20132025.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2305_CR50","doi-asserted-by":"crossref","unstructured":"Jandial, S., Chopra, A., Ayush, K., et al. (2020). Sievenet: A unified framework for robust image-based virtual try-on. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp. 2182\u20132190.","DOI":"10.1109\/WACV45572.2020.9093458"},{"key":"2305_CR51","doi-asserted-by":"crossref","unstructured":"Jetchev, N., & Bergmann, U. (2017). The conditional analogy gan: Swapping fashion articles on people images. In Proceedings of the IEEE international conference on computer vision workshops, pp. 2287\u20132292.","DOI":"10.1109\/ICCVW.2017.269"},{"key":"2305_CR52","doi-asserted-by":"crossref","unstructured":"Jong, A., Moh, M., & Moh, T. S. (2020). Virtual try-on with generative adversarial networks: A taxonomical survey. In Advancements in computer vision applications in intelligent systems and multimedia technologies, pp. 76\u2013100.","DOI":"10.4018\/978-1-7998-4444-0.ch005"},{"key":"2305_CR53","doi-asserted-by":"crossref","unstructured":"Joo, H., Simon, T., & Sheikh, Y. (2018). Total capture: A 3d deformation model for tracking faces, hands, and bodies. In Proceedings of the IEEE conference on computer vision and pattern recognition, Computer Vision Foundation\/IEEE Computer Society, pp. 8320\u20138329.","DOI":"10.1109\/CVPR.2018.00868"},{"key":"2305_CR54","unstructured":"Kang, T., Park, S., Choi, S., et al. (2021). Data augmentation using random image cropping for high-resolution virtual try-on (VITON-CROP). arXiv preprint arXiv:2111.08270"},{"key":"2305_CR55","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., & Aila, T. (2019). A style-based generator architecture for generative adversarial networks. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 4401\u20134410.","DOI":"10.1109\/CVPR.2019.00453"},{"key":"2305_CR56","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., et al. (2020). Analyzing and improving the image quality of stylegan. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 8107\u20138116.","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"2305_CR57","doi-asserted-by":"crossref","unstructured":"Kim, J., Gu, G., Park, M., et al. (2024). Stableviton: Learning semantic correspondence with latent diffusion model for virtual try-on. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 8176\u20138185.","DOI":"10.1109\/CVPR52733.2024.00781"},{"key":"2305_CR58","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems"},{"key":"2305_CR59","doi-asserted-by":"crossref","unstructured":"Kubo, S., Iwasawa, Y., Suzuki, M., et al. (2019). UVTON: UV mapping to consider the 3d structure of a human in image-based virtual try-on network. In Proceedings of the IEEE\/CVF international conference on computer vision workshops, pp. 3105\u20133108.","DOI":"10.1109\/ICCVW.2019.00375"},{"key":"2305_CR60","doi-asserted-by":"crossref","unstructured":"Kumar, S., & Sinha, N. (2022). Probing tryongan. In Proceedings of the 5th joint international conference on data science & management of data (9th ACM IKDD CODS and 27th COMAD), pp. 300\u2013301.","DOI":"10.1145\/3493700.3493751"},{"key":"2305_CR61","doi-asserted-by":"crossref","unstructured":"Lee, S., Gu, G., Park, S., et al. (2022). High-resolution virtual try-on with misalignment and occlusion-handled conditions. In European conference on computer vision, pp. 204\u2013219.","DOI":"10.1007\/978-3-031-19790-1_13"},{"key":"2305_CR62","doi-asserted-by":"crossref","unstructured":"Lee, H. J., Lee, R., Kang, M., et al. (2019). LA-VITON: A network for looking-attractive virtual try-on. In Proceedings of the IEEE\/CVF international conference on computer vision workshops, pp. 3129\u20133132.","DOI":"10.1109\/ICCVW.2019.00381"},{"key":"2305_CR63","doi-asserted-by":"crossref","unstructured":"Lewis, K. M., Varadharajan, S., & Kemelmacher-Shlizerman, I. (2021a). Tryongan: Body-aware try-on via layered interpolation. ACM Transactions on Graphics,40(4), 1\u201310.","DOI":"10.1145\/3450626.3459884"},{"key":"2305_CR64","unstructured":"Lewis, K. M., Varadharajan, S., & Kemelmacher-Shlizerman, I. (2021b). VOGUE: Try-on by stylegan interpolation optimization. arXiv preprint arXiv:2101.02285"},{"key":"2305_CR65","unstructured":"Li, K., Chong, M. J., Liu, J., et al. (2020). Toward accurate and realistic virtual try-on through shape matching and multiple warps. arXiv preprint arXiv:2003.10817"},{"key":"2305_CR66","doi-asserted-by":"crossref","unstructured":"Li, K., Chong, M. J., Zhang, J., et al. (2021). Toward accurate and realistic outfits visualization with attention to details. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 15546\u201315555.","DOI":"10.1109\/CVPR46437.2021.01529"},{"key":"2305_CR67","doi-asserted-by":"crossref","unstructured":"Li, Y., Huang, C., & Loy, C. C. (2019). Dense intrinsic appearance flow for human pose transfer. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 3693\u20133702.","DOI":"10.1109\/CVPR.2019.00381"},{"key":"2305_CR68","doi-asserted-by":"crossref","unstructured":"Li, N., Liu, Q., Singh, K. K., et al. (2024b). Unihuman: A unified model for editing human images in the wild. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 2039\u20132048.","DOI":"10.1109\/CVPR52733.2024.00199"},{"key":"2305_CR69","doi-asserted-by":"crossref","unstructured":"Li, Z., Wei, P., Yin, X., et al. (2023b). Virtual try-on with pose-garment keypoints guided inpainting. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 22788\u201322797.","DOI":"10.1109\/ICCV51070.2023.02083"},{"key":"2305_CR70","doi-asserted-by":"crossref","unstructured":"Li, K., Zhang, J., & Forsyth, D. A. (2023a). Povnet: Image-based virtual try-on through accurate warping and residual. IEEE Transactions on Pattern Analysis and Machine Intelligence,45(10), 12222\u201312235.","DOI":"10.1109\/TPAMI.2023.3283302"},{"key":"2305_CR71","doi-asserted-by":"crossref","unstructured":"Li, K., Zhang, J., Chang, S., et al. (2024a). Controlling virtual try-on pipeline through rendering policies. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp. 5854\u20135836.","DOI":"10.1109\/WACV57701.2024.00576"},{"key":"2305_CR72","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02042-6","author":"H Liang","year":"2024","unstructured":"Liang, H., Zhang, W., Li, W., et al. (2024). Intergen: Diffusion-based multi-human motion generation under complex interactions. International Journal of Computer Vision. https:\/\/doi.org\/10.1007\/s11263-024-02042-6","journal-title":"International Journal of Computer Vision"},{"key":"2305_CR73","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Goyal, P., Girshick, R., et al. (2017). Focal loss for dense object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2980\u20132988.","DOI":"10.1109\/ICCV.2017.324"},{"key":"2305_CR74","doi-asserted-by":"crossref","unstructured":"Lin, C., Li, Z., Zhou, S., et al. (2022). RMGN: A regional mask guided network for parser-free virtual try-on. In International joint conference on artificial intelligence, pp. 1151\u20131158.","DOI":"10.24963\/ijcai.2022\/161"},{"key":"2305_CR75","doi-asserted-by":"crossref","unstructured":"Lin, J., Zhang, R., Ganz, F., et al. (2021). Anycost gans for interactive image synthesis and editing. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 14986\u201314996.","DOI":"10.1109\/CVPR46437.2021.01474"},{"key":"2305_CR76","doi-asserted-by":"crossref","unstructured":"Lin, A., Zhao, N., Ning, S., et al. (2023). Fashiontex: Controllable virtual try-on with text and texture. In ACM SIGGRAPH 2023 conference proceedings, pp. 1\u20139.","DOI":"10.1145\/3588432.3591568"},{"key":"2305_CR77","doi-asserted-by":"crossref","unstructured":"Liu, Z., Luo, P., Qiu, S., et al. (2016). Deepfashion: Powering robust clothes recognition and retrieval with rich annotations. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1096\u20131104.","DOI":"10.1109\/CVPR.2016.124"},{"key":"2305_CR78","doi-asserted-by":"crossref","unstructured":"Liu, G., Song, D., Tong, R., et al. (2021). Toward realistic virtual try-on through landmark guided shape matching. In Proceedings of the AAAI conference on artificial intelligence, pp. 2118\u20132126.","DOI":"10.1609\/aaai.v35i3.16309"},{"key":"2305_CR79","unstructured":"Liu, Y., Zhao, M., Zhang, Z., et al. (2021b). Arbitrary virtual try-on network: Characteristics preservation and trade-off between body and clothing. arXiv preprint arXiv:2111.12346"},{"issue":"6","key":"2305_CR80","doi-asserted-by":"publisher","first-page":"3260","DOI":"10.1109\/TPAMI.2020.3048039","volume":"44","author":"P Li","year":"2022","unstructured":"Li, P., Xu, Y., Wei, Y., et al. (2022). Self-correction for human parsing. IEEE Transactions on Pattern Analysis and Machine Intelligence, 44(6), 3260\u20133271.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"6","key":"2305_CR81","doi-asserted-by":"publisher","first-page":"248:1","DOI":"10.1145\/2816795.2818013","volume":"34","author":"M Loper","year":"2015","unstructured":"Loper, M., Mahmood, N., Romero, J., et al. (2015). SMPL: A skinned multi-person linear model. ACM Transactions on Graphics, 34(6), 248:1-248:16.","journal-title":"ACM Transactions on Graphics"},{"key":"2305_CR82","first-page":"406","volume":"30","author":"L Ma","year":"2017","unstructured":"Ma, L., Jia, X., Sun, Q., et al. (2017). Pose guided person image generation. Advances in Neural Information Processing Systems, 30, 406\u2013416.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2305_CR83","doi-asserted-by":"crossref","unstructured":"Mandhana, V., Agrawal, T., & Sardana, A. (2021). Ndnet: natural deformation of apparel for better virtual try-on experience. In Proceedings of the 36th annual ACM symposium on applied computing, pp. 960\u2013966.","DOI":"10.1145\/3412841.3441971"},{"key":"2305_CR84","doi-asserted-by":"crossref","unstructured":"Mao, X., Li, Q., Xie, H., et al. (2017). Least squares generative adversarial networks. In Proceedings of the IEEE international conference on computer vision, pp. 2794\u20132802.","DOI":"10.1109\/ICCV.2017.304"},{"key":"2305_CR85","unstructured":"Minar, M. R., Tuan, T. T., Ahn, H., et al. (2020). Cp-vton+: Clothing shape and texture preserving image-based virtual try-on. In CVPR workshops, pp. 10\u201314."},{"key":"2305_CR86","doi-asserted-by":"crossref","unstructured":"Morelli, D., Baldrati, A., Cartella, G., et al. (2023). Ladi-vton: Latent diffusion textual-inversion enhanced virtual try-on. In Mei, T., Cucchiara, R., & El-Saddik, A., et al. (Eds.), pp. 8580\u20138589. MM. ACM: ACM.","DOI":"10.1145\/3581783.3612137"},{"key":"2305_CR87","doi-asserted-by":"crossref","unstructured":"Morelli, D., Fincato, M., Cornia, M., et al. (2022). Dress code: High-resolution multi-category virtual try-on. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 2231\u20132235.","DOI":"10.1109\/CVPRW56347.2022.00243"},{"key":"2305_CR88","doi-asserted-by":"crossref","unstructured":"Neuberger, A., Borenstein, E., Hilleli, B., et al. (2020). Image based virtual try-on network from unpaired data. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 5184\u20135193.","DOI":"10.1109\/CVPR42600.2020.00523"},{"key":"2305_CR89","doi-asserted-by":"crossref","unstructured":"Nguyen-Ngoc, K., Phan-Nguyen, T., Le, K., et al. (2023). DM-VTON: Distilled mobile real-time virtual try-on. In 2023 IEEE international symposium on mixed and augmented reality adjunct (ISMAR-adjunct), pp. 695\u2013700.","DOI":"10.1109\/ISMAR-Adjunct60411.2023.00149"},{"key":"2305_CR90","unstructured":"Nichol, A. Q., & Dhariwal, P. (2021). Improved denoising diffusion probabilistic models. In International conference on machine learning, pp. 8162\u20138171."},{"key":"2305_CR91","doi-asserted-by":"crossref","unstructured":"Ning, S., Wang, D., Qin, Y., et al. (2024). PICTURE: Photorealistic virtual try-on from unconstrained designs. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 6976\u20136985.","DOI":"10.1109\/CVPR52733.2024.00666"},{"key":"2305_CR92","doi-asserted-by":"crossref","unstructured":"Park, S., & Park, J. (2022) WG-VITON: Wearing-guide virtual try-on for top and bottom clothes. arXiv preprint arXiv:2205.04759","DOI":"10.2139\/ssrn.4379142"},{"key":"2305_CR93","doi-asserted-by":"crossref","unstructured":"Park, T., Liu, M. Y., Wang, T. C., et al. (2019). Semantic image synthesis with spatially-adaptive normalization. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 2337\u20132346.","DOI":"10.1109\/CVPR.2019.00244"},{"key":"2305_CR94","doi-asserted-by":"crossref","unstructured":"Pathak, S., Kaushik, V., & Lall, B. (2023). Single stage warped cloth learning and semantic-contextual attention feature fusion for virtual tryon. arXiv preprint arXiv:2310.05024","DOI":"10.1109\/ICME57554.2024.10687502"},{"key":"2305_CR95","doi-asserted-by":"crossref","unstructured":"Pecenakova, S., Karessli, N., & Shirvany, R. (2022). Fitgan: Fit-and shape-realistic generative adversarial networks for fashion. In 2022 26th international conference on pattern recognition (ICPR), pp. 3097\u20133104.","DOI":"10.1109\/ICPR56361.2022.9956089"},{"key":"2305_CR96","unstructured":"Pernus, M., Fookes, C., Struc, V., et al. (2023). FICE: Text-conditioned fashion image editing with guided GAN inversion. arXiv preprint arXiv:2301.02110"},{"issue":"2","key":"2305_CR97","first-page":"186","volume":"23","author":"DL Pham","year":"2020","unstructured":"Pham, D. L., Ngyuen, N. T., & Chung, S. T. (2020). Keypoints-based 2D virtual try-on network system. Journal of Korea Multimedia Society, 23(2), 186\u2013203.","journal-title":"Journal of Korea Multimedia Society"},{"key":"2305_CR98","unstructured":"Radford, A., Kim, J. W., Hallacy, C., et al. (2021). Learning transferable visual models from natural language supervision. In International conference on machine learning, pp. 8748\u20138763."},{"key":"2305_CR99","doi-asserted-by":"crossref","unstructured":"Raffiee, A. H., & Sollami, M. (2021). Garmentgan: Photo-realistic adversarial fashion transfer. In 2020 25th international conference on pattern recognition (ICPR), pp. 3923\u20133930.","DOI":"10.1109\/ICPR48806.2021.9412908"},{"key":"2305_CR100","unstructured":"Raj, A., Sangkloy, P., Chang, H., et al. (2018). Swapnet: Garment transfer in single view images. In Proceedings of the European conference on computer vision (ECCV), pp. 666\u2013682."},{"key":"2305_CR101","unstructured":"Ren, B., Tang, H., Meng, F., et al. (2021). Cloth interactive transformer for virtual try-on. arXiv preprint arXiv:2104.05519"},{"key":"2305_CR102","doi-asserted-by":"crossref","unstructured":"Ren, Y., Yu, X., Chen, J., et al. (2020). Deep image spatial transformation for person image generation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 7690\u20137699.","DOI":"10.1109\/CVPR42600.2020.00771"},{"issue":"4","key":"2305_CR103","doi-asserted-by":"publisher","first-page":"92:1","DOI":"10.1145\/3617374","volume":"20","author":"B Ren","year":"2024","unstructured":"Ren, B., Tang, H., Meng, F., et al. (2024). Cloth interactive transformer for virtual try-on. ACM Transactions on Multimedia Computing, Communications and Applications, 20(4), 92:1-92:20.","journal-title":"ACM Transactions on Multimedia Computing, Communications and Applications"},{"key":"2305_CR104","doi-asserted-by":"crossref","unstructured":"Ronneberger, O. (2017). Invited talk: U-net convolutional networks for biomedical image segmentation. In Proceedings des Workshops vom 12. bis 14. M\u00e4rz 2017 in Heidelberg, Springer, p. 3.","DOI":"10.1007\/978-3-662-54345-0_3"},{"key":"2305_CR105","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., & Brox, T. (2015). U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention\u2013MICCAI 2015: 18th international conference, pp. 234\u2013241.","DOI":"10.1007\/978-3-319-24574-4_28"},{"issue":"3","key":"2305_CR106","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1145\/1015706.1015720","volume":"23","author":"C Rother","year":"2004","unstructured":"Rother, C., Kolmogorov, V., & Blake, A. (2004). \u201cGrabcut\u2019\u2019 interactive foreground extraction using iterated graph cuts. ACM Transactions on Graphics, 23(3), 309\u2013314.","journal-title":"ACM Transactions on Graphics"},{"key":"2305_CR107","unstructured":"Roy, D., Mukherjee, D., Chanda, B. (2022a). Significance of skeleton-based features in virtual try-on. arXiv preprint arXiv:2208.08076"},{"key":"2305_CR108","doi-asserted-by":"crossref","unstructured":"Roy, D., Santra, S., & Chanda, B. (2022b). LGVTON: A landmark guided approach for model to person virtual try-on. Multimedia Tools and Applications,81(4), 5051\u20135087.","DOI":"10.1007\/s11042-021-11647-9"},{"key":"2305_CR109","first-page":"2226","volume":"29","author":"T Salimans","year":"2016","unstructured":"Salimans, T., Goodfellow, I. J., Zaremba, W., et al. (2016). Improved techniques for training gans. Advances in Neural Information Processing Systems, 29, 2226\u20132234.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2305_CR110","unstructured":"Seyfioglu, M. S., Bouyarmane, K., Kumar, S., et al. (2023). Dreampaint: Few-shot inpainting of e-commerce items for virtual try-on without 3D modeling. arXiv preprint arXiv:2305.01257"},{"key":"2305_CR111","first-page":"4856","volume-title":"AAAI 2024","author":"S Shim","year":"2024","unstructured":"Shim, S., Chung, J., & Heo, J. (2024). Towards squeezing-averse virtual try-on via sequential deformation. In M. J. Wooldridge, J. G. Dy, & S. Natarajan (Eds.), AAAI 2024 (pp. 4856\u20134863). AAAI Press."},{"key":"2305_CR112","unstructured":"Simonyan, K., Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. In International conference on learning representations (ICLR)"},{"key":"2305_CR113","unstructured":"Song, J., Meng, C., Ermon, S. (2021). Denoising diffusion implicit models. In International conference on learning representations (ICLR)"},{"key":"2305_CR114","doi-asserted-by":"publisher","first-page":"33757","DOI":"10.1007\/s11042-019-08363-w","volume":"79","author":"D Song","year":"2020","unstructured":"Song, D., Li, T., Mao, Z., et al. (2020). Sp-viton: Shape-preserving image-based virtual try-on network. Multimedia Tools and Applications, 79, 33757\u201333769.","journal-title":"Multimedia Tools and Applications"},{"issue":"7","key":"2305_CR115","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1111\/cgf.13012","volume":"35","author":"D Song","year":"2016","unstructured":"Song, D., Tong, R., Chang, J., et al. (2016). 3D body shapes estimation from dressed-human silhouettes. Computer Graphics Forum, 35(7), 147\u2013156.","journal-title":"Computer Graphics Forum"},{"key":"2305_CR116","doi-asserted-by":"publisher","first-page":"27939","DOI":"10.1109\/ACCESS.2018.2837147","volume":"6","author":"D Song","year":"2018","unstructured":"Song, D., Tong, R., Du, J., et al. (2018). Data-driven 3-D human body customization with a mobile device. IEEE Access, 6, 27939\u201327948.","journal-title":"IEEE Access"},{"key":"2305_CR117","doi-asserted-by":"crossref","unstructured":"Sun, F., Guo, J., Su, Z., et al. (2019a). Image-based virtual try-on network with structural coherence. In 2019 IEEE international conference on image processing (ICIP), pp. 519\u2013523.","DOI":"10.1109\/ICIP.2019.8803811"},{"key":"2305_CR118","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., et al. (2019b). Deep high-resolution representation learning for human pose estimation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 5693\u20135703.","DOI":"10.1109\/CVPR.2019.00584"},{"key":"2305_CR119","doi-asserted-by":"crossref","unstructured":"Tang, M., Liu, Z., Tong, R., et al. (2018b). PSCC: Parallel self-collision culling with spatial hashing on GPUs. Proceedings of the ACM on Computer Graphics and Interactive Techniques,1(1), 18:1-18:18.","DOI":"10.1145\/3203188"},{"key":"2305_CR120","doi-asserted-by":"crossref","unstructured":"Tang, M., Wang, T., Liu, Z., et al. (2018a). I-cloth: Incremental collision handling for GPU-based interactive cloth simulation. ACM Transactions on Graphics,37(6), 204.","DOI":"10.1145\/3272127.3275005"},{"issue":"2","key":"2305_CR121","doi-asserted-by":"publisher","first-page":"511","DOI":"10.1111\/cgf.12851","volume":"35","author":"M Tang","year":"2016","unstructured":"Tang, M., Wang, H., Tang, L., et al. (2016). CAMA: Contact-aware matrix assembly with unified collision handling for GPU-based cloth simulation. Computer Graphics Forum, 35(2), 511\u2013521.","journal-title":"Computer Graphics Forum"},{"key":"2305_CR122","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., et al. (2017). Attention is all you need. Advances in Neural Information Processing Systems, 5998\u20136008."},{"key":"2305_CR123","doi-asserted-by":"crossref","unstructured":"Wang, B., Zheng, H., Liang, X., et al. (2018). Toward characteristic-preserving image-based virtual try-on network. In Proceedings of the European conference on computer vision (ECCV), pp. 589\u2013604.","DOI":"10.1007\/978-3-030-01261-8_36"},{"issue":"4","key":"2305_CR124","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A. C., Sheikh, H. R., et al. (2004). Image quality assessment: From error visibility to structural similarity. IEEE Transactions on Image Processing, 13(4), 600\u2013612.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2305_CR125","doi-asserted-by":"publisher","first-page":"40899","DOI":"10.1109\/ACCESS.2022.3167509","volume":"10","author":"T Wang","year":"2022","unstructured":"Wang, T., Gu, X., & Zhu, J. (2022). A flow-based generative network for photo-realistic virtual try-on. IEEE Access, 10, 40899\u201340909.","journal-title":"IEEE Access"},{"issue":"4","key":"2305_CR126","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1145\/2010324.1964966","volume":"30","author":"H Wang","year":"2011","unstructured":"Wang, H., O\u2019Brien, J. F., & Ramamoorthi, R. (2011). Data-driven elastic models for cloth: Modeling and measurement. ACM Transactions on Graphics, 30(4), 71.","journal-title":"ACM Transactions on Graphics"},{"key":"2305_CR127","doi-asserted-by":"crossref","unstructured":"Xie, Z., Huang, Z., Dong, X., et al. (2023). Gp-vton: Towards general purpose virtual try-on via collaborative local-flow global-parsing learning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 23550\u201323559.","DOI":"10.1109\/CVPR52729.2023.02255"},{"key":"2305_CR128","unstructured":"Xie, Z., Huang, Z., Zhao, F., et al. (2021a). Towards scalable unpaired virtual try-on via patch-routed spatially-adaptive gan. Advances in Neural Information Processing Systems,34, 2598\u20132610."},{"key":"2305_CR129","unstructured":"Xie, Z., Huang, Z., Zhao, F., et al. (2022). PASTA-GAN++: A versatile framework for high-resolution unpaired virtual try-on. arXiv preprint arXiv:2207.13475"},{"key":"2305_CR130","doi-asserted-by":"crossref","unstructured":"Xie, Z., Zhang, X., Zhao, F., et al. (2021b). Was-vton: Warping architecture search for virtual try-on network. In Proceedings of the 29th ACM international conference on multimedia, pp. 3350\u20133359.","DOI":"10.1145\/3474085.3475490"},{"key":"2305_CR131","doi-asserted-by":"publisher","first-page":"2222","DOI":"10.1109\/TMM.2021.3070972","volume":"23","author":"J Xu","year":"2021","unstructured":"Xu, J., Pu, Y., Nie, R., et al. (2021). Virtual try-on network with attribute transformation and local rendering. IEEE Transactions on Multimedia, 23, 2222\u20132234.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2305_CR132","doi-asserted-by":"crossref","unstructured":"Yan, K., Gao, T., Zhang, H., et al. (2023). Linking garment with person via semantically associated landmarks for virtual try-on. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 17194\u201317204.","DOI":"10.1109\/CVPR52729.2023.01649"},{"key":"2305_CR133","doi-asserted-by":"crossref","unstructured":"Yang, X., Ding, C., Hong, Z., et al. (2024b). Texture-preserving diffusion models for high-fidelity virtual try-on. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR52733.2024.00670"},{"key":"2305_CR134","doi-asserted-by":"publisher","unstructured":"Yang, L., Jia, W., Li, S., et al. (2024a). Deep learning technique for human parsing: A survey and outlook. International Journal of Computer Vision. https:\/\/doi.org\/10.1007\/s11263-024-02031-9","DOI":"10.1007\/s11263-024-02031-9"},{"key":"2305_CR135","doi-asserted-by":"crossref","unstructured":"Yang, H., Yu, X., & Liu, Z. (2022). Full-range virtual try-on with recurrent tri-level transform. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 3460\u20133469.","DOI":"10.1109\/CVPR52688.2022.00345"},{"key":"2305_CR136","doi-asserted-by":"crossref","unstructured":"Yang, H., Zhang, R., Guo, X., et al. (2020). Towards photo-realistic virtual try-on by adaptively generating-preserving image content. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 7850\u20137859.","DOI":"10.1109\/CVPR42600.2020.00787"},{"key":"2305_CR137","doi-asserted-by":"publisher","first-page":"1477","DOI":"10.1109\/TMM.2023.3234399","volume":"25","author":"Z Yang","year":"2023","unstructured":"Yang, Z., Chen, J., Shi, Y., et al. (2023). Occlumix: Towards de-occlusion virtual try-on by semantically-guided mixup. IEEE Transactions on Multimedia, 25, 1477\u20131488.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2305_CR138","doi-asserted-by":"crossref","unstructured":"Yu, R., Wang, X., & Xie, X. (2019). Vtnfp: An image-based virtual try-on network with body and clothing feature preservation. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 10511\u201310520.","DOI":"10.1109\/ICCV.2019.01061"},{"key":"2305_CR139","doi-asserted-by":"crossref","unstructured":"Zeng, J., Song, D., Nie, W., et al. (2024). CAT-DM: Controllable accelerated virtual try-on with diffusion model. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 8372\u20138382.","DOI":"10.1109\/CVPR52733.2024.00800"},{"key":"2305_CR140","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A. A., et\u00a0al. (2018). The unreasonable effectiveness of deep features as a perceptual metric. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 586\u2013595.","DOI":"10.1109\/CVPR.2018.00068"},{"key":"2305_CR141","doi-asserted-by":"publisher","first-page":"1731","DOI":"10.1109\/TMM.2023.3286278","volume":"26","author":"S Zhang","year":"2023","unstructured":"Zhang, S., Han, X., Zhang, W., et al. (2023). Limb-aware virtual try-on network with progressive clothing warping. IEEE Transactions on Multimedia, 26, 1731\u20131746.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2305_CR142","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1016\/j.patcog.2019.01.011","volume":"90","author":"R Zhang","year":"2019","unstructured":"Zhang, R., Yang, W., Peng, Z., et al. (2019). Progressively diffused networks for semantic visual parsing. Pattern Recognition, 90, 78\u201386.","journal-title":"Pattern Recognition"},{"key":"2305_CR143","unstructured":"Zhou, H., Lan, T., & Venkataramani, G. (2021). PT-VTON: An image-based virtual try-on network with progressive pose attention transfer. arXiv preprint arXiv:2111.12167"},{"key":"2305_CR144","doi-asserted-by":"crossref","unstructured":"Zhu, X., Lei, Z., Yan, J., et al. (2015). High-fidelity pose and expression normalization for face recognition in the wild. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 787\u2013796.","DOI":"10.1109\/CVPR.2015.7298679"},{"key":"2305_CR145","doi-asserted-by":"crossref","unstructured":"Zhu, J. Y., Park, T., Isola, P., et al. (2017). Unpaired image-to-image translation using cycle-consistent adversarial networks. In Proceedings of the IEEE international conference on computer vision, pp. 2223\u20132232.","DOI":"10.1109\/ICCV.2017.244"},{"key":"2305_CR146","doi-asserted-by":"crossref","unstructured":"Zhu, L., Yang, D., Zhu, T., et al. (2023). Tryondiffusion: A tale of two unets. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 4606\u20134615.","DOI":"10.1109\/CVPR52729.2023.00447"},{"issue":"1","key":"2305_CR147","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1109\/TPAMI.2017.2778152","volume":"41","author":"X Zhu","year":"2019","unstructured":"Zhu, X., Liu, X., Lei, Z., et al. (2019). Face alignment in full pose range: A 3D total solution. IEEE Transactions on Pattern Analysis and Machine Intelligence, 41(1), 78\u201392.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02305-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02305-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02305-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,17]],"date-time":"2025-04-17T06:01:43Z","timestamp":1744869703000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02305-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,10]]},"references-count":147,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["2305"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02305-2","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,10]]},"assertion":[{"value":"1 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 November 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no Conflict of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}