{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:47:37Z","timestamp":1774021657694,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62201484, 62441615, 624B2124"],"award-info":[{"award-number":["62201484, 62441615, 624B2124"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730663","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["FashionComposer: Compositional Fashion Image Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8552-4985","authenticated-orcid":false,"given":"Sihui","family":"Ji","sequence":"first","affiliation":[{"name":"The University of Hong Kong, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1352-5883","authenticated-orcid":false,"given":"Yiyang","family":"Wang","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5008-4720","authenticated-orcid":false,"given":"Xi","family":"Chen","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7928-7336","authenticated-orcid":false,"given":"Xiaogang","family":"Xu","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6405-4011","authenticated-orcid":false,"given":"Hao","family":"Luo","sequence":"additional","affiliation":[{"name":"DAMO Academy, Alibaba Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8277-2706","authenticated-orcid":false,"given":"Hengshuang","family":"Zhao","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang et\u00a0al. 2023a. Qwen technical report. arXiv:https:\/\/arXiv.org\/abs\/2309.16609 (2023)."},{"key":"e_1_3_3_2_3_1","unstructured":"Jinze Bai Shuai Bai Shusheng Yang Shijie Wang Sinan Tan Peng Wang Junyang Lin Chang Zhou and Jingren Zhou. 2023b. Qwen-vl: A versatile vision-language model for understanding localization text reading and beyond. arXiv:https:\/\/arXiv.org\/abs\/2308.12966 (2023)."},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"crossref","unstructured":"Mengting Chen Xi Chen Zhonghua Zhai Chen Ju Xuewen Hong Jinsong Lan and Shuai Xiao. 2024a. Wear-any-way: Manipulable virtual try-on via sparse correspondence alignment. arXiv:https:\/\/arXiv.org\/abs\/2403.12965 (2024).","DOI":"10.1007\/978-3-031-72630-9_8"},{"key":"e_1_3_3_2_5_1","unstructured":"Weifeng Chen Tao Gu Yuhao Xu and Chengcai Chen. 2024c. Magic Clothing: Controllable Garment-Driven Image Synthesis. arXiv:https:\/\/arXiv.org\/abs\/2404.09512 (2024)."},{"key":"e_1_3_3_2_6_1","unstructured":"Xi Chen Yutong Feng Mengting Chen Yiyang Wang Shilong Zhang Yu Liu Yujun Shen and Hengshuang Zhao. 2024b. Zero-shot Image Editing with Reference Imitation. arXiv:https:\/\/arXiv.org\/abs\/2406.07547 (2024)."},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00630"},{"key":"e_1_3_3_2_8_1","volume-title":"CVPR","author":"Cheng Bowen","year":"2021","unstructured":"Bowen Cheng, Ishan Misra, Alexander\u00a0G. Schwing, Alexander Kirillov, and Rohit Girdhar. 2021. Masked-attention Mask Transformer for Universal Image Segmentation. In CVPR."},{"key":"e_1_3_3_2_9_1","unstructured":"Yisol Choi Sangkyung Kwak Kyungmin Lee Hyungwon Choi and Jinwoo Shin. 2024. Improving diffusion models for virtual try-on. arXiv:https:\/\/arXiv.org\/abs\/2403.05139 (2024)."},{"key":"e_1_3_3_2_10_1","first-page":"8235","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Cui Aiyu","year":"2024","unstructured":"Aiyu Cui, Jay Mahajan, Viraj Shah, Preeti Gomathinayagam, Chang Liu, and Svetlana Lazebnik. 2024. Street tryon: Learning in-the-wild virtual try-on from unpaired person images. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8235\u20138239."},{"key":"e_1_3_3_2_11_1","volume-title":"ICLR","author":"Gal Rinon","year":"2023","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit\u00a0H Bermano, Gal Chechik, and Daniel Cohen-Or. 2023. An image is worth one word: Personalizing text-to-image generation using textual inversion. In ICLR."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612255"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"crossref","unstructured":"Yuchao Gu Xintao Wang Jay\u00a0Zhangjie Wu Yujun Shi Yunpeng Chen Zihan Fan Wuyou Xiao Rui Zhao Shuning Chang Weijia Wu et\u00a0al. 2023. Mix-of-show: Decentralized low-rank adaptation for multi-concept customization of diffusion models. Advances in Neural Information Processing Systems 36 (2023) 15890\u201315902.","DOI":"10.52202\/075280-0699"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00762"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00787"},{"key":"e_1_3_3_2_16_1","unstructured":"Li Hu Xin Gao Peng Zhang Ke Sun Bang Zhang and Liefeng Bo. 2023. Animate anyone: Consistent and controllable image-to-video synthesis for character animation. arXiv:https:\/\/arXiv.org\/abs\/2311.17117 (2023)."},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"crossref","unstructured":"Levon Khachatryan Andranik Movsisyan Vahram Tadevosyan Roberto Henschel Zhangyang Wang Shant Navasardyan and Humphrey Shi. 2023. Text2video-zero: Text-to-image diffusion models are zero-shot video generators. arXiv:https:\/\/arXiv.org\/abs\/2303.13439 (2023).","DOI":"10.1109\/ICCV51070.2023.01462"},{"key":"e_1_3_3_2_18_1","volume-title":"CVPR","author":"Kim Jeongho","year":"2024","unstructured":"Jeongho Kim, Guojung Gu, Minho Park, Sunghyun Park, and Jaegul Choo. 2024. Stableviton: Learning semantic correspondence with latent diffusion model for virtual try-on. In CVPR."},{"key":"e_1_3_3_2_19_1","unstructured":"Zhiheng Liu Ruili Feng Kai Zhu Yifei Zhang Kecheng Zheng Yu Liu Deli Zhao Jingren Zhou and Yang Cao. 2023. Cones: Concept neurons in diffusion models for customized generation. arXiv:https:\/\/arXiv.org\/abs\/2303.05125 (2023)."},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.124"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3596711.3596800"},{"key":"e_1_3_3_2_22_1","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv:https:\/\/arXiv.org\/abs\/1711.05101 (2017)."},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612137"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00243"},{"key":"e_1_3_3_2_25_1","volume-title":"ICML","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In ICML."},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00416"},{"key":"e_1_3_3_2_28_1","unstructured":"Fei Shen Xin Jiang Xin He Hu Ye Cong Wang Xiaoyu Du Zechao Li and Jinhui Tang. 2024. IMAGDressing-v1: Customizable Virtual Dressing. arXiv:https:\/\/arXiv.org\/abs\/2407.12705 (2024)."},{"key":"e_1_3_3_2_29_1","unstructured":"Quan Sun Yufeng Cui Xiaosong Zhang Fan Zhang Qiying Yu Zhengxiong Luo Yueze Wang Yongming Rao Jingjing Liu Tiejun Huang et\u00a0al. 2023. Generative multimodal models are in-context learners. arXiv:https:\/\/arXiv.org\/abs\/2312.13286 (2023)."},{"key":"e_1_3_3_2_30_1","first-page":"184","volume-title":"European Conference on Computer Vision","author":"Wan Siqi","year":"2024","unstructured":"Siqi Wan, Yehao Li, Jingwen Chen, Yingwei Pan, Ting Yao, Yang Cao, and Tao Mei. 2024. Improving Virtual Try-On with Garment-Focused Diffusion Models. In European Conference on Computer Vision. Springer, 184\u2013199."},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_36"},{"key":"e_1_3_3_2_32_1","unstructured":"Haoyu Wang Zhilu Zhang Donglin Di Shiliang Zhang and Wangmeng Zuo. 2024b. MV-VTON: Multi-View Virtual Try-On with Diffusion Models. arXiv:https:\/\/arXiv.org\/abs\/2404.17364 (2024)."},{"key":"e_1_3_3_2_33_1","unstructured":"Rui Wang Hailong Guo Jiaming Liu and Huaxia Li. 2024a. StableGarment: Garment-Centric Generation via Stable Diffusion. arXiv:https:\/\/arXiv.org\/abs\/2403.10783 (2024)."},{"key":"e_1_3_3_2_34_1","unstructured":"Guangxuan Xiao Tianwei Yin William\u00a0T Freeman Fr\u00e9do Durand and Song Han. 2023. FastComposer: Tuning-Free Multi-Subject Image Generation with Localized Attention. arXiv:https:\/\/arXiv.org\/abs\/2305.10431 (2023)."},{"key":"e_1_3_3_2_35_1","volume-title":"CVPR","author":"Xie Zhenyu","year":"2023","unstructured":"Zhenyu Xie, Zaiyu Huang, Xin Dong, Fuwei Zhao, Haoye Dong, Xijin Zhang, Feida Zhu, and Xiaodan Liang. 2023. GP-VTON: Towards General Purpose Virtual Try-on via Collaborative Local-Flow Global-Parsing Learning. In CVPR."},{"key":"e_1_3_3_2_36_1","unstructured":"Yuhao Xu Tao Gu Weifeng Chen and Chengcai Chen. 2024. Ootdiffusion: Outfitting fusion based latent diffusion for controllable virtual try-on. arXiv:https:\/\/arXiv.org\/abs\/2403.01779 (2024)."},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"e_1_3_3_2_38_1","unstructured":"Lu Yang Wenhe Jia Shan Li and Qing Song. 2023b. Deep Learning Technique for Human Parsing: A Survey and Outlook. arXiv:https:\/\/arXiv.org\/abs\/2301.00394 (2023)."},{"key":"e_1_3_3_2_39_1","unstructured":"Shilong Zhang Lianghua Huang Xi Chen Yifei Zhang Zhi-Fan Wu Yutong Feng Wei Wang Yujun Shen Yu Liu and Ping Luo. 2024. FlashFace: Human Image Personalization with High-fidelity Identity Preservation. arXiv:https:\/\/arXiv.org\/abs\/2403.17008 (2024)."},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00134"},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00447"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730663","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:53:23Z","timestamp":1774018403000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730663"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":40,"alternative-id":["10.1145\/3721238.3730663","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730663","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}