{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T18:15:00Z","timestamp":1774721700240,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Guangdong Basic and Applied Basic Research Foundation","award":["2021B1515020088"],"award-info":[{"award-number":["2021B1515020088"]}]},{"name":"Shenzhen Research Institute of Big Data Research Foundation","award":["T00120210002"],"award-info":[{"award-number":["T00120210002"]}]},{"name":"Shenzhen Science and Technology Program","award":["JCYJ20210324131203009"],"award-info":[{"award-number":["JCYJ20210324131203009"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61972112,61832004,62106211"],"award-info":[{"award-number":["61972112,61832004,62106211"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"HITSZ-J\\&A Joint Laboratory of Digital Design and Intelligent Fabrication","award":["HITSZ-J\\&A-2021A01"],"award-info":[{"award-number":["HITSZ-J\\&A-2021A01"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612127","type":"proceedings-article","created":{"date-parts":[[2023,11,2]],"date-time":"2023-11-02T10:35:09Z","timestamp":1698921309000},"page":"1401-1411","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["FashionDiff: A Controllable Diffusion Model Using Pairwise Fashion Elements for Intelligent Design"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7369-526X","authenticated-orcid":false,"given":"Han","family":"Yan","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1648-0227","authenticated-orcid":false,"given":"Haijun","family":"Zhang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6344-4803","authenticated-orcid":false,"given":"Xiangyu","family":"Mu","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9665-0355","authenticated-orcid":false,"given":"Jicong","family":"Fan","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Shenzhen &amp; Shenzhen Research Institute of Big Data, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5703-7969","authenticated-orcid":false,"given":"Zhao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Hefei University of Technology, Hefei, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Ilvr: Conditioning method for denoising diffusion probabilistic models. arXiv preprint arXiv:2108.02938","author":"Choi Jooyoung","year":"2021","unstructured":"Jooyoung Choi, Sungwon Kim, Yonghyun Jeong, Youngjune Gwon, and Sungroh Yoon. 2021. Ilvr: Conditioning method for denoising diffusion probabilistic models. arXiv preprint arXiv:2108.02938 (2021)."},{"key":"e_1_3_2_1_2_1","volume-title":"Stargan: Unified generative adversarial networks for multi-domain image-to-image translation. In CVPR. 8789--8797.","author":"Choi Yunjey","year":"2018","unstructured":"Yunjey Choi, Minje Choi, Munyoung Kim, Jung-Woo Ha, Sunghun Kim, and Jaegul Choo. 2018. Stargan: Unified generative adversarial networks for multi-domain image-to-image translation. In CVPR. 8789--8797."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00821"},{"key":"e_1_3_2_1_4_1","volume-title":"Cheng Ying Gao, and Zhongbo Su","author":"Cui Yi Rui","year":"2018","unstructured":"Yi Rui Cui, Qi Liu, Cheng Ying Gao, and Zhongbo Su. 2018. Fashiongan: Display your fashion design using conditional generative adversarial nets. In Computer Graphics Forum, Vol. 37. Wiley Online Library, 109--119."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475511"},{"key":"e_1_3_2_1_6_1","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in Neural Information Processing Systems, Vol. 34 (2021), 8780--8794.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Haoye Dong Xiaodan Liang Yixuan Zhang Xujie Zhang Xiaohui Shen Zhenyu Xie Bowen Wu and Jian Yin. 2020. Fashion editing with adversarial parsing learning. In CVPR. 8120--8128.","DOI":"10.1109\/CVPR42600.2020.00814"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_9_1","volume-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_10_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, Vol. 33 (2020), 6840--6851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_11"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3057892"},{"key":"e_1_3_2_1_14_1","unstructured":"Junho Kim Minjae Kim Hyeonwoo Kang and Kwang Hee Lee. 2020. U-GAT-IT: Unsupervised Generative Attentional Networks with Adaptive Layer-Instance Normalization for Image-to-Image Translation. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=BJlZ5ySKPH"},{"key":"e_1_3_2_1_15_1","volume-title":"Unsupervised image-to-image translation networks. Advances in neural information processing systems","author":"Liu Ming-Yu","year":"2017","unstructured":"Ming-Yu Liu, Thomas Breuel, and Jan Kautz. 2017. Unsupervised image-to-image translation networks. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00653"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_1_18_1","volume-title":"Sdedit: Image synthesis and editing with stochastic differential equations. arXiv preprint arXiv:2108.01073","author":"Meng Chenlin","year":"2021","unstructured":"Chenlin Meng, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan Zhu, and Stefano Ermon. 2021. Sdedit: Image synthesis and editing with stochastic differential equations. arXiv preprint arXiv:2108.01073 (2021)."},{"key":"e_1_3_2_1_19_1","volume-title":"Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741","author":"Nichol Alex","year":"2021","unstructured":"Alex Nichol, Prafulla Dhariwal, Aditya Ramesh, Pranav Shyam, Pamela Mishkin, Bob McGrew, Ilya Sutskever, and Mark Chen. 2021. Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)."},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Machine Learning. PMLR, 8162--8171","author":"Nichol Alexander Quinn","year":"2021","unstructured":"Alexander Quinn Nichol and Prafulla Dhariwal. 2021. Improved denoising diffusion probabilistic models. In International Conference on Machine Learning. PMLR, 8162--8171."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"e_1_3_2_1_22_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530757"},{"key":"e_1_3_2_1_25_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022b. Photorealistic text-to-image diffusion models with deep language understanding. Advances in Neural Information Processing Systems, Vol. 35 (2022), 36479--36494.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_26_1","volume-title":"MIDMs: Matching Interleaved Diffusion Models for Exemplar-based Image Translation. arXiv preprint arXiv:2209.11047","author":"Seo Junyoung","year":"2022","unstructured":"Junyoung Seo, Gyuseong Lee, Seokju Cho, Jiyoung Lee, and Seungryong Kim. 2022. MIDMs: Matching Interleaved Diffusion Models for Exemplar-based Image Translation. arXiv preprint arXiv:2209.11047 (2022)."},{"key":"e_1_3_2_1_27_1","volume-title":"International Conference on Machine Learning. PMLR, 2256--2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International Conference on Machine Learning. PMLR, 2256--2265."},{"key":"e_1_3_2_1_28_1","volume-title":"Generative modeling by estimating gradients of the data distribution. Advances in neural information processing systems","author":"Song Yang","year":"2019","unstructured":"Yang Song and Stefano Ermon. 2019. Generative modeling by estimating gradients of the data distribution. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_29_1","volume-title":"Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456","author":"Song Yang","year":"2020","unstructured":"Yang Song, Jascha Sohl-Dickstein, Diederik P Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. 2020. Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)."},{"key":"e_1_3_2_1_30_1","volume-title":"Sketch-Guided Text-to-Image Diffusion Models. arXiv preprint arXiv:2211.13752","author":"Voynov Andrey","year":"2022","unstructured":"Andrey Voynov, Kfir Aberman, and Daniel Cohen-Or. 2022. Sketch-Guided Text-to-Image Diffusion Models. arXiv preprint arXiv:2211.13752 (2022)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Ting-Chun Wang Ming-Yu Liu Jun-Yan Zhu Andrew Tao Jan Kautz and Bryan Catanzaro. 2018. High-resolution image synthesis and semantic manipulation with conditional gans. In CVPR. 8798--8807.","DOI":"10.1109\/CVPR.2018.00917"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.164"},{"key":"e_1_3_2_1_33_1","volume-title":"Texture brush for fashion inspiration transfer: A generative adversarial network with heatmap-guided semantic disentanglement","author":"Yan Han","year":"2022","unstructured":"Han Yan, Haijun Zhang, Jianyang Shi, and Jianghong Ma. 2022. Texture brush for fashion inspiration transfer: A generative adversarial network with heatmap-guided semantic disentanglement. IEEE Transactions on Circuits and Systems for Video Technology (2022)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3567596"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Lvmin Zhang and Maneesh Agrawala. 2023. Adding Conditional Control to Text-to-Image Diffusion Models. arxiv: 2302.05543 [cs.CV]","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_36_1","volume-title":"The unreasonable effectiveness of deep features as a perceptual metric","author":"Zhang Richard","unstructured":"Richard Zhang, Phillip Isola, Alexei A Efros, Eli Shechtman, and Oliver Wang. 2018. The unreasonable effectiveness of deep features as a perceptual metric. In IEEE CVPR. 586--595."},{"key":"e_1_3_2_1_37_1","unstructured":"Jun-Yan Zhu Taesung Park Phillip Isola and Alexei A Efros. 2017a. Unpaired image-to-image translation using cycle-consistent adversarial networks. In ICCV. 2223--2232."},{"key":"e_1_3_2_1_38_1","volume-title":"Toward multimodal image-to-image translation. Advances in neural information processing systems","author":"Zhu Jun-Yan","year":"2017","unstructured":"Jun-Yan Zhu, Richard Zhang, Deepak Pathak, Trevor Darrell, Alexei A Efros, Oliver Wang, and Eli Shechtman. 2017c. Toward multimodal image-to-image translation. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_39_1","unstructured":"Shizhan Zhu Raquel Urtasun Sanja Fidler Dahua Lin and Chen Change Loy. 2017b. Be your own prada: Fashion synthesis with structural coherence. In ICCV. 1680--1688."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00551"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612127","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612127","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:02:31Z","timestamp":1755820951000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612127"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":40,"alternative-id":["10.1145\/3581783.3612127","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612127","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}