{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:10:00Z","timestamp":1765008600506,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.1145\/3743093.3771015","type":"proceedings-article","created":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:06:16Z","timestamp":1765008376000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["STCGen:Sketch-based Text-to-Clothing Image Generation with Contour and Style Consistency"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7920-4843","authenticated-orcid":false,"given":"Fei","family":"Fang","sequence":"first","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, wuhan, China and Hubei Provincial Engineering Research Center for Intelligent Textile and Fashion, wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9269-2310","authenticated-orcid":false,"given":"Heng","family":"Jiang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, wuhan, China and Hubei Provincial Engineering Research Center for Intelligent Textile and Fashion, wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8007-2465","authenticated-orcid":false,"given":"Jiawen","family":"Yan","sequence":"additional","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, wuhan, China and Hubei Provincial Engineering Research Center for Intelligent Textile and Fashion, wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4526-6297","authenticated-orcid":false,"given":"Chunxia","family":"Xiao","sequence":"additional","affiliation":[{"name":"School of Computer Science, Wuhan University, wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1918-6939","authenticated-orcid":false,"given":"Ruhan","family":"He","sequence":"additional","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, wuhan, China and Hubei Provincial Engineering Research Center for Intelligent Textile and Fashion, wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3329-6495","authenticated-orcid":false,"given":"Jia","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, wuhan, China and Hubei Provincial Engineering Research Center for Intelligent Textile and Fashion, wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0487-0356","authenticated-orcid":false,"given":"Mingfu","family":"Xiong","sequence":"additional","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, wuhan, China and Hubei Provincial Engineering Research Center for Intelligent Textile and Fashion, wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1085-7246","authenticated-orcid":false,"given":"Tao","family":"Peng","sequence":"additional","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6563-669X","authenticated-orcid":false,"given":"Xinrong","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Hong Chen Yipeng Zhang Xin Wang Xuguang Duan Yuwei Zhou and Wenwu Zhu. 2024. DisenDreamer: Subject-Driven Text-to-Image Generation with Sample-aware Disentangled Tuning. IEEE Transactions on Circuits and Systems for Video Technology (2024).","DOI":"10.1109\/TCSVT.2024.3369757"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00526"},{"key":"e_1_3_3_1_4_2","unstructured":"Shu-Yu Chen Wanchao Su Lin Gao Shihong Xia and Hongbo Fu. 2020. Deep generation of face images from sketches. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2006.01047 (2020)."},{"key":"e_1_3_3_1_5_2","unstructured":"Hongsuk Choi Isaac Kasahara Selim Engin Moritz Graule Nikhil Chavan-Dafle and Volkan Isler. 2023. FineControlNet: Fine-level Text Control for Image Generation with Spatially Aligned Text Control Injection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.09252 (2023)."},{"key":"e_1_3_3_1_6_2","unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems 27 (2014)."},{"key":"e_1_3_3_1_7_2","unstructured":"Martin Heusel Hubert Ramsauer Thomas Unterthiner Bernhard Nessler and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_1_8_2","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems 33 (2020) 6840\u20136851."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19787-1_6"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01465"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01592"},{"key":"e_1_3_3_1_13_2","unstructured":"Bowen Li Xiaojuan Qi Thomas Lukasiewicz and Philip Torr. 2019. Controllable text-to-image generation. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00154"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Jiadong Liang Wenjie Pei and Feng Lu. 2023. Layout-bridging text-to-image synthesis. IEEE Transactions on Circuits and Systems for Video Technology 33 12 (2023) 7438\u20137451.","DOI":"10.1109\/TCSVT.2023.3274228"},{"key":"e_1_3_3_1_17_2","unstructured":"I Loshchilov. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1711.05101 (2017)."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_3_1_19_2","unstructured":"Adam Paszke Sam Gross Francisco Massa Adam Lerer James Bradbury Gregory Chanan Trevor Killeen Zeming Lin Natalia Gimelshein Luca Antiga et\u00a0al. 2019. Pytorch: An imperative style high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_1_20_2","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","volume":"3","author":"Phillip Isola","year":"2017","unstructured":"Isola Phillip, Zhu Jun-Yan, Zhou Tinghui, AEfros Alexei, et\u00a0al. 2017. Image-to-image translation with conditional adversarial networks. In Proceedings of the IEEE conference on computer vision and pattern recognition , Vol.\u00a03."},{"key":"e_1_3_3_1_21_2","unstructured":"Can Qin Shu Zhang Ning Yu Yihao Feng Xinyi Yang Yingbo Zhou Huan Wang Juan\u00a0Carlos Niebles Caiming Xiong Silvio Savarese et\u00a0al. 2023. Unicontrol: A unified diffusion model for controllable visual generation in the wild. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.11147 (2023)."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Zhen Qin Qingliang Zeng Yixin Zong and Fan Xu. 2021. Image inpainting based on deep learning: A review. Displays 69 (2021) 102028.","DOI":"10.1016\/j.displa.2021.102028"},{"key":"e_1_3_3_1_23_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_1_24_2","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2204.06125 1 2 (2022) 3."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591560"},{"key":"e_1_3_3_1_27_2","unstructured":"Qiang Wang Di Kong Fengyin Lin and Yonggang Qi. 2023. Diffsketching: Sketch control image synthesis with diffusion models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.18812 (2023)."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01379"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Zhou Wang Alan\u00a0C Bovik Hamid\u00a0R Sheikh and Eero\u00a0P Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE transactions on image processing 13 4 (2004) 600\u2013612.","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICME55011.2023.00357"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00685"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01369"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.629"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGIP62525.2024.00035"},{"key":"e_1_3_3_1_38_2","unstructured":"Tianjun Zhang Yi Zhang Vibhav Vineet Neel Joshi and Xin Wang. 2023. Controllable text-to-image generation with gpt-4. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.18583 (2023)."},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00978"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"crossref","unstructured":"Ruoyu Zhao Mingrui Zhu Shiyin Dong De Cheng Nannan Wang and Xinbo Gao. 2025. Catversion: Concatenating embeddings for diffusion-based text-to-image personalization. IEEE Transactions on Circuits and Systems for Video Technology (2025).","DOI":"10.1109\/TCSVT.2025.3531917"},{"key":"e_1_3_3_1_41_2","unstructured":"Hongsheng Zheng Wenju Xu Zhenyu Wang Xiao Lu and Chunxia Xiao. 2024. Facial Highlight Removal with Cross-Context Attention and Texture Enhancement. IEEE Transactions on Circuits and Systems for Video Technology (2024)."},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"}],"event":{"name":"MMAsia '25: ACM Multimedia Asia","location":"Kuala Lumpur Malaysia","acronym":"MMAsia '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 7th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3743093.3771015","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:07:50Z","timestamp":1765008470000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3743093.3771015"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":41,"alternative-id":["10.1145\/3743093.3771015","10.1145\/3743093"],"URL":"https:\/\/doi.org\/10.1145\/3743093.3771015","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"2025-12-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}