{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:09:04Z","timestamp":1777655344349,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T00:00:00Z","timestamp":1728604800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFF0904900"],"award-info":[{"award-number":["2023YFF0904900"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,13]]},"DOI":"10.1145\/3654777.3676370","type":"proceedings-article","created":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T10:50:36Z","timestamp":1728643836000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["StyleFactory: Towards Better Style Alignment in Image Creation through Style-Strength-Based Control and Evaluation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8479-2844","authenticated-orcid":false,"given":"Mingxu","family":"Zhou","sequence":"first","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6307-7692","authenticated-orcid":false,"given":"Dengming","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Software Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9625-5547","authenticated-orcid":false,"given":"Weitao","family":"You","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8665-2300","authenticated-orcid":false,"given":"Ziqi","family":"Yu","sequence":"additional","affiliation":[{"name":"College of Media and International Culture, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0165-6100","authenticated-orcid":false,"given":"Yifei","family":"Wu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6561-7808","authenticated-orcid":false,"given":"Chenghao","family":"Pan","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9792-7974","authenticated-orcid":false,"given":"Huiting","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7669-8598","authenticated-orcid":false,"given":"Tianyu","family":"Lao","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0962-6459","authenticated-orcid":false,"given":"Pei","family":"Chen","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,11]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Namhyuk Ahn Junsoo Lee Chunggi Lee Kunhee Kim Daesik Kim Seung-Hun Nam and Kibeom Hong. 2023. DreamStyler: Paint by Style Inversion with Text-to-Image Diffusion Models. arxiv:2309.06933\u00a0[cs.CV]"},{"key":"e_1_3_2_2_2_1","unstructured":"Atlee Arts. 2024. ELEMENTS OF ART and PRINCIPLES OF DESIGN. https:\/\/atleearts.weebly.com\/uploads\/5\/0\/4\/9\/50491891\/3.elements_and_principles_of_art-descriptive_words__1_.pdf"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01762"},{"key":"e_1_3_2_2_4_1","unstructured":"Seungho Baek Hyerin Im Jiseung Ryu Juhyeong Park and Takyeon Lee. 2023. PromptCrafter: Crafting Text-to-Image Prompt through Mixed-Initiative Dialogue with LLM. arxiv:2307.08985\u00a0[cs.HC]"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606725"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2501988.2502008"},{"key":"e_1_3_2_2_7_1","unstructured":"Xiang\u00a0\u2019Anthony\u2019 Chen Jeff Burke Ruofei Du Matthew\u00a0K. Hong Jennifer Jacobs Philippe Laban Dingzeyu Li Nanyun Peng Karl D.\u00a0D. Willis Chien-Sheng Wu and Bolei Zhou. 2023. Next Steps for Human-Centered Generative AI: A Technical Perspective. arxiv:2306.15774\u00a0[cs.HC]"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2617588"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606777"},{"key":"e_1_3_2_2_10_1","unstructured":"Guillaume Couairon Jakob Verbeek Holger Schwenk and Matthieu Cord. 2022. DiffEdit: Diffusion-based semantic image editing with mask guidance. arxiv:2210.11427\u00a0[cs.CV]"},{"key":"e_1_3_2_2_11_1","volume-title":"TISE: Bag of\u00a0Metrics for\u00a0Text-to-Image Synthesis Evaluation. In Computer Vision \u2013 ECCV","author":"Dinh M.","year":"2022","unstructured":"Tan\u00a0M. Dinh, Rang Nguyen, and Binh-Son Hua. 2022. TISE: Bag of\u00a0Metrics for\u00a0Text-to-Image Synthesis Evaluation. In Computer Vision \u2013 ECCV 2022, Shai Avidan, Gabriel Brostow, Moustapha Ciss\u00e9, Giovanni\u00a0Maria Farinella, and Tal Hassner (Eds.). Springer Nature Switzerland, Cham, 594\u2013609."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545638"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581226"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2023.3327168"},{"key":"e_1_3_2_2_15_1","unstructured":"Rinon Gal Yuval Alaluf Yuval Atzmon Or Patashnik Amit\u00a0H. Bermano Gal Chechik and Daniel Cohen-Or. 2022. An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion. arxiv:2208.01618\u00a0[cs.CV]"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.265"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00694"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2024.3408255"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/302979.303030"},{"key":"e_1_3_2_2_20_1","unstructured":"Edward\u00a0J. Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang and Weizhu Chen. 2021. LoRA: Low-Rank Adaptation of Large Language Models. arxiv:2106.09685\u00a0[cs.CL]"},{"key":"e_1_3_2_2_21_1","volume-title":"Composer: Creative and Controllable Image Synthesis with Composable Conditions. arxiv:2302.09778\u00a0[cs.CV]","author":"Huang Lianghua","year":"2023","unstructured":"Lianghua Huang, Di Chen, Yu Liu, Yujun Shen, Deli Zhao, and Jingren Zhou. 2023. Composer: Creative and Controllable Image Synthesis with Composable Conditions. arxiv:2302.09778\u00a0[cs.CV]"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642824"},{"key":"e_1_3_2_2_23_1","unstructured":"Jiaming Ji Tianyi Qiu Boyuan Chen Borong Zhang Hantao Lou Kaile Wang Yawen Duan Zhonghao He Jiayi Zhou Zhaowei Zhang Fanzhi Zeng Kwan\u00a0Yee Ng Juntao Dai Xuehai Pan Aidan O\u2019Gara Yingshan Lei Hua Xu Brian Tse Jie Fu Stephen McAleer Yaodong Yang Yizhou Wang Song-Chun Zhu Yike Guo and Wen Gao. 2024. AI Alignment: A Comprehensive Survey. arxiv:2310.19852\u00a0[cs.AI]"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581641.3584078"},{"key":"e_1_3_2_2_26_1","unstructured":"Chinmay Kulkarni Stefania Druga Minsuk Chang Alex Fiannaca Carrie Cai and Michael Terry. 2023. A Word is Worth a Thousand Pictures: Prompts as AI Design Material. arxiv:2303.12647\u00a0[cs.HC]"},{"key":"e_1_3_2_2_27_1","unstructured":"Zeqiang Lai Xizhou Zhu Jifeng Dai Yu Qiao and Wenhai Wang. 2023. Mini-DALLE3: Interactive Text to Image by Prompting Large Language Models. arxiv:2310.07653\u00a0[cs.AI]"},{"key":"e_1_3_2_2_28_1","unstructured":"Yaniv Leviathan Matan Kalman and Yossi Matias. 2023. Fast Inference from Transformers via Speculative Decoding. arxiv:2211.17192\u00a0[cs.LG]"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2009.2015077"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00655"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501825"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545621"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/258734.258887"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642462"},{"key":"e_1_3_2_2_35_1","unstructured":"Midjourney. 2024 (Accessed Apr 1 2024). Midjourney. https:\/\/www.midjourney.com."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2601097.2601110"},{"key":"e_1_3_2_2_38_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_2_39_1","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. arxiv:2204.06125\u00a0[cs.CV]"},{"key":"e_1_3_2_2_40_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0139)","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. In Proceedings of the 38th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0139), Marina Meila and Tong Zhang (Eds.). PMLR, 8821\u20138831. https:\/\/proceedings.mlr.press\/v139\/ramesh21a.html"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-Resolution Image Synthesis with Latent Diffusion Models. arxiv:2112.10752\u00a0[cs.CV]","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445296"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"crossref","unstructured":"Nataniel Ruiz Yuanzhen Li Varun Jampani Yael Pritch Michael Rubinstein and Kfir Aberman. 2023. DreamBooth: Fine Tuning Text-to-Image Diffusion Models for Subject-Driven Generation. arxiv:2208.12242\u00a0[cs.CV]","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_2_2_44_1","volume-title":"Burcu\u00a0Karagol Ayan, S.\u00a0Sara Mahdavi, Rapha\u00a0Gontijo Lopes, Tim Salimans, Jonathan Ho, David\u00a0J Fleet, and Mohammad Norouzi.","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily Denton, Seyed Kamyar\u00a0Seyed Ghasemipour, Burcu\u00a0Karagol Ayan, S.\u00a0Sara Mahdavi, Rapha\u00a0Gontijo Lopes, Tim Salimans, Jonathan Ho, David\u00a0J Fleet, and Mohammad Norouzi. 2022. Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding. arxiv:2205.11487\u00a0[cs.CV]"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00844"},{"key":"e_1_3_2_2_46_1","unstructured":"Kihyuk Sohn Nataniel Ruiz Kimin Lee Daniel\u00a0Castro Chin Irina Blok Huiwen Chang Jarred Barber Lu Jiang Glenn Entis Yuanzhen Li Yuan Hao Irfan Essa Michael Rubinstein and Dilip Krishnan. 2023. StyleDrop: Text-to-Image Generation in Any Style. arxiv:2306.00983\u00a0[cs.CV]"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642847"},{"key":"e_1_3_2_2_48_1","unstructured":"Michael Terry Chinmay Kulkarni Martin Wattenberg Lucas Dixon and Meredith\u00a0Ringel Morris. 2023. AI Alignment in the Design of Interactive AI: Specification Alignment Process Alignment and Evaluation Support. arxiv:2311.00710\u00a0[cs.HC]"},{"key":"e_1_3_2_2_49_1","volume-title":"The role of interface design on prompt-mediated creativity in Generative AI. arXiv preprint arXiv:2312.00233","author":"Torricelli Maddalena","year":"2023","unstructured":"Maddalena Torricelli, Mauro Martino, Andrea Baronchelli, and Luca\u00a0Maria Aiello. 2023. The role of interface design on prompt-mediated creativity in Generative AI. arXiv preprint arXiv:2312.00233 (2023)."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397481.3450666"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591560"},{"key":"e_1_3_2_2_52_1","unstructured":"Jianyi Wang Kelvin C.\u00a0K. Chan and Chen\u00a0Change Loy. 2022. Exploring CLIP for Assessing the Look and Feel of Images. arxiv:2207.12396\u00a0[cs.CV]"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581402"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642803"},{"key":"e_1_3_2_2_55_1","volume-title":"Advances in Neural Information Processing Systems, A.\u00a0Oh, T.\u00a0Neumann, A.\u00a0Globerson, K.\u00a0Saenko, M.\u00a0Hardt, and S.\u00a0Levine (Eds.). Vol.\u00a036. Curran Associates","author":"Wen Yuxin","year":"2023","unstructured":"Yuxin Wen, Neel Jain, John Kirchenbauer, Micah Goldblum, Jonas Geiping, and Tom Goldstein. 2023. Hard Prompts Made Easy: Gradient-Based Discrete Optimization for Prompt Tuning and Discovery. In Advances in Neural Information Processing Systems, A.\u00a0Oh, T.\u00a0Neumann, A.\u00a0Globerson, K.\u00a0Saenko, M.\u00a0Hardt, and S.\u00a0Levine (Eds.). Vol.\u00a036. Curran Associates, Inc., 51008\u201351025. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/a00548031e4647b13042c97c922fadf1-Paper-Conference.pdf"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517582"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00200"},{"key":"e_1_3_2_2_58_1","volume-title":"AI Creativity and the Human-AI Co-creation Model","author":"Wu Zhuohao","unstructured":"Zhuohao Wu, Danwen Ji, Kaiwen Yu, Xianxu Zeng, Dingming Wu, and Mohammad Shidujaman. 2021. AI Creativity and the Human-AI Co-creation Model. In Human-Computer Interaction. Theory, Methods and Tools, Masaaki Kurosu (Ed.). Springer International Publishing, Cham, 171\u2013190."},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02148"},{"key":"e_1_3_2_2_60_1","unstructured":"Jiazheng Xu Xiao Liu Yuchen Wu Yuxuan Tong Qinkai Li Ming Ding Jie Tang and Yuxiao Dong. 2023. ImageReward: Learning and Evaluating Human Preferences for Text-to-Image Generation. arxiv:2304.05977\u00a0[cs.CV]"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376301"},{"key":"e_1_3_2_2_63_1","volume-title":"Ip-adapter: Text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721","author":"Ye Hu","year":"2023","unstructured":"Hu Ye, Jun Zhang, Sibo Liu, Xiao Han, and Wei Yang. 2023. Ip-adapter: Text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)."},{"key":"e_1_3_2_2_64_1","unstructured":"Ahmet\u00a0Burak Yildirim Vedat Baday Erkut Erdem Aykut Erdem and Aysegul Dundar. 2023. Inst-Inpaint: Instructing to Remove Objects with Diffusion Models. arxiv:2304.03246\u00a0[cs.CV]"},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586182.3615790"},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581388"},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642165"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_2_69_1","unstructured":"Brian\u00a0Nlong Zhao Yuhang Xiao Jiashu Xu Xinyang Jiang Yifan Yang Dongsheng Li Laurent Itti Vibhav Vineet and Yunhao Ge. 2023. DreamDistribution: Prompt Distribution Learning for Text-to-Image Diffusion Models. arxiv:2312.14216\u00a0[cs.CV]"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02154"}],"event":{"name":"UIST '24: The 37th Annual ACM Symposium on User Interface Software and Technology","location":"Pittsburgh PA USA","acronym":"UIST '24"},"container-title":["Proceedings of the 37th Annual ACM Symposium on User Interface Software and Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3654777.3676370","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3654777.3676370","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,4]],"date-time":"2025-08-04T21:11:08Z","timestamp":1754341868000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3654777.3676370"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,11]]},"references-count":70,"alternative-id":["10.1145\/3654777.3676370","10.1145\/3654777"],"URL":"https:\/\/doi.org\/10.1145\/3654777.3676370","relation":{},"subject":[],"published":{"date-parts":[[2024,10,11]]},"assertion":[{"value":"2024-10-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}