{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T14:50:46Z","timestamp":1777733446498,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,29]],"date-time":"2023-10-29T00:00:00Z","timestamp":1698537600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Sciences and Engineering Research Council of Canada (NSERC)","award":["Grant IRCPJ 545100 - 18"],"award-info":[{"award-number":["Grant IRCPJ 545100 - 18"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,29]]},"DOI":"10.1145\/3586183.3606725","type":"proceedings-article","created":{"date-parts":[[2023,10,20]],"date-time":"2023-10-20T20:46:22Z","timestamp":1697834782000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":156,"title":["Promptify: Text-to-Image Generation through Interactive Prompt Exploration with Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-3182-1498","authenticated-orcid":false,"given":"Stephen","family":"Brade","sequence":"first","affiliation":[{"name":"Computer Science, University of Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9016-038X","authenticated-orcid":false,"given":"Bryan","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1438-2882","authenticated-orcid":false,"given":"Mauricio","family":"Sousa","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8130-3569","authenticated-orcid":false,"given":"Sageev","family":"Oore","sequence":"additional","affiliation":[{"name":"Faculty of Computer Science, Dalhousie University, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0494-5373","authenticated-orcid":false,"given":"Tovi","family":"Grossman","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Toronto, Canada"}]}],"member":"320","published-online":{"date-parts":[[2023,10,29]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"2022. Automatic1111 Extensions. https:\/\/github.com\/AUTOMATIC1111\/stable-diffusion-webui\/wiki\/Extensions"},{"key":"e_1_3_2_2_2_1","unstructured":"2022. Promptgen. https:\/\/github.com\/AUTOMATIC1111\/stable-diffusion-webui-promptgen"},{"key":"e_1_3_2_2_3_1","unstructured":"2023. CLIP. https:\/\/github.com\/openai\/CLIP"},{"key":"e_1_3_2_2_4_1","unstructured":"2023. Lexica. https:\/\/lexica.art\/"},{"key":"e_1_3_2_2_5_1","unstructured":"2023. MagicPrompt-Stable-Diffusion. https:\/\/huggingface.co\/spaces\/Gustavosta\/MagicPrompt-Stable-Diffusion"},{"key":"e_1_3_2_2_6_1","unstructured":"2023. Midjourney. https:\/\/www.midjourney.com\/home\/?callbackUrl=%2Fapp%2F"},{"key":"e_1_3_2_2_7_1","unstructured":"2023. OpenAI API. https:\/\/openai.com\/blog\/openai-api"},{"key":"e_1_3_2_2_8_1","unstructured":"2023. React Flow. https:\/\/reactflow.dev\/"},{"key":"e_1_3_2_2_9_1","unstructured":"2023. SentenceTransformers. https:\/\/www.sbert.net\/"},{"key":"e_1_3_2_2_10_1","unstructured":"Maneesh Agrawala. 2023. Unpredictable Black Boxes are Terrible Interfaces. https:\/\/magrawala.substack.com\/p\/unpredictable-black-boxes-are-terrible"},{"key":"e_1_3_2_2_11_1","unstructured":"automatic1111. 2022. Stable Diffusion Web UI. https:\/\/github.com\/AUTOMATIC1111\/stable-diffusion-webui."},{"key":"e_1_3_2_2_12_1","volume-title":"Natural language processing with Python: analyzing text with the natural language toolkit. \" O\u2019Reilly Media","author":"Bird Steven","unstructured":"Steven Bird, Ewan Klein, and Edward Loper. 2009. Natural language processing with Python: analyzing text with the natural language toolkit. \" O\u2019Reilly Media, Inc.\"."},{"key":"e_1_3_2_2_13_1","unstructured":"Tom\u00a0B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel\u00a0M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. arxiv:2005.14165\u00a0[cs.CL]"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2501988.2502008"},{"key":"e_1_3_2_2_15_1","unstructured":"cmdr2. 2022. Easy Diffusion. https:\/\/github.com\/cmdr2\/stable-diffusion-ui."},{"key":"e_1_3_2_2_16_1","unstructured":"Danny Driess Fei Xia Mehdi S.\u00a0M. Sajjadi Corey Lynch Aakanksha Chowdhery Brian Ichter Ayzaan Wahid Jonathan Tompson Quan Vuong Tianhe Yu Wenlong Huang Yevgen Chebotar Pierre Sermanet Daniel Duckworth Sergey Levine Vincent Vanhoucke Karol Hausman Marc Toussaint Klaus Greff Andy Zeng Igor Mordatch and Pete Florence. 2023. PaLM-E: An Embodied Multimodal Language Model. In arXiv preprint arXiv:2303.03378."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545638"},{"key":"e_1_3_2_2_18_1","volume-title":"GANravel: User-Driven Direction Disentanglement in Generative Adversarial Networks. arXiv preprint arXiv:2302.00079","author":"Evirgen Noyan","year":"2023","unstructured":"Noyan Evirgen and Xiang\u2019Anthony\u2019 Chen. 2023. GANravel: User-Driven Direction Disentanglement in Generative Adversarial Networks. arXiv preprint arXiv:2302.00079 (2023)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.1136800"},{"key":"e_1_3_2_2_20_1","volume-title":"Development of NASA-TLX (Task Load Index): Results of empirical and theoretical research.[W]: PA Hancock","author":"Hart G","unstructured":"Sandra\u00a0G Hart and LE Staveland. 1988. Development of NASA-TLX (Task Load Index): Results of empirical and theoretical research.[W]: PA Hancock, N. Meshkati (Eds.): Human Mental Workload."},{"key":"e_1_3_2_2_21_1","unstructured":"Amir Hertz Ron Mokady Jay Tenenbaum Kfir Aberman Yael Pritch and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control. (2022)."},{"key":"e_1_3_2_2_22_1","volume-title":"Creative Works. ArXiv abs\/2210.08477","author":"Ko Hyung-Kwon","year":"2022","unstructured":"Hyung-Kwon Ko, Gwanmo Park, Hyeon Jeon, Jaemin Jo, Juho Kim, and Jinwook Seo. 2022. Large-scale Text-to-Image Generation Models for Visual Artists\u2019 Creative Works. ArXiv abs\/2210.08477 (2022)."},{"key":"e_1_3_2_2_23_1","volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. arxiv:2201.12086\u00a0[cs.CV]","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. arxiv:2201.12086\u00a0[cs.CV]"},{"key":"e_1_3_2_2_24_1","unstructured":"Jiachang Liu Dinghan Shen Yizhe Zhang Bill Dolan Lawrence Carin and Weizhu Chen. 2021. What Makes Good In-Context Examples for GPT-3?arxiv:2101.06804\u00a0[cs.CL]"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501825"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545621"},{"key":"e_1_3_2_2_27_1","unstructured":"Vivian Liu Jo Vermeulen George Fitzmaurice and Justin Matejka. 2022. 3DALL-E: Integrating Text-to-Image AI in 3D Design Workflows. arxiv:2210.11603\u00a0[cs.HC]"},{"key":"e_1_3_2_2_28_1","unstructured":"Elman Mansimov Emilio Parisotto Jimmy\u00a0Lei Ba and Ruslan Salakhutdinov. 2016. Generating Images from Captions with Attention. arxiv:1511.02793\u00a0[cs.LG]"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/258734.258887"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173943"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1622176.1622214"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"crossref","unstructured":"Anh Nguyen Jeff Clune Yoshua Bengio Alexey Dosovitskiy and Jason Yosinski. 2017. Plug & Play Generative Networks: Conditional Iterative Generation of Images in Latent Space. arxiv:1612.00005\u00a0[cs.CV]","DOI":"10.1109\/CVPR.2017.374"},{"key":"e_1_3_2_2_33_1","unstructured":"nolan dev. 2019. GANInterface. https:\/\/github.com\/nolan-dev\/GANInterface."},{"key":"e_1_3_2_2_34_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arxiv:2303.08774\u00a0[cs.CL]"},{"key":"e_1_3_2_2_35_1","unstructured":"Jonas Oppenlaender. 2022. A Taxonomy of Prompt Modifiers for Text-To-Image Generation. arxiv:2204.13988\u00a0[cs.MM]"},{"key":"e_1_3_2_2_36_1","unstructured":"pharmapsychotic. 2022. clip-interrogator. https:\/\/github.com\/pharmapsychotic\/clip-interrogator."},{"key":"e_1_3_2_2_37_1","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark Gretchen Krueger and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arxiv:2103.00020\u00a0[cs.CV]"},{"key":"e_1_3_2_2_38_1","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. arxiv:2204.06125\u00a0[cs.CV]"},{"key":"e_1_3_2_2_39_1","unstructured":"Aditya Ramesh Mikhail Pavlov Gabriel Goh Scott Gray Chelsea Voss Alec Radford Mark Chen and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. arxiv:2102.12092\u00a0[cs.CV]"},{"key":"e_1_3_2_2_40_1","unstructured":"Scott Reed Zeynep Akata Santosh Mohan Samuel Tenka Bernt Schiele and Honglak Lee. 2016. Learning What and Where to Draw. arxiv:1610.02454\u00a0[cs.CV]"},{"key":"e_1_3_2_2_41_1","volume-title":"Proceedings of The 33rd International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a048)","author":"Reed Scott","year":"2016","unstructured":"Scott Reed, Zeynep Akata, Xinchen Yan, Lajanugen Logeswaran, Bernt Schiele, and Honglak Lee. 2016. Generative Adversarial Text to Image Synthesis. In Proceedings of The 33rd International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a048), Maria\u00a0Florina Balcan and Kilian\u00a0Q. Weinberger (Eds.). PMLR, New York, New York, USA, 1060\u20131069. https:\/\/proceedings.mlr.press\/v48\/reed16.html"},{"key":"e_1_3_2_2_42_1","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2021. High-Resolution Image Synthesis with Latent Diffusion Models. arxiv:2112.10752\u00a0[cs.CV]"},{"key":"e_1_3_2_2_43_1","volume-title":"Prompt Engineering Guide. https:\/\/github.com\/dair-ai\/Prompt-Engineering-Guide (12","author":"Saravia Elvis","year":"2022","unstructured":"Elvis Saravia. 2022. Prompt Engineering Guide. https:\/\/github.com\/dair-ai\/Prompt-Engineering-Guide (12 2022)."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2980179.2980242"},{"key":"e_1_3_2_2_45_1","volume-title":"Proceedings of the 32nd International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a037)","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep Unsupervised Learning using Nonequilibrium Thermodynamics. In Proceedings of the 32nd International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a037), Francis Bach and David Blei (Eds.). PMLR, Lille, France, 2256\u20132265. https:\/\/proceedings.mlr.press\/v37\/sohl-dickstein15.html"},{"key":"e_1_3_2_2_46_1","volume-title":"The concept of discourse community. Wardle and Downs","author":"Swales John","year":"2014","unstructured":"John Swales. 2014. The concept of discourse community. Wardle and Downs (2014), 215\u201328."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/571985.571996"},{"key":"e_1_3_2_2_48_1","volume-title":"Visualizing data using t-SNE.Journal of machine learning research 9, 11","author":"Maaten Laurens Van\u00a0der","year":"2008","unstructured":"Laurens Van\u00a0der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE.Journal of machine learning research 9, 11 (2008)."},{"key":"e_1_3_2_2_49_1","volume-title":"RePrompt: Automatic Prompt Editing to Refine AI-Generative Art Towards Precise Expressions. arXiv preprint arXiv:2302.09466","author":"Wang Yunlong","year":"2023","unstructured":"Yunlong Wang, Shuyuan Shen, and Brian\u00a0Y Lim. 2023. RePrompt: Automatic Prompt Editing to Refine AI-Generative Art Towards Precise Expressions. arXiv preprint arXiv:2302.09466 (2023)."},{"key":"e_1_3_2_2_50_1","volume-title":"DiffusionDB: A Large-Scale Prompt Gallery Dataset for Text-to-Image Generative Models. arXiv:2210.14896 [cs]","author":"Wang J.","year":"2022","unstructured":"Zijie\u00a0J. Wang, Evan Montoya, David Munechika, Haoyang Yang, Benjamin Hoover, and Duen\u00a0Horng Chau. 2022. DiffusionDB: A Large-Scale Prompt Gallery Dataset for Text-to-Image Generative Models. arXiv:2210.14896 [cs] (2022). https:\/\/arxiv.org\/abs\/2210.14896"},{"key":"e_1_3_2_2_51_1","volume-title":"Chi, Quoc Le, and Denny Zhou","author":"Wei Jason","year":"2023","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia, Ed Chi, Quoc Le, and Denny Zhou. 2023. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. arxiv:2201.11903\u00a0[cs.CL]"},{"key":"e_1_3_2_2_52_1","unstructured":"Yuxin Wen Neel Jain John Kirchenbauer Micah Goldblum Jonas Geiping and Tom Goldstein. 2023. Hard Prompts Made Easy: Gradient-Based Discrete Optimization for Prompt Tuning and Discovery. arxiv:2302.03668\u00a0[cs.LG]"},{"key":"e_1_3_2_2_53_1","unstructured":"Sam Witteveen and Martin Andrews. 2022. Investigating Prompt Engineering in Diffusion Models. arxiv:2211.15462\u00a0[cs.CV]"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2807442.2807448"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766908"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/2702123.2702398"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"crossref","unstructured":"Han Zhang Tao Xu Hongsheng Li Shaoting Zhang Xiaogang Wang Xiaolei Huang and Dimitris Metaxas. 2017. StackGAN: Text to Photo-realistic Image Synthesis with Stacked Generative Adversarial Networks. arxiv:1612.03242\u00a0[cs.CV]","DOI":"10.1109\/ICCV.2017.629"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"crossref","unstructured":"Lvmin Zhang and Maneesh Agrawala. 2023. Adding Conditional Control to Text-to-Image Diffusion Models. arxiv:2302.05543\u00a0[cs.CV]","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_2_59_1","unstructured":"Tony\u00a0Z. Zhao Eric Wallace Shi Feng Dan Klein and Sameer Singh. 2021. Calibrate Before Use: Improving Few-Shot Performance of Language Models. arxiv:2102.09690\u00a0[cs.CL]"}],"event":{"name":"UIST '23: The 36th Annual ACM Symposium on User Interface Software and Technology","location":"San Francisco CA USA","acronym":"UIST '23","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 36th Annual ACM Symposium on User Interface Software and Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3586183.3606725","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3586183.3606725","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T23:51:42Z","timestamp":1755820302000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3586183.3606725"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,29]]},"references-count":59,"alternative-id":["10.1145\/3586183.3606725","10.1145\/3586183"],"URL":"https:\/\/doi.org\/10.1145\/3586183.3606725","relation":{},"subject":[],"published":{"date-parts":[[2023,10,29]]},"assertion":[{"value":"2023-10-29","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}