{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T15:32:13Z","timestamp":1773156733674,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,18]],"date-time":"2024-03-18T00:00:00Z","timestamp":1710720000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-2047297"],"award-info":[{"award-number":["IIS-2047297"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-22-1-2188"],"award-info":[{"award-number":["N00014-22-1-2188"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,18]]},"DOI":"10.1145\/3640543.3645173","type":"proceedings-article","created":{"date-parts":[[2024,4,5]],"date-time":"2024-04-05T18:23:12Z","timestamp":1712341392000},"page":"74-87","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["From Text to Pixels: Enhancing User Understanding through Text-to-Image Model Explanations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2408-3798","authenticated-orcid":false,"given":"Noyan","family":"Evirgen","sequence":"first","affiliation":[{"name":"HCI Research, UCLA, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9327-3793","authenticated-orcid":false,"given":"Ruolin","family":"Wang","sequence":"additional","affiliation":[{"name":"HCI Research, UCLA, United 
States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8527-1744","authenticated-orcid":false,"given":"Xiang 'Anthony","family":"Chen","sequence":"additional","affiliation":[{"name":"HCI Research, UCLA, United States"}]}],"member":"320","published-online":{"date-parts":[[2024,4,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377325.3377498"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3449287"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592116"},{"key":"e_1_3_2_1_4_1","volume-title":"X-IQE: eXplainable Image Quality Evaluation for Text-to-Image Generation with Visual Large Language Models. arXiv preprint arXiv:2305.10843","author":"Chen Yixiong","year":"2023","unstructured":"Yixiong Chen. 2023. X-IQE: eXplainable Image Quality Evaluation for Text-to-Image Generation with Visual Large Language Models. arXiv preprint arXiv:2305.10843 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545638"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581226"},{"key":"e_1_3_2_1_8_1","volume-title":"PromptMagician: Interactive Prompt Engineering for Text-to-Image Creation","author":"Feng Yingchaojie","year":"2023","unstructured":"Yingchaojie Feng, Xingbo Wang, Kam\u00a0Kwai Wong, Sijia Wang, Yuhong Lu, Minfeng Zhu, Baicheng Wang, and Wei Chen. 2023. PromptMagician: Interactive Prompt Engineering for Text-to-Image Creation. 
IEEE Transactions on Visualization and Computer Graphics (2023)."},{"key":"e_1_3_2_1_9_1","unstructured":"Raymond Fok and Daniel\u00a0S Weld. 2023. In Search of Verifiability: Explanations Rarely Enable Complementary Performance in AI-Advised Decision Making. arXiv preprint arXiv:2305.07722 (2023)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3449084"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577011"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580694"},{"key":"e_1_3_2_1_13_1","volume-title":"Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725","author":"Guo Yuwei","year":"2023","unstructured":"Yuwei Guo, Ceyuan Yang, Anyi Rao, Yaohui Wang, Yu Qiao, Dahua Lin, and Bo Dai. 2023. Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725 (2023)."},{"key":"e_1_3_2_1_14_1","volume-title":"Optimizing prompts for text-to-image generation. arXiv preprint arXiv:2212.09611","author":"Hao Yaru","year":"2022","unstructured":"Yaru Hao, Zewen Chi, Li Dong, and Furu Wei. 2022. Optimizing prompts for text-to-image generation. arXiv preprint arXiv:2212.09611 (2022)."},{"key":"e_1_3_2_1_15_1","volume-title":"The psychology of interpersonal relations","author":"Heider Fritz","unstructured":"Fritz Heider. 2013. The psychology of interpersonal relations. Psychology Press."},{"key":"e_1_3_2_1_16_1","volume-title":"Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626","author":"Hertz Amir","year":"2022","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control. 
arXiv preprint arXiv:2208.01626 (2022)."},{"key":"e_1_3_2_1_17_1","volume-title":"Contextual design: defining customer-centered systems","author":"Holtzblatt Karen","unstructured":"Karen Holtzblatt and Hugh Beyer. 1997. Contextual design: defining customer-centered systems. Elsevier."},{"key":"e_1_3_2_1_18_1","volume-title":"How machine-learning recommendations influence clinician treatment selections: the example of antidepressant selection. Translational Psychiatry 11","author":"Jacobs Maia","year":"2021","unstructured":"Maia Jacobs, Melanie\u00a0F. Pradier, Thomas H.\u00a0McCoy Jr, Roy\u00a0H. Perlis, Finale Doshi-Velez, and Krzysztof\u00a0Z. Gajos. 2021. How machine-learning recommendations influence clinician treatment selections: the example of antidepressant selection. Translational Psychiatry 11 (2021)."},{"key":"e_1_3_2_1_19_1","first-page":"4211","article-title":"How can i explain this to you? an empirical study of deep neural network explanation methods","volume":"33","author":"Jeyakumar Jeya\u00a0Vikranth","year":"2020","unstructured":"Jeya\u00a0Vikranth Jeyakumar, Joseph Noor, Yu-Hsi Cheng, Luis Garcia, and Mani Srivastava. 2020. How can i explain this to you? an empirical study of deep neural network explanation methods. Advances in Neural Information Processing Systems 33 (2020), 4211\u20134222.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581641.3584078"},{"key":"e_1_3_2_1_21_1","volume-title":"Using the \"thinking-aloud\" method in cognitive interface design","author":"Lewis Clayton","unstructured":"Clayton Lewis. 1982. Using the \"thinking-aloud\" method in cognitive interface design. IBM TJ Watson Research Center Yorktown Heights, NY."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings, Part IV 16","author":"Liang Jiadong","year":"2020","unstructured":"Jiadong Liang, Wenjie Pei, and Feng Lu. 2020. 
Cpgan: Content-parsing generative adversarial networks for text-to-image synthesis. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part IV 16. Springer, 491\u2013508."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501825"},{"key":"e_1_3_2_1_24_1","volume-title":"The structure and function of explanations. Trends in cognitive sciences 10, 10","author":"Lombrozo Tania","year":"2006","unstructured":"Tania Lombrozo. 2006. The structure and function of explanations. Trends in cognitive sciences 10, 10 (2006), 464\u2013470."},{"key":"e_1_3_2_1_25_1","volume-title":"T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453","author":"Mou Chong","year":"2023","unstructured":"Chong Mou, Xintao Wang, Liangbin Xie, Jian Zhang, Zhongang Qi, Ying Shan, and Xiaohu Qie. 2023. T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11257-017-9195-0"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3569219.3569352"},{"key":"e_1_3_2_1_28_1","volume-title":"Editing implicit assumptions in text-to-image diffusion models. arXiv preprint arXiv:2303.08084","author":"Orgad Hadas","year":"2023","unstructured":"Hadas Orgad, Bahjat Kawar, and Yonatan Belinkov. 2023. Editing implicit assumptions in text-to-image diffusion models. arXiv preprint arXiv:2303.08084 (2023)."},{"key":"e_1_3_2_1_29_1","unstructured":"Atila Orhon, Michael Siracusa, and Aseem Wadhwa. 2022. Stable Diffusion with Core ML on Apple Silicon."},{"key":"e_1_3_2_1_30_1","volume-title":"Localizing Object-level Shape Variations with Text-to-Image Diffusion Models. 
arXiv preprint arXiv:2303.11306","author":"Patashnik Or","year":"2023","unstructured":"Or Patashnik, Daniel Garibi, Idan Azuri, Hadar Averbuch-Elor, and Daniel Cohen-Or. 2023. Localizing Object-level Shape Variations with Text-to-Image Diffusion Models. arXiv preprint arXiv:2303.11306 (2023)."},{"key":"e_1_3_2_1_31_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_1_32_1","volume-title":"International Conference on Machine Learning. PMLR, 8821\u20138831","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-shot text-to-image generation. In International Conference on Machine Learning. PMLR, 8821\u20138831."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_34_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily\u00a0L Denton, Kamyar Ghasemipour, Raphael Gontijo\u00a0Lopes, Burcu Karagol\u00a0Ayan, Tim Salimans, 2022. Photorealistic text-to-image diffusion models with deep language understanding. 
Advances in Neural Information Processing Systems 35 (2022), 36479\u201336494.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3534639"},{"key":"e_1_3_2_1_36_1","volume-title":"The methodology of participatory design. Technical communication 52, 2","author":"Spinuzzi Clay","year":"2005","unstructured":"Clay Spinuzzi. 2005. The methodology of participatory design. Technical communication 52, 2 (2005), 163\u2013174."},{"key":"e_1_3_2_1_37_1","volume-title":"What the daam: Interpreting stable diffusion using cross attention. arXiv preprint arXiv:2210.04885","author":"Tang Raphael","year":"2022","unstructured":"Raphael Tang, Akshat Pandey, Zhiying Jiang, Gefei Yang, Karun Kumar, Jimmy Lin, and Ferhan Ture. 2022. What the daam: Interpreting stable diffusion using cross attention. arXiv preprint arXiv:2210.04885 (2022)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1080\/14626268.2023.2174557"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300831"},{"key":"e_1_3_2_1_40_1","volume-title":"Medclip: Contrastive learning from unpaired medical images and text. arXiv preprint arXiv:2210.10163","author":"Wang Zifeng","year":"2022","unstructured":"Zifeng Wang, Zhenbang Wu, Dinesh Agarwal, and Jimeng Sun. 2022. Medclip: Contrastive learning from unpaired medical images and text. arXiv preprint arXiv:2210.10163 (2022)."},{"key":"e_1_3_2_1_41_1","volume-title":"Diffusiondb: A large-scale prompt gallery dataset for text-to-image generative models. arXiv preprint arXiv:2210.14896","author":"Wang J","year":"2022","unstructured":"Zijie\u00a0J Wang, Evan Montoya, David Munechika, Haoyang Yang, Benjamin Hoover, and Duen\u00a0Horng Chau. 2022. Diffusiondb: A large-scale prompt gallery dataset for text-to-image generative models. 
arXiv preprint arXiv:2210.14896 (2022)."},{"key":"e_1_3_2_1_42_1","volume-title":"Better aligning text-to-image models with human preference. arXiv preprint arXiv:2303.14420","author":"Wu Xiaoshi","year":"2023","unstructured":"Xiaoshi Wu, Keqiang Sun, Feng Zhu, Rui Zhao, and Hongsheng Li. 2023. Better aligning text-to-image models with human preference. arXiv preprint arXiv:2303.14420 (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"Imagereward: Learning and evaluating human preferences for text-to-image generation. arXiv preprint arXiv:2304.05977","author":"Xu Jiazheng","year":"2023","unstructured":"Jiazheng Xu, Xiao Liu, Yuchen Wu, Yuxuan Tong, Qinkai Li, Ming Ding, Jie Tang, and Yuxiao Dong. 2023. Imagereward: Learning and evaluating human preferences for text-to-image generation. arXiv preprint arXiv:2304.05977 (2023)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_4"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"}],"event":{"name":"IUI '24: 29th International Conference on Intelligent User Interfaces","location":"Greenville SC USA","acronym":"IUI '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 29th International Conference on Intelligent User 
Interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640543.3645173","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640543.3645173","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:57:02Z","timestamp":1764550622000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640543.3645173"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,18]]},"references-count":45,"alternative-id":["10.1145\/3640543.3645173","10.1145\/3640543"],"URL":"https:\/\/doi.org\/10.1145\/3640543.3645173","relation":{},"subject":[],"published":{"date-parts":[[2024,3,18]]},"assertion":[{"value":"2024-04-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}