{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T12:32:51Z","timestamp":1776083571896,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Google Research Collabs program"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,29]]},"DOI":"10.1145\/3689904.3694710","type":"proceedings-article","created":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T12:19:49Z","timestamp":1729685989000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Auditing Gender Presentation Differences in Text-to-Image Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1874-2622","authenticated-orcid":false,"given":"Yanzhe","family":"Zhang","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0286-8439","authenticated-orcid":false,"given":"Lu","family":"Jiang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3419-6369","authenticated-orcid":false,"given":"Greg","family":"Turk","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1220-3983","authenticated-orcid":false,"given":"Diyi","family":"Yang","sequence":"additional","affiliation":[{"name":"Stanford University, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Guidelines for psychological practice with transgender and gender nonconforming people. American psychologist 70, 9","author":"American\u00a0Psychological Association","year":"2015","unstructured":"American\u00a0Psychological Association. 2015. Guidelines for psychological practice with transgender and gender nonconforming people. American psychologist 70, 9 (2015), 832\u2013864."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"crossref","unstructured":"Hritik Bansal Da Yin Masoud Monajatipoor and Kai-Wei Chang. 2022. How well can Text-to-Image Generative Models understand Ethical Natural Language Interventions?arxiv:2210.15230\u00a0[cs.CL]","DOI":"10.18653\/v1\/2022.emnlp-main.88"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Federico Bianchi Pratyusha Kalluri Esin Durmus Faisal Ladhak Myra Cheng Debora Nozza Tatsunori Hashimoto Dan Jurafsky James Zou and Aylin Caliskan. 2022. Easily Accessible Text-to-Image Generation Amplifies Demographic Stereotypes at Large Scale. arxiv:2211.03759\u00a0[cs.CL]","DOI":"10.1145\/3593013.3594095"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Tim Brooks Aleksander Holynski and Alexei\u00a0A. Efros. 2022. InstructPix2Pix: Learning to Follow Image Editing Instructions. arxiv:2211.09800\u00a0[cs.CV]","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_2_2_5_1","volume-title":"Muse: Text-To-Image Generation via Masked Generative Transformers. arxiv:2301.00704\u00a0[cs.CV]","author":"Chang Huiwen","year":"2023","unstructured":"Huiwen Chang, Han Zhang, Jarred Barber, AJ Maschinot, Jose Lezama, Lu Jiang, Ming-Hsuan Yang, Kevin Murphy, William\u00a0T. Freeman, Michael Rubinstein, Yuanzhen Li, and Dilip Krishnan. 2023. Muse: Text-To-Image Generation via Masked Generative Transformers. arxiv:2301.00704\u00a0[cs.CV]"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Jaemin Cho Abhay Zala and Mohit Bansal. 2022. DALL-Eval: Probing the Reasoning Skills and Social Biases of Text-to-Image Generative Models. arxiv:2202.04053\u00a0[cs.CV]","DOI":"10.1109\/ICCV51070.2023.00283"},{"key":"e_1_3_2_2_7_1","unstructured":"Colin Conwell and Tomer Ullman. 2022. Testing Relational Understanding in Text-Guided Image Generation. arxiv:2208.00005\u00a0[cs.CV]"},{"key":"e_1_3_2_2_8_1","unstructured":"Ming Ding Wendi Zheng Wenyi Hong and Jie Tang. 2022. CogView2: Faster and Better Text-to-Image Generation via Hierarchical Transformers. arxiv:2204.14217\u00a0[cs.CV]"},{"key":"e_1_3_2_2_9_1","volume-title":"The AAAI-23 Workshop on Creative AI Across Modalities. https:\/\/openreview.net\/forum?id=UqvWNBQKf5","author":"Fraser C.","year":"2023","unstructured":"Kathleen\u00a0C. Fraser, Isar Nejadgholi, and Svetlana Kiritchenko. 2023. A Friendly Face: Do Text-to-Image Systems Rely on Stereotypes when the Input is Under-Specified?. In The AAAI-23 Workshop on Creative AI Across Modalities. https:\/\/openreview.net\/forum?id=UqvWNBQKf5"},{"key":"e_1_3_2_2_10_1","unstructured":"Tejas Gokhale Hamid Palangi Besmira Nushi Vibhav Vineet Eric Horvitz Ece Kamar Chitta Baral and Yezhou Yang. 2022. Benchmarking Spatial Relationships in Text-to-Image Generation. In ArXiv."},{"key":"e_1_3_2_2_11_1","volume-title":"I Can\u2019t Believe There\u2019s No Images! Learning Visual Tasks Using only Language Data. ArXiv abs\/2211.09778","author":"Gu Sophia","year":"2022","unstructured":"Sophia Gu, Christopher Clark, and Aniruddha Kembhavi. 2022. I Can\u2019t Believe There\u2019s No Images! Learning Visual Tasks Using only Language Data. ArXiv abs\/2211.09778 (2022)."},{"key":"e_1_3_2_2_12_1","unstructured":"Amir Hertz Ron Mokady Jay Tenenbaum Kfir Aberman Yael Pritch and Daniel Cohen-Or. 2022. Prompt-to-Prompt Image Editing with Cross Attention Control. arxiv:2208.01626\u00a0[cs.CV]"},{"key":"e_1_3_2_2_13_1","volume-title":"Clipscore: A reference-free evaluation metric for image captioning. arXiv preprint arXiv:2104.08718","author":"Hessel Jack","year":"2021","unstructured":"Jack Hessel, Ari Holtzman, Maxwell Forbes, Ronan\u00a0Le Bras, and Yejin Choi. 2021. Clipscore: A reference-free evaluation metric for image captioning. arXiv preprint arXiv:2104.08718 (2021)."},{"key":"e_1_3_2_2_14_1","volume-title":"Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium. In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/8a1d694707eb0fefe65871369074926d-Paper.pdf"},{"key":"e_1_3_2_2_15_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. arxiv:2006.11239\u00a0[cs.LG]"},{"key":"e_1_3_2_2_16_1","volume-title":"Imagic: Text-Based Real Image Editing with Diffusion Models. arxiv:2210.09276\u00a0[cs.CV]","author":"Kawar Bahjat","year":"2022","unstructured":"Bahjat Kawar, Shiran Zada, Oran Lang, Omer Tov, Huiwen Chang, Tali Dekel, Inbar Mosseri, and Michal Irani. 2022. Imagic: Text-Based Real Image Editing with Diffusion Models. arxiv:2210.09276\u00a0[cs.CV]"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/30.1-2.81"},{"key":"e_1_3_2_2_18_1","unstructured":"Saehoon Kim Sanghun Cho Chiheon Kim Doyup Lee and Woonhyuk Baek. 2021. minDALL-E on Conceptual Captions. https:\/\/github.com\/kakaobrain\/minDALL-E."},{"key":"e_1_3_2_2_19_1","volume-title":"DALL-E 2 Fails to Reliably Capture Common Syntactic Processes. ArXiv abs\/2210.12889","author":"Leivada Evelina","year":"2022","unstructured":"Evelina Leivada, Elliot Murphy, and Gary Marcus. 2022. DALL-E 2 Fails to Reliably Capture Common Syntactic Processes. ArXiv abs\/2210.12889 (2022)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"Andreas Lugmayr Martin Danelljan Andres Romero Fisher Yu Radu Timofte and Luc\u00a0Van Gool. 2022. RePaint: Inpainting using Denoising Diffusion Probabilistic Models. arxiv:2201.09865\u00a0[cs.CV]","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1177\/0956797614541129"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/0005-2795(75)90109-9"},{"key":"e_1_3_2_2_24_1","unstructured":"Mary Norris. 2019. Female trouble: The debate over \"woman\" as an adjective. https:\/\/www.newyorker.com\/culture\/comma-queen\/female-trouble-the-debate-over-woman-as-an-adjective"},{"key":"e_1_3_2_2_25_1","volume-title":"Text-Only Training for Image Captioning using Noise-Injected CLIP. ArXiv abs\/2211.00575","author":"Nukrai David","year":"2022","unstructured":"David Nukrai, Ron Mokady, and Amir Globerson. 2022. Text-Only Training for Image Captioning using Noise-Injected CLIP. ArXiv abs\/2211.00575 (2022)."},{"key":"e_1_3_2_2_26_1","unstructured":"OpenAI. 2022. Reducing bias and improving safety in dall\u00b7e 2. https:\/\/openai.com\/blog\/reducing-bias-and-improving-safety-in-dall-e-2\/"},{"key":"e_1_3_2_2_27_1","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1).","author":"Park Dong\u00a0Huk","year":"2021","unstructured":"Dong\u00a0Huk Park, Samaneh Azadi, Xihui Liu, Trevor Darrell, and Anna Rohrbach. 2021. Benchmark for compositional text-to-image synthesis. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)."},{"key":"e_1_3_2_2_28_1","volume-title":"Scikit-learn: Machine Learning in Python. arxiv:1201.0490\u00a0[cs.LG]","author":"Pedregosa Fabian","year":"2012","unstructured":"Fabian Pedregosa, Ga\u00ebl Varoquaux, Alexandre Gramfort, Vincent Michel, Bertrand Thirion, Olivier Grisel, Mathieu Blondel, Andreas M\u00fcller, Joel Nothman, Gilles Louppe, Peter Prettenhofer, Ron Weiss, Vincent Dubourg, Jake Vanderplas, Alexandre Passos, David Cournapeau, Matthieu Brucher, Matthieu Perrot, and \u00c9douard Duchesnay. 2012. Scikit-learn: Machine Learning in Python. arxiv:1201.0490\u00a0[cs.LG]"},{"key":"e_1_3_2_2_29_1","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark Gretchen Krueger and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arxiv:2103.00020\u00a0[cs.CV]"},{"key":"e_1_3_2_2_30_1","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter\u00a0J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21, 140 (2020), 1\u201367. http:\/\/jmlr.org\/papers\/v21\/20-074.html","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_31_1","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. arxiv:2204.06125\u00a0[cs.CV]"},{"key":"e_1_3_2_2_32_1","volume-title":"DALLE-2 is Seeing Double: Flaws in Word-to-Concept Mapping in Text2Image Models. arXiv preprint arXiv:2210.10606","author":"Rassin Royi","year":"2022","unstructured":"Royi Rassin, Shauli Ravfogel, and Yoav Goldberg. 2022. DALLE-2 is Seeing Double: Flaws in Word-to-Concept Mapping in Text2Image Models. arXiv preprint arXiv:2210.10606 (2022)."},{"key":"e_1_3_2_2_33_1","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2021. High-Resolution Image Synthesis with Latent Diffusion Models. arxiv:2112.10752\u00a0[cs.CV]"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_35_1","volume-title":"Burcu\u00a0Karagol Ayan, S.\u00a0Sara Mahdavi, Rapha\u00a0Gontijo Lopes, Tim Salimans, Jonathan Ho, David\u00a0J Fleet, and Mohammad Norouzi.","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily Denton, Seyed Kamyar\u00a0Seyed Ghasemipour, Burcu\u00a0Karagol Ayan, S.\u00a0Sara Mahdavi, Rapha\u00a0Gontijo Lopes, Tim Salimans, Jonathan Ho, David\u00a0J Fleet, and Mohammad Norouzi. 2022. Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding. arxiv:2205.11487\u00a0[cs.CV]"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359246"},{"key":"e_1_3_2_2_37_1","unstructured":"Christoph Schuhmann Romain Beaumont Richard Vencu Cade Gordon Ross Wightman Mehdi Cherti Theo Coombes Aarush Katta Clayton Mullis Mitchell Wortsman Patrick Schramowski Srivatsa Kundurthy Katherine Crowson Ludwig Schmidt Robert Kaczmarczyk and Jenia Jitsev. 2022. LAION-5B: An open large-scale dataset for training next generation image-text models. arxiv:2210.08402\u00a0[cs.CV]"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3461702.3462594"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","unstructured":"Robyn Speer. 2022. rspeer\/wordfreq: v3.0. https:\/\/doi.org\/10.5281\/zenodo.7199437","DOI":"10.5281\/zenodo.7199437"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"crossref","unstructured":"Robyn Speer Joshua Chin and Catherine Havasi. 2016. ConceptNet 5.5: An Open Multilingual Graph of General Knowledge. arxiv:1612.03975\u00a0[cs.CL]","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"e_1_3_2_2_41_1","volume-title":"The Biased Artist: Exploiting Cultural Biases via Homoglyphs in Text-Guided Image Generation Models. ArXiv abs\/2209.08891","author":"Struppek Lukas","year":"2022","unstructured":"Lukas Struppek, Dominik Hintersdorf, and Kristian Kersting. 2022. The Biased Artist: Exploiting Cultural Biases via Homoglyphs in Text-Guided Image Generation Models. ArXiv abs\/2209.08891 (2022)."},{"key":"e_1_3_2_2_42_1","volume-title":"Neural discrete representation learning. Advances in neural information processing systems 30","author":"Den\u00a0Oord Aaron Van","year":"2017","unstructured":"Aaron Van Den\u00a0Oord, Oriol Vinyals, 2017. Neural discrete representation learning. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_43_1","unstructured":"Wikipedia. 2022. Gender expression. https:\/\/en.wikipedia.org\/wiki\/Gender_expression"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394231.3397900"},{"key":"e_1_3_2_2_45_1","unstructured":"Jiahui Yu Yuanzhong Xu Jing\u00a0Yu Koh Thang Luong Gunjan Baid Zirui Wang Vijay Vasudevan Alexander Ku Yinfei Yang Burcu\u00a0Karagol Ayan Ben Hutchinson Wei Han Zarana Parekh Xin Li Han Zhang Jason Baldridge and Yonghui Wu. 2022. Scaling Autoregressive Models for Content-Rich Text-to-Image Generation. arxiv:2206.10789\u00a0[cs.CV]"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1515\/ijsl-2018-2016"}],"event":{"name":"EAAMO '24: Equity and Access in Algorithms, Mechanisms, and Optimization","location":"San Luis Potosi Mexico","acronym":"EAAMO '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGecom Special Interest Group on Economics and Computation"]},"container-title":["Proceedings of the 4th ACM Conference on Equity and Access in Algorithms, Mechanisms, and Optimization"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689904.3694710","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3689904.3694710","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:47:41Z","timestamp":1755913661000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3689904.3694710"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"references-count":46,"alternative-id":["10.1145\/3689904.3694710","10.1145\/3689904"],"URL":"https:\/\/doi.org\/10.1145\/3689904.3694710","relation":{},"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"2024-10-29","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}