{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T18:10:05Z","timestamp":1755886205525,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3630106.3658940","type":"proceedings-article","created":{"date-parts":[[2024,6,5]],"date-time":"2024-06-05T09:14:21Z","timestamp":1717578861000},"page":"797-821","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Generalized People Diversity: Learning a Human Perception-Aligned Diversity Representation for People Images"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-9252-7754","authenticated-orcid":false,"given":"Hansa","family":"Srinivasan","sequence":"first","affiliation":[{"name":"Google, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2313-1279","authenticated-orcid":false,"given":"Candice","family":"Schumann","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0092-8214","authenticated-orcid":false,"given":"Aradhana","family":"Sinha","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6817-8743","authenticated-orcid":false,"given":"David","family":"Madras","sequence":"additional","affiliation":[{"name":"Google, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8967-1013","authenticated-orcid":false,"given":"Gbolahan Oluwafemi","family":"Olanubi","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5917-2849","authenticated-orcid":false,"given":"Alex","family":"Beutel","sequence":"additional","affiliation":[{"name":"OpenAI, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1505-7055","authenticated-orcid":false,"given":"Susanna","family":"Ricco","sequence":"additional","affiliation":[{"name":"Google, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3359-0938","authenticated-orcid":false,"given":"Jilin","family":"Chen","sequence":"additional","affiliation":[{"name":"Google, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,5]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"A View From Somewhere: Human-Centric Face Representations. arXiv preprint arXiv:2303.17176","author":"Andrews TA","year":"2023","unstructured":"Jerone\u00a0TA Andrews, Przemyslaw Joniak, and Alice Xiang. 2023. A View From Somewhere: Human-Centric Face Representations. arXiv preprint arXiv:2303.17176 (2023)."},{"key":"e_1_3_2_2_2_1","volume-title":"The Reasonable Effectiveness of Diverse Evaluation Data. arXiv preprint arXiv:2301.09406","author":"Aroyo Lora","year":"2023","unstructured":"Lora Aroyo, Mark Diaz, Christopher Homan, Vinodkumar Prabhakaran, Alex Taylor, and Ding Wang. 2023. The Reasonable Effectiveness of Diverse Evaluation Data. arXiv preprint arXiv:2301.09406 (2023)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1177\/014920630202800304"},{"volume-title":"What Families are Seeing on TV","author":"Baruah Sabyasachee","key":"e_1_3_2_2_4_1","unstructured":"Sabyasachee Baruah, Digbalay Bose, Meredith Conroy, Shrikanth\u00a0S. Narayanan, Susanna Ricco, Komal Singh, and Krishna Somandepalli. 2022. #SeeItBeIt: What Families are Seeing on TV.The Geena Davis Institute on Gender in Media."},{"key":"e_1_3_2_2_5_1","volume-title":"Man is to computer programmer as woman is to homemaker? debiasing word embeddings. Advances in neural information processing systems 29","author":"Bolukbasi Tolga","year":"2016","unstructured":"Tolga Bolukbasi, Kai-Wei Chang, James\u00a0Y Zou, Venkatesh Saligrama, and Adam\u00a0T Kalai. 2016. Man is to computer programmer as woman is to homemaker? debiasing word embeddings. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2213556.2213580"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/290941.291025"},{"key":"e_1_3_2_2_8_1","volume-title":"International conference on machine learning. PMLR, 716\u2013725","author":"Celis Elisa","year":"2018","unstructured":"Elisa Celis, Vijay Keswani, Damian Straszak, Amit Deshpande, Tarun Kathuria, and Nisheeth Vishnoi. 2018. Fair and diverse DPP-based data summarization. In International conference on machine learning. PMLR, 716\u2013725."},{"key":"e_1_3_2_2_9_1","volume-title":"How to be fair and diverse?arXiv preprint arXiv:1610.07183","author":"Celis L\u00a0Elisa","year":"2016","unstructured":"L\u00a0Elisa Celis, Amit Deshpande, Tarun Kathuria, and Nisheeth\u00a0K Vishnoi. 2016. How to be fair and diverse?arXiv preprint arXiv:1610.07183 (2016)."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3415210"},{"key":"e_1_3_2_2_11_1","volume-title":"Large scale online learning of image similarity through ranking.Journal of Machine Learning Research 11, 3","author":"Chechik Gal","year":"2010","unstructured":"Gal Chechik, Varun Sharma, Uri Shalit, and Samy Bengio. 2010. Large scale online learning of image similarity through ranking.Journal of Machine Learning Research 11, 3 (2010)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00283"},{"volume-title":"Vol.\u00a01","author":"Chopra Sumit","key":"e_1_3_2_2_13_1","unstructured":"Sumit Chopra, Raia Hadsell, and Yann LeCun. 2005. Learning a similarity metric discriminatively, with application to face verification. In 2005 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201905), Vol.\u00a01. IEEE, 539\u2013546."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682170"},{"key":"e_1_3_2_2_15_1","volume-title":"Debiasing vision-language models via biased prompts. arXiv preprint arXiv:2302.00070","author":"Chuang Ching-Yao","year":"2023","unstructured":"Ching-Yao Chuang, Varun Jampani, Yuanzhen Li, Antonio Torralba, and Stefanie Jegelka. 2023. Debiasing vision-language models via biased prompts. arXiv preprint arXiv:2302.00070 (2023)."},{"key":"e_1_3_2_2_16_1","volume-title":"International conference on machine learning. PMLR, 1436\u20131445","author":"Creager Elliot","year":"2019","unstructured":"Elliot Creager, David Madras, J\u00f6rn-Henrik Jacobsen, Marissa Weis, Kevin Swersky, Toniann Pitassi, and Richard Zemel. 2019. Flexibly fair representation learning by disentanglement. In International conference on machine learning. PMLR, 1436\u20131445."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.130"},{"key":"e_1_3_2_2_18_1","volume-title":"Drawing the diversity line: Numerical thresholds of diversity vary by group status.Journal of Personality and Social Psychology 118, 2","author":"Danbold Felix","year":"2020","unstructured":"Felix Danbold and Miguel\u00a0M Unzueta. 2020. Drawing the diversity line: Numerical thresholds of diversity vary by group status.Journal of Personality and Social Psychology 118, 2 (2020), 283."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00449"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1002\/hpja.21"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1177\/20539517211035955"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3534647"},{"key":"e_1_3_2_2_24_1","volume-title":"Censoring representations with an adversary. arXiv preprint arXiv:1511.05897","author":"Edwards Harrison","year":"2015","unstructured":"Harrison Edwards and Amos Storkey. 2015. Censoring representations with an adversary. arXiv preprint arXiv:1511.05897 (2015)."},{"key":"e_1_3_2_2_25_1","volume-title":"Fair diffusion: Instructing text-to-image generation models on fairness. arXiv preprint arXiv:2302.10893","author":"Friedrich Felix","year":"2023","unstructured":"Felix Friedrich, Patrick Schramowski, Manuel Brack, Lukas Struppek, Dominik Hintersdorf, Sasha Luccioni, and Kristian Kersting. 2023. Fair diffusion: Instructing text-to-image generation models on fairness. arXiv preprint arXiv:2302.10893 (2023)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10869-013-9290-0"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3555088"},{"volume-title":"Vol.\u00a02","author":"Hadsell Raia","key":"e_1_3_2_2_28_1","unstructured":"Raia Hadsell, Sumit Chopra, and Yann LeCun. 2006. Dimensionality reduction by learning an invariant mapping. In 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201906), Vol.\u00a02. IEEE, 1735\u20131742."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.3390\/s22145245"},{"key":"e_1_3_2_2_30_1","unstructured":"Alexander Hermans Lucas Beyer and Bastian Leibe. 2017. In Defense of the Triplet Loss for Person Re-Identification. arxiv:1703.07737\u00a0[cs.CV]"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24261-3_7"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0149-2063(03)00080-1"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.121"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3213586.3226206"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2702123.2702520"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1002\/job.698"},{"key":"e_1_3_2_2_37_1","volume-title":"Aligning text-to-image models using human feedback. arXiv preprint arXiv:2302.12192","author":"Lee Kimin","year":"2023","unstructured":"Kimin Lee, Hao Liu, Moonkyung Ryu, Olivia Watkins, Yuqing Du, Craig Boutilier, Pieter Abbeel, Mohammad Ghavamzadeh, and Shixiang\u00a0Shane Gu. 2023. Aligning text-to-image models using human feedback. arXiv preprint arXiv:2302.12192 (2023)."},{"key":"e_1_3_2_2_38_1","volume-title":"Does clip bind concepts? probing compositionality in large image models. arXiv preprint arXiv:2212.10537","author":"Lewis Martha","year":"2022","unstructured":"Martha Lewis, Qinan Yu, Jack Merullo, and Ellie Pavlick. 2022. Does clip bind concepts? probing compositionality in large image models. arXiv preprint arXiv:2212.10537 (2022)."},{"key":"e_1_3_2_2_39_1","volume-title":"What is gender, anyway: a review of the options for operationalising gender. Psychology & sexuality 12, 4","author":"Lindqvist Anna","year":"2021","unstructured":"Anna Lindqvist, Marie\u00a0Gustafsson Send\u00e9n, and Emma\u00a0A Renstr\u00f6m. 2021. What is gender, anyway: a review of the options for operationalising gender. Psychology & sexuality 12, 4 (2021), 332\u2013344."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1080\/21670811.2020.1766987"},{"key":"e_1_3_2_2_41_1","volume-title":"The variational fair autoencoder. arXiv preprint arXiv:1511.00830","author":"Louizos Christos","year":"2015","unstructured":"Christos Louizos, Kevin Swersky, Yujia Li, Max Welling, and Richard Zemel. 2015. The variational fair autoencoder. arXiv preprint arXiv:1511.00830 (2015)."},{"key":"e_1_3_2_2_42_1","volume-title":"International Conference on Machine Learning. PMLR, 3384\u20133393","author":"Madras David","year":"2018","unstructured":"David Madras, Elliot Creager, Toniann Pitassi, and Richard Zemel. 2018. Learning adversarially fair and transferable representations. In International Conference on Machine Learning. PMLR, 3384\u20133393."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00396"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2014.2359765"},{"volume-title":"Algorithms of Oppression","author":"Noble Safiya\u00a0Umoja","key":"e_1_3_2_2_45_1","unstructured":"Safiya\u00a0Umoja Noble. 2018. Algorithms of Oppression. NYU Press."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210094"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"crossref","unstructured":"Victoria\u00a0C Plaut Sapna Cheryan and Flannery\u00a0G Stevens. 2015. New frontiers in diversity research: Conceptions of diversity and their theoretical and practical implications. (2015).","DOI":"10.1037\/14341-019"},{"key":"e_1_3_2_2_48_1","volume-title":"Large image datasets: A pyrrhic win for computer vision?arXiv preprint arXiv:2006.16923","author":"Prabhu Vinay\u00a0Uday","year":"2020","unstructured":"Vinay\u00a0Uday Prabhu and Abeba Birhane. 2020. Large image datasets: A pyrrhic win for computer vision?arXiv preprint arXiv:2006.16923 (2020)."},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2699623"},{"key":"e_1_3_2_2_50_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1002\/ejsp.1892"},{"key":"e_1_3_2_2_52_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Ravfogel Shauli","year":"2022","unstructured":"Shauli Ravfogel, Michael Twiton, Yoav Goldberg, and Ryan\u00a0D Cotterell. 2022. Linear adversarial concept erasure. In International Conference on Machine Learning. PMLR, 18400\u201318421."},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1108\/02610151311305614"},{"key":"e_1_3_2_2_54_1","volume-title":"Consensus and Subjectivity of Skin Tone Annotation for ML Fairness. arXiv preprint arXiv:2305.09073","author":"Schumann Candice","year":"2023","unstructured":"Candice Schumann, Gbolahan\u00a0O Olanubi, Auriel Wright, Ellis Monk\u00a0Jr, Courtney Heldreth, and Susanna Ricco. 2023. Consensus and Subjectivity of Skin Tone Annotation for ML Fairness. arXiv preprint arXiv:2305.09073 (2023)."},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3461702.3462594"},{"key":"e_1_3_2_2_56_1","volume-title":"No classification without representation: Assessing geodiversity issues in open data sets for the developing world. arXiv preprint arXiv:1711.08536","author":"Shankar Shreya","year":"2017","unstructured":"Shreya Shankar, Yoni Halpern, Eric Breck, James Atwood, Jimbo Wilson, and D Sculley. 2017. No classification without representation: Assessing geodiversity issues in open data sets for the developing world. arXiv preprint arXiv:1711.08536 (2017)."},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594112"},{"key":"e_1_3_2_2_58_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_2_59_1","volume-title":"The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 2164\u20132173","author":"Song Jiaming","year":"2019","unstructured":"Jiaming Song, Pratyusha Kalluri, Aditya Grover, Shengjia Zhao, and Stefano Ermon. 2019. Learning controllable fair representations. In The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 2164\u20132173."},{"key":"e_1_3_2_2_60_1","volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 411\u2013419","author":"Tanjim Md\u00a0Mehrab","year":"2022","unstructured":"Md\u00a0Mehrab Tanjim, Ritwik Sinha, Krishna\u00a0Kumar Singh, Sridhar Mahadevan, David Arbour, Moumita Sinha, and Garrison\u00a0W Cottrell. 2022. Generating and controlling diversity in image search. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 411\u2013419."},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/1526709.1526756"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.180"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2012.2207397"},{"key":"e_1_3_2_2_64_1","volume-title":"The Independent Compositional Subspace Hypothesis for the Structure of CLIP\u2019s Last Layer. In ICLR 2023 Workshop on Mathematical and Empirical Understanding of Foundation Models.","author":"Wolff Max","year":"2023","unstructured":"Max Wolff, Wieland Brendel, and Stuart Wolff. 2023. The Independent Compositional Subspace Hypothesis for the Structure of CLIP\u2019s Last Layer. In ICLR 2023 Workshop on Mathematical and Empirical Understanding of Foundation Models."},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394231.3397900"},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3375709"},{"key":"e_1_3_2_2_67_1","volume-title":"Learning to rank using user clicks and visual features for image retrieval","author":"Yu Jun","year":"2014","unstructured":"Jun Yu, Dacheng Tao, Meng Wang, and Yong Rui. 2014. Learning to rank using user clicks and visual features for image retrieval. IEEE transactions on cybernetics 45, 4 (2014), 767\u2013779."},{"key":"e_1_3_2_2_68_1","volume-title":"Coca: Contrastive captioners are image-text foundation models. arXiv preprint arXiv:2205.01917","author":"Yu Jiahui","year":"2022","unstructured":"Jiahui Yu, Zirui Wang, Vijay Vasudevan, Legg Yeung, Mojtaba Seyedhosseini, and Yonghui Wu. 2022. Coca: Contrastive captioners are image-text foundation models. arXiv preprint arXiv:2205.01917 (2022)."},{"key":"e_1_3_2_2_69_1","volume-title":"Scaling autoregressive models for content-rich text-to-image generation. arXiv preprint arXiv:2206.10789 2, 3","author":"Yu Jiahui","year":"2022","unstructured":"Jiahui Yu, Yuanzhong Xu, Jing\u00a0Yu Koh, Thang Luong, Gunjan Baid, Zirui Wang, Vijay Vasudevan, Alexander Ku, Yinfei Yang, Burcu\u00a0Karagol Ayan, 2022. Scaling autoregressive models for content-rich text-to-image generation. arXiv preprint arXiv:2206.10789 2, 3 (2022), 5."},{"key":"e_1_3_2_2_70_1","volume-title":"Interpretable and Controllable Text-Guided Face Manipulation. In ACM SIGGRAPH 2023 Conference Proceedings. 1\u20139.","author":"Zhou Chenliang","year":"2023","unstructured":"Chenliang Zhou, Fangcheng Zhong, and Cengiz \u00d6ztireli. 2023. CLIP-PAE: Projection-Augmentation Embedding to Extract Relevant Features for a Disentangled, Interpretable and Controllable Text-Guided Face Manipulation. In ACM SIGGRAPH 2023 Conference Proceedings. 1\u20139."}],"event":{"name":"FAccT '24: The 2024 ACM Conference on Fairness, Accountability, and Transparency","acronym":"FAccT '24","location":"Rio de Janeiro Brazil"},"container-title":["The 2024 ACM Conference on Fairness Accountability and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3658940","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3630106.3658940","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:34:22Z","timestamp":1755884062000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3658940"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":70,"alternative-id":["10.1145\/3630106.3658940","10.1145\/3630106"],"URL":"https:\/\/doi.org\/10.1145\/3630106.3658940","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}