{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T00:35:52Z","timestamp":1768005352911,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":98,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,18]]},"DOI":"10.1145\/3715070.3748285","type":"proceedings-article","created":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T05:55:41Z","timestamp":1760680541000},"page":"99-105","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Responsibly Training Foundation Models: Actualizing Ethical Principles for Curating Large-Scale Training Datasets in the Era of Massive AI Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6049-3965","authenticated-orcid":false,"given":"Morgan Klaus","family":"Scheuerman","sequence":"first","affiliation":[{"name":"Sony AI, Denver, Colorado, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8893-143X","authenticated-orcid":false,"given":"Dora","family":"Zhao","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8552-1213","authenticated-orcid":false,"given":"Jerone T. A.","family":"Andrews","sequence":"additional","affiliation":[{"name":"AI Ethics, Sony AI, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6319-7937","authenticated-orcid":false,"given":"Abeba","family":"Birhane","sequence":"additional","affiliation":[{"name":"Trinity College Dublin, Dublin, Ireland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4543-7196","authenticated-orcid":false,"given":"Q. Vera","family":"Liao","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, Ann Arbor, Michigan, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4408-6371","authenticated-orcid":false,"given":"Georgia","family":"Panagiotidou","sequence":"additional","affiliation":[{"name":"King's College London, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3269-2585","authenticated-orcid":false,"given":"Pooja","family":"Chitre","sequence":"additional","affiliation":[{"name":"Arizona State University, Tempe, Arizona, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2172-8100","authenticated-orcid":false,"given":"Kathleen","family":"Pine","sequence":"additional","affiliation":[{"name":"Arizona State university, Tempe, Arizona, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7052-5705","authenticated-orcid":false,"given":"Shawn","family":"Walker","sequence":"additional","affiliation":[{"name":"Arizona State University, Phoenix, Arizona, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9956-5481","authenticated-orcid":false,"given":"Jieyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Southern California, Los Angeles, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7907-9353","authenticated-orcid":false,"given":"Alice","family":"Xiang","sequence":"additional","affiliation":[{"name":"Sony AI, Seattle, Washington, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,10,17]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"2023. EU AI Act: First Regulation on Artificial Intelligence. https:\/\/www.europarl.europa.eu\/topics\/en\/article\/20230601STO93804\/eu-ai-act-first-regulation-on-artificial-intelligence."},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3593990"},{"key":"e_1_3_3_2_4_2","unstructured":"Antonio Aloisi. 2015. Commoditized Workers: Case Study Research on Labor Law Issues Arising from a Set of on-Demand\/Gig Economy Platforms. Comparative Labor Law & Policy Journal 37 (2015)."},{"key":"e_1_3_3_2_5_2","volume-title":"Thirty-Seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track","author":"Andrews Jerone","year":"2023","unstructured":"Jerone Andrews, Dora Zhao, William Thong, Apostolos Modas, Orestis Papakyriakopoulos, and Alice Xiang. 2023. Ethical Considerations for Responsible Data Curation. In Thirty-Seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track."},{"key":"e_1_3_3_2_6_2","unstructured":"Jerone\u00a0TA Andrews Dora Zhao William Thong Apostolos Modas Orestis Papakyriakopoulos Shruti Nagpal and Alice Xiang. 2023. Ethical considerations for collecting human-centric image datasets. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.03629 (2023)."},{"key":"e_1_3_3_2_7_2","unstructured":"P Barger TS Behrend DJ Sharek and EF Sinar. 2011. IO and the crowd: Frequently asked questions about using Mechanical Turk for research. The Industrial-Organizational Psychologist 49 2 (2011) 11\u201317."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","unstructured":"Amirsiavosh Bashardoust Stefan Feuerriegel and Yash\u00a0Raj Shrestha. 2024. Comparing the Willingness to Share for Human-generated vs. AI-generated Fake News. Proc. ACM Hum.-Comput. Interact. 8 CSCW2 (Nov. 2024) 489:1\u2013489:21. 10.1145\/3687028","DOI":"10.1145\/3687028"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","unstructured":"Jo Bates Elli Gerakopoulou and Alessandro Checco. 2023. Addressing Labour Exploitation in the Data Science Pipeline: Views of Precarious US-based Crowdworkers on Adversarial and Co-Operative Interventions. Journal of Information Communication and Ethics in Society 21 3 (Jan. 2023) 342\u2013357. 10.1108\/JICES-08-2022-0069","DOI":"10.1108\/JICES-08-2022-0069"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658968"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00158"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00158"},{"key":"e_1_3_3_2_14_2","unstructured":"Abeba Birhane Vinay\u00a0Uday Prabhu and Emmanuel Kahembwe. 2021. Multimodal datasets: misogyny pornography and malignant stereotypes. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2110.01963 (2021)."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","unstructured":"Alceu Bissoto Catarina Barata Eduardo Valle and Sandra Avila. 2024. Even Small Correlation and Diversity Shifts Pose Dataset-Bias Issues. Pattern Recognition Letters 179 (March 2024) 87\u201393. 10.1016\/j.patrec.2024.01.026","DOI":"10.1016\/j.patrec.2024.01.026"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","unstructured":"Hannah Bleher and Matthias Braun. 2023. Reflections on Putting AI Ethics into Practice: How Three AI Ethics Approaches Conceptualize Theory and Practice. Science and Engineering Ethics 29 3 (May 2023) 21. 10.1007\/s11948-023-00443-3","DOI":"10.1007\/s11948-023-00443-3"},{"key":"e_1_3_3_2_17_2","volume-title":"Advances in Neural Information Processing Systems","author":"Bolukbasi Tolga","year":"2016","unstructured":"Tolga Bolukbasi, Kai-Wei Chang, James\u00a0Y Zou, Venkatesh Saligrama, and Adam\u00a0T Kalai. 2016. Man Is to Computer Programmer as Woman Is to Homemaker? Debiasing Word Embeddings. In Advances in Neural Information Processing Systems , Vol.\u00a029. Curran Associates, Inc."},{"key":"e_1_3_3_2_18_2","unstructured":"Rishi Bommasani Drew\u00a0A Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael\u00a0S Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill Erik Brynjolfsson S Buch Dallas Card Rodrigo Castellon Niladri\u00a0S Chatterji Annie\u00a0S Chen Kathleen\u00a0A Creel Jared Davis Dora Demszky Chris Donahue Moussa Doumbouya Esin Durmus Stefano Ermon John Etchemendy Kawin Ethayarajh Li Fei-Fei Chelsea Finn Trevor Gale Lauren\u00a0E Gillespie Karan Goel Noah\u00a0D Goodman Shelby Grossman Neel Guha Tatsunori Hashimoto Peter Henderson John Hewitt Daniel\u00a0E Ho Jenny Hong Kyle Hsu Jing Huang Thomas\u00a0F Icard Saahil Jain Dan Jurafsky Pratyusha Kalluri Siddharth Karamcheti Geoff Keeling Fereshte Khani O Khattab Pang\u00a0Wei Koh Mark\u00a0S Krass Ranjay Krishna Rohith Kuditipudi Ananya Kumar Faisal Ladhak Mina Lee Tony Lee Jure Leskovec Isabelle Levent Xiang\u00a0Lisa Li Xuechen Li Tengyu Ma Ali Malik Christopher\u00a0D Manning Suvir\u00a0P Mirchandani Eric Mitchell Zanele Munyikwa Suraj Nair Avanika Narayan Deepak Narayanan Benjamin Newman Allen Nie Juan\u00a0Carlos Niebles Hamed Nilforoshan J\u00a0F Nyarko Giray Ogut Laurel Orr Isabel Papadimitriou Joon\u00a0Sung Park Chris Piech Eva Portelance Christopher Potts Aditi Raghunathan Robert Reich Hongyu Ren Frieda Rong Yusuf\u00a0H Roohani Camilo Ruiz Jack Ryan Christopher R\u2019e Dorsa Sadigh Shiori Sagawa Keshav Santhanam Andy Shih Krishna\u00a0Parasuram Srinivasan Alex Tamkin Rohan Taori Armin\u00a0W Thomas Florian Tram\u00e8r Rose\u00a0E Wang William Wang Bohan Wu Jiajun Wu Yuhuai Wu Sang\u00a0Michael Xie Michihiro Yasunaga Jiaxuan You Matei\u00a0A Zaharia Michael Zhang Tianyi Zhang Xikun Zhang Yuhui Zhang Lucia Zheng Kaitlyn Zhou and Percy Liang. 2021. On the Opportunities and Risks of Foundation Models. ArXiv abs\/2108.0 (2021)."},{"key":"e_1_3_3_2_19_2","first-page":"77","volume-title":"Conference on fairness, accountability and transparency","author":"Buolamwini Joy","year":"2018","unstructured":"Joy Buolamwini and Timnit Gebru. 2018. Gender shades: Intersectional accuracy disparities in commercial gender classification. In Conference on fairness, accountability and transparency. PMLR, 77\u201391."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","unstructured":"Marco Cascella Jonathan Montomoli Valentina Bellini and Elena Bignami. 2023. Evaluating the Feasibility of ChatGPT in Healthcare: An Analysis of Multiple Clinical and Research Scenarios. Journal of Medical Systems 47 1 (March 2023) 33. 10.1007\/s10916-023-01925-4","DOI":"10.1007\/s10916-023-01925-4"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","unstructured":"Boyang Chen Zongxiao Wu and Ruoran Zhao. 2023. From Fiction to Fact: The Growing Role of Generative AI in Business and Finance. Journal of Chinese Economic and Business Studies 21 4 (Oct. 2023) 471\u2013496. 10.1080\/14765284.2023.2245279","DOI":"10.1080\/14765284.2023.2245279"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3675094.3678378"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","unstructured":"Jan Clusmann Fiona\u00a0R. Kolbinger Hannah\u00a0Sophie Muti Zunamys\u00a0I. Carrero Jan-Niklas Eckardt Narmin\u00a0Ghaffari Laleh Chiara Maria\u00a0Lavinia L\u00f6ffler Sophie-Caroline Schwarzkopf Michaela Unger Gregory\u00a0P. Veldhuizen Sophia\u00a0J. Wagner and Jakob\u00a0Nikolas Kather. 2023. The Future Landscape of Large Language Models in Medicine. Communications Medicine 3 1 (Oct. 2023) 1\u20138. 10.1038\/s43856-023-00370-1","DOI":"10.1038\/s43856-023-00370-1"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","unstructured":"Nicholas\u00a0Kluge Corr\u00eaa Camila Galv\u00e3o James\u00a0William Santos Carolina Del\u00a0Pino Edson\u00a0Pontes Pinto Camila Barbosa Diogo Massmann Rodrigo Mambrini Luiza Galv\u00e3o Edmund Terem and Nythamar de Oliveira. 2023. Worldwide AI Ethics: A Review of 200 Guidelines and Recommendations for AI Governance. Patterns 4 10 (Oct. 2023) 100857. 10.1016\/j.patter.2023.100857","DOI":"10.1016\/j.patter.2023.100857"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Kate Crawford and Trevor Paglen. 2021. Excavating AI: The politics of images in machine learning training sets. Ai & Society 36 4 (2021) 1105\u20131116.","DOI":"10.1007\/s00146-021-01301-1"},{"key":"e_1_3_3_2_26_2","unstructured":"Nicolas Croce and Moh Musa. 2019. The new assembly lines: Why ai needs low-skilled workers too. https:\/\/www.weforum.org\/agenda\/2019\/08\/ai-low-skilled-workers\/"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","unstructured":"Roxana Daneshjou Mary\u00a0P. Smith Mary\u00a0D. Sun Veronica Rotemberg and James Zou. 2021. Lack of Transparency and Potential Bias in Artificial Intelligence Data Sets and Algorithms: A Scoping Review. JAMA Dermatology 157 11 (Nov. 2021) 1362\u20131369. 10.1001\/jamadermatol.2021.3129","DOI":"10.1001\/jamadermatol.2021.3129"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_3_2_29_2","unstructured":"Devansh. 2023. Data Laundering: How Stability AI Managed to Get Millions of Copyrighted Artworks without Paying...."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3534647"},{"key":"e_1_3_3_2_31_2","unstructured":"Vittoria Elliott. 2024. AI Tools Are Secretly Training on Real Images of Children. Wired (June 2024)."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3551624.3555286"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3617694.3623223"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3571884.3603751"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3514094.3534137"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00672"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502004"},{"key":"e_1_3_3_2_38_2","volume-title":"Ghost Work: How to Stop Silicon Valley from Building a New Global Underclass","author":"Gray Mary\u00a0L.","year":"2019","unstructured":"Mary\u00a0L. Gray and Suri Siddharth. 2019. Ghost Work: How to Stop Silicon Valley from Building a New Global Underclass."},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","unstructured":"Ben Green. 2021. The Contestation of Tech Ethics: A Sociotechnical Approach to Technology Ethics in Practice. Journal of Social Computing 2 3 (Sept. 2021) 209\u2013225. 10.23919\/JSC.2021.0018","DOI":"10.23919\/JSC.2021.0018"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Philipp Hacker. 2024. Sustainable AI Regulation. Common Market Law Review 61 2 (April 2024).","DOI":"10.54648\/COLA2024025"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174023"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/2998181.2998248"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533184"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533184"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300830"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658911"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","unstructured":"Oana Inel Tim Draws and Lora Aroyo. 2023. Collect Measure Repeat: Reliability Factors for Responsible AI Data Collection. Proceedings of the AAAI Conference on Human Computation and Crowdsourcing 11 1 (Nov. 2023) 51\u201364. 10.1609\/hcomp.v11i1.27547","DOI":"10.1609\/hcomp.v11i1.27547"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"crossref","unstructured":"Lilly Irani. 2015. The cultural work of microwork. New media & society 17 5 (2015) 720\u2013739.","DOI":"10.1177\/1461444813511926"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","unstructured":"Nitisha Jain Mubashara Akhtar Joan Giner-Miguelez Rajat Shinde Joaquin Vanschoren Steffen Vogler Sujata Goswami Yuhan Rao Tim Santos Luis Oala Michalis Karamousadakis Manil Maskey Pierre Marcenac Costanza Conforti Michael Kuchnik Lora Aroyo Omar Benjelloun and Elena Simperl. 2024. A Standardized Machine-readable Dataset Documentation Format for Responsible AI. 10.48550\/arXiv.2407.16883 arxiv:https:\/\/arXiv.org\/abs\/2407.16883\u00a0[cs]","DOI":"10.48550\/arXiv.2407.16883"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533097"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","unstructured":"Chenyan Jia Michelle\u00a0S. Lam Minh\u00a0Chau Mai Jeffrey\u00a0T. Hancock and Michael\u00a0S. Bernstein. 2024. Embedding Democratic Values into Social Media AIs via Societal Objective Functions. Proc. ACM Hum.-Comput. Interact. 8 CSCW1 (April 2024) 163:1\u2013163:36. 10.1145\/3641002","DOI":"10.1145\/3641002"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","unstructured":"Anna Jobin Marcello Ienca and Effy Vayena. 2019. The Global Landscape of AI Ethics Guidelines. Nature Machine Intelligence 1 9 (Sept. 2019) 389\u2013399. 10.1038\/s42256-019-0088-2","DOI":"10.1038\/s42256-019-0088-2"},{"key":"e_1_3_3_2_53_2","unstructured":"Yotam Kaplan and Ayelet Gordon-Tapiero. 2024. Generative AI Training as Unjust Enrichment."},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/3584931.3608438"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"crossref","unstructured":"Michelle\u00a0S Lam Mitchell\u00a0L Gordon Dana\u00eb Metaxa Jeffrey\u00a0T Hancock James\u00a0A Landay and Michael\u00a0S Bernstein. 2022. End-user audits: A system empowering communities to lead large-scale investigations of harmful algorithmic behavior. proceedings of the ACM on Human-Computer Interaction 6 CSCW2 (2022) 1\u201334.","DOI":"10.1145\/3555625"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","unstructured":"Tina\u00a0B. Lassiter and Kenneth\u00a0R. Fleischmann. 2024. \"Something Fast and Cheap\" or \"A Core Element of Building Trust\"? - AI Auditing Professionals\u2019 Perspectives on Trust in AI. Proc. ACM Hum.-Comput. Interact. 8 CSCW2 (Nov. 2024) 424:1\u2013424:22. 10.1145\/3686963","DOI":"10.1145\/3686963"},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"publisher","unstructured":"David Kuo\u00a0Chuen Lee Chong Guan Yinghui Yu and Qinxu Ding. 2024. A Comprehensive Review of Generative AI in Finance. FinTech 3 3 (Sept. 2024) 460\u2013478. 10.3390\/fintech3030025","DOI":"10.3390\/fintech3030025"},{"key":"e_1_3_3_2_58_2","unstructured":"Shayne Longpre Robert Mahari Ariel Lee Campbell Lund Hamidah Oderinwale William Brannon Nayan Saxena Naana Obeng-Marnu Tobin South Cole Hunter Christopher Klamm Hailey Schoelkopf Nikhil Singh Manuel Cherep Mustafa Anis An Dinh Caroline Chitongo Da Yin Damien Sileo Deividas Mataciunas Diganta Misra Emad Alghamdi Enrico Shippole Jianguo Zhang Joanna Materzynska Kun Qian Kush Tiwary Lester Miranda Manan Dey Minnie Liang Niklas Muennighoff Seonghyeon Ye Seungone Kim Shrestha Mohanty Vivek Sharma Vu\u00a0Minh Chien Xuhui Zhou Yizhi Li Caiming Xiong Luis Villa Stella Biderman Hanlin Li Daphne Ippolito Sara Hooker and Jad Kabbara. 2024. Consent in Crisis: The Rapid Decline of the AI Data Commons. (2024)."},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-short.24"},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"publisher","unstructured":"Jabez Magomere Shu Ishida Tejumade Afonja Aya Salama Daniel Kochin Foutse Yuehgoh Imane Hamzaoui Raesetje Sefala Aisha Alaagib Elizaveta Semenova Lauren Crais and Siobhan\u00a0Mackenzie Hall. 2024. You Are What You Eat? Feeding Foundation Models a Regionally Diverse Food Dataset of World Wide Dishes. 10.48550\/arXiv.2406.09496 arxiv:https:\/\/arXiv.org\/abs\/2406.09496\u00a0[cs]","DOI":"10.48550\/arXiv.2406.09496"},{"key":"e_1_3_3_2_61_2","unstructured":"Nicolas Malev\u00e9. 2020. On the data set\u2019s ruins. AI & SOCIETY (2020) 1\u201315."},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"publisher","unstructured":"Kate McCandless. 2021. Just Because the Data Is There It Doesn\u2019t Mean It\u2019s Yours to Take : Exploring User Researcher and Review Board Perceptions in Twitter Data Research. Emerging Library & Information Perspectives 4 1 (July 2021) 34\u201361. 10.5206\/elip.v4i1.13554","DOI":"10.5206\/elip.v4i1.13554"},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"publisher","unstructured":"Shira Mitchell Eric Potash Solon Barocas Alexander D\u2019Amour and Kristian Lum. 2021. Algorithmic Fairness: Choices Assumptions and Definitions. Annual Review of Statistics and Its Application 8 1 (March 2021) 141\u2013163. 10.1146\/annurev-statistics-042720-125902","DOI":"10.1146\/annurev-statistics-042720-125902"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"publisher","unstructured":"Luke Munn. 2023. The Uselessness of AI Ethics. AI and Ethics 3 3 (Aug. 2023) 869\u2013877. 10.1007\/s43681-022-00209-w","DOI":"10.1007\/s43681-022-00209-w"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3600211.3604659"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3659028"},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"publisher","unstructured":"Cecilia Panigutti Andrea Beretta Daniele Fadda Fosca Giannotti Dino Pedreschi Alan Perotti and Salvatore Rinzivillo. 2023. Co-Design of Human-centered Explainable AI for Clinical Decision Support. ACM Trans. Interact. Intell. Syst. 13 4 (Dec. 2023) 21:1\u201321:35. 10.1145\/3587271","DOI":"10.1145\/3587271"},{"key":"e_1_3_3_2_68_2","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594049"},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"crossref","unstructured":"Amandalynne Paullada Inioluwa\u00a0Deborah Raji Emily\u00a0M Bender Emily Denton and Alex Hanna. 2021. Data and its (dis) contents: A survey of dataset development and use in machine learning research. Patterns 2 11 (2021).","DOI":"10.1016\/j.patter.2021.100336"},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"publisher","DOI":"10.35199\/EPDE.2024.94"},{"key":"e_1_3_3_2_71_2","unstructured":"Kenny Peng Arunesh Mathur and Arvind Narayanan. 2021. Mitigating Dataset Harms Requires Stewardship: Lessons from 1000 Papers. (Aug. 2021). arxiv:https:\/\/arXiv.org\/abs\/2108.02922"},{"key":"e_1_3_3_2_72_2","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)","author":"Peng Kenneth\u00a0L","year":"2021","unstructured":"Kenneth\u00a0L Peng, Arunesh Mathur, and Arvind Narayanan. 2021. Mitigating dataset harms requires stewardship: Lessons from 1000 papers. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)."},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"crossref","unstructured":"Amit Pinchevski. 2023. Social media\u2019s canaries: content moderators between digital labor and mediated trauma. Media Culture & Society 45 1 (2023) 212\u2013221.","DOI":"10.1177\/01634437221122226"},{"key":"e_1_3_3_2_74_2","doi-asserted-by":"publisher","unstructured":"Ana\u00efs Ress\u00e9guier and Rowena Rodrigues. 2020. AI Ethics Should Not Remain Toothless! A Call to Bring Back the Teeth of Ethics. Big Data & Society 7 2 (July 2020) 2053951720942541. 10.1177\/2053951720942541","DOI":"10.1177\/2053951720942541"},{"key":"e_1_3_3_2_75_2","doi-asserted-by":"publisher","unstructured":"Kat Roemmich Shanley Corvite Cassidy Pyle Nadia Karizat and Nazanin Andalibi. 2024. Emotion AI Use in U.S. Mental Healthcare: Potentially Unjust and Techno-Solutionist. Proc. ACM Hum.-Comput. Interact. 8 CSCW1 (April 2024) 47:1\u201347:46. 10.1145\/3637324","DOI":"10.1145\/3637324"},{"key":"e_1_3_3_2_76_2","unstructured":"Niamh Rowe. 2023. \u2019It\u2019s destroyed me completely\u2019: Kenyan moderators decry toll of training of AI models. The Guardian (2023). https:\/\/www.theguardian.com\/technology\/2023\/aug\/02\/ai-chatbot-training-human-toll-content-moderator-meta-openai"},{"key":"e_1_3_3_2_77_2","doi-asserted-by":"publisher","unstructured":"Pamela Samuelson. 2023. Generative AI Meets Copyright. Science 381 6654 (July 2023) 158\u2013161. 10.1126\/science.adi0656","DOI":"10.1126\/science.adi0656"},{"key":"e_1_3_3_2_78_2","doi-asserted-by":"crossref","unstructured":"Morgan\u00a0Klaus Scheuerman Alex Hanna and Emily Denton. 2021. Do datasets have politics? Disciplinary values in computer vision dataset development. Proceedings of the ACM on Human-Computer Interaction 5 CSCW2 (2021) 1\u201337.","DOI":"10.1145\/3476058"},{"key":"e_1_3_3_2_79_2","volume-title":"Thirty-Sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track","author":"Schuhmann Christoph","year":"2022","unstructured":"Christoph Schuhmann, Romain Beaumont, Richard Vencu, Cade\u00a0W. Gordon, Ross Wightman, Mehdi Cherti, Theo Coombes, Aarush Katta, Clayton Mullis, Mitchell Wortsman, Patrick Schramowski, Srivatsa\u00a0R. Kundurthy, Katherine Crowson, Ludwig Schmidt, Robert Kaczmarczyk, and Jenia Jitsev. 2022. LAION-5B: An Open Large-Scale Dataset for Training next Generation Image-Text Models. In Thirty-Sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track."},{"key":"e_1_3_3_2_80_2","doi-asserted-by":"publisher","unstructured":"Cathrine Seidelin Yvonne Dittrich and Erik Gr\u00f6nvall. 2020. Foregrounding Data in Co-Design \u2013 An Exploration of How Data May Become an Object of Design. International Journal of Human-Computer Studies 143 (Nov. 2020) 102505. 10.1016\/j.ijhcs.2020.102505","DOI":"10.1016\/j.ijhcs.2020.102505"},{"key":"e_1_3_3_2_81_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.295"},{"key":"e_1_3_3_2_82_2","doi-asserted-by":"publisher","unstructured":"Luke Stark. 2023. Breaking Up (with) AI Ethics. American Literature 95 2 (June 2023) 365\u2013379. 10.1215\/00029831-10575148","DOI":"10.1215\/00029831-10575148"},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1355"},{"key":"e_1_3_3_2_84_2","unstructured":"David Thiel. 2023. Investigation Finds AI Image Generation Models Trained on Child Abuse. https:\/\/fsi.stanford.edu\/news\/investigation-finds-ai-image-generation-models-trained-child-abuse."},{"key":"e_1_3_3_2_85_2","doi-asserted-by":"crossref","unstructured":"Eddie\u00a0L. Ungless Bj\u00f6rn Ross and Anne Lauscher. 2023. Stereotypes and Smut: The (Mis)Representation of Non-cisgender Identities by Text-to-Image Models. (May 2023). arxiv:https:\/\/arXiv.org\/abs\/2305.17072","DOI":"10.18653\/v1\/2023.findings-acl.502"},{"key":"e_1_3_3_2_86_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643834.3661525"},{"key":"e_1_3_3_2_87_2","doi-asserted-by":"publisher","unstructured":"Hamish van der Ven Diego Corry Rawie Elnur Viola\u00a0Jasmine Provost and Muh Syukron. 2024. Generative AI and Social Media May Exacerbate the Climate Crisis. Global Environmental Politics 24 2 (May 2024) 9\u201318. 10.1162\/glep_a_00747","DOI":"10.1162\/glep_a_00747"},{"key":"e_1_3_3_2_88_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533088"},{"key":"e_1_3_3_2_89_2","doi-asserted-by":"publisher","unstructured":"Lauren Wilcox Robin Brewer and Fernando Diaz. 2023. AI Consent Futures: A Case Study on Voice Data Collection with Clinicians. Proc. ACM Hum.-Comput. Interact. 7 CSCW2 (Oct. 2023) 316:1\u2013316:30. 10.1145\/3610107","DOI":"10.1145\/3610107"},{"key":"e_1_3_3_2_90_2","unstructured":"Adrienne Williams Milagros Miceli and Timnit Gebru. 2022. The Exploited Labor behind Artificial Intelligence. NOEMA (2022) 1\u201311."},{"key":"e_1_3_3_2_91_2","doi-asserted-by":"publisher","DOI":"10.1145\/3394231.3397900"},{"key":"e_1_3_3_2_92_2","unstructured":"Chloe Xiang. 2023. OpenAI and Microsoft Sued for $3 Billion Over Alleged ChatGPT \u2019Privacy Violations\u2019."},{"key":"e_1_3_3_2_93_2","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287577"},{"key":"e_1_3_3_2_94_2","doi-asserted-by":"publisher","DOI":"10.1145\/3278721.3278779"},{"key":"e_1_3_3_2_95_2","first-page":"60644","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Zhao Dora","year":"2024","unstructured":"Dora Zhao, Jerone Andrews, Orestis Papakyriakopoulos, and Alice Xiang. 2024. Position: Measure Dataset Diversity, Don\u2019t Just Claim It. In Proceedings of the 41st International Conference on Machine Learning. PMLR, 60644\u201360673."},{"key":"e_1_3_3_2_96_2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2406.06407"},{"key":"e_1_3_3_2_97_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01456"},{"key":"e_1_3_3_2_98_2","doi-asserted-by":"publisher","unstructured":"Michael Zimmer. 2010. \"But the Data Is Already Public\": On the Ethics of Research in Facebook. Ethics and Information Technology 12 4 (Dec. 2010) 313\u2013325. 10.1007\/s10676-010-9227-5","DOI":"10.1007\/s10676-010-9227-5"},{"key":"e_1_3_3_2_99_2","doi-asserted-by":"crossref","unstructured":"Matthew Zook Solon Barocas Danah Boyd Kate Crawford Emily Keller Seeta\u00a0Pe\u00f1a Gangadharan Alyssa Goodman Rachelle Hollander Barbara\u00a0A Koenig Jacob Metcalf et\u00a0al. 2017. Ten simple rules for responsible big data research. e1005399\u00a0pages.","DOI":"10.1371\/journal.pcbi.1005399"}],"event":{"name":"CSCW Companion '25: Companion of the Computer-Supported Cooperative Work and Social Computing","location":"Bergen Norway","acronym":"CSCW Companion '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Companion Publication of the 2025 Conference on Computer-Supported Cooperative Work and Social Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3715070.3748285","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T17:46:56Z","timestamp":1767980816000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3715070.3748285"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,17]]},"references-count":98,"alternative-id":["10.1145\/3715070.3748285","10.1145\/3715070"],"URL":"https:\/\/doi.org\/10.1145\/3715070.3748285","relation":{},"subject":[],"published":{"date-parts":[[2025,10,17]]},"assertion":[{"value":"2025-10-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}