{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T15:40:40Z","timestamp":1776094840305,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":79,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3630106.3658932","type":"proceedings-article","created":{"date-parts":[[2024,6,5]],"date-time":"2024-06-05T09:14:21Z","timestamp":1717578861000},"page":"660-686","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["Auditing GPT's Content Moderation Guardrails: Can ChatGPT Write Your Favorite TV Show?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-0295-3803","authenticated-orcid":false,"given":"Yaaseen","family":"Mahomed","sequence":"first","affiliation":[{"name":"University of Pennsylvania, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3955-0316","authenticated-orcid":false,"given":"Charlie M.","family":"Crawford","sequence":"additional","affiliation":[{"name":"Haverford College, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2933-304X","authenticated-orcid":false,"given":"Sanjana","family":"Gautam","sequence":"additional","affiliation":[{"name":"Pennsylvania State University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6023-1597","authenticated-orcid":false,"given":"Sorelle A.","family":"Friedler","sequence":"additional","affiliation":[{"name":"Haverford College, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9359-6090","authenticated-orcid":false,"given":"Dana\u00eb","family":"Metaxa","sequence":"additional","affiliation":[{"name":"University of Pennsylvania, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2024,6,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372871"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-021-00359-2"},{"key":"e_1_3_2_1_3_1","unstructured":"The\u00a0Internet Archive. 2023. The Internet Archive. archive.org"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3449148"},{"key":"e_1_3_2_1_5_1","unstructured":"Pragyan Banerjee Abhinav Java Surgan Jandial Simra Shahid Shaz Furniturewala Balaji Krishnamurthy and Sumit Bhatia. 2023. All Should Be Equal in the Eyes of Language Models: Counterfactually Aware Fair Text Generation. arxiv:2311.05451\u00a0[cs.CL]"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.148"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.2991\/ijcis.d.200519.003"},{"key":"e_1_3_2_1_8_1","unstructured":"BBC. 2023. BBC Writers Script Library. https:\/\/www.bbc.co.uk\/writers\/scripts\/"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"e_1_3_2_1_10_1","unstructured":"Sunay Bhat Jeffrey Jiang Omead Pooladzandi and Gregory Pottie. 2023. De-Biasing Generative Models using Counterfactual Methods. arxiv:2207.01575\u00a0[cs.LG]"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157382.3157584"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308560.3317593"},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the 1st Conference on Fairness, Accountability and Transparency(Proceedings of Machine Learning Research, Vol.\u00a081)","author":"Buolamwini Joy","year":"2018","unstructured":"Joy Buolamwini and Timnit Gebru. 2018. Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification. In Proceedings of the 1st Conference on Fairness, Accountability and Transparency(Proceedings of Machine Learning Research, Vol.\u00a081), Sorelle\u00a0A. Friedler and Christo Wilson (Eds.). PMLR, 77\u201391. https:\/\/proceedings.mlr.press\/v81\/buolamwini18a.html"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aal4230"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2940716.2940798"},{"key":"e_1_3_2_1_16_1","volume-title":"Deep reinforcement learning from human preferences. Advances in neural information processing systems 30","author":"Christiano F","year":"2017","unstructured":"Paul\u00a0F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep reinforcement learning from human preferences. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_17_1","unstructured":"The Script Lab An Arts\u00a0Industry Company. 2023. The Script Lab. https:\/\/thescriptlab.com\/"},{"key":"e_1_3_2_1_18_1","first-page":"33","article-title":"Evaluating the underlying gender bias in contextualized word embeddings","volume":"2019","author":"Noe Casas Christine Basta","year":"2019","unstructured":"Christine Basta Marta\u00a0R Costa-juss and Noe Casas. 2019. Evaluating the underlying gender bias in contextualized word embeddings. GeBNLP 2019 (2019), 33.","journal-title":"GeBNLP"},{"key":"e_1_3_2_1_19_1","unstructured":"The\u00a0Movie Database. 2023. The Movie Database API. https:\/\/www.themoviedb.org\/"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v11i1.14955"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5102"},{"key":"e_1_3_2_1_22_1","unstructured":"Mandalit Del\u00a0Barco. [n. d.]. Striking Hollywood scribes ponder AI in the writer\u2019s room. NPR ([n. d.]). https:\/\/www.npr.org\/2023\/05\/18\/1176876301\/striking-hollywood-writers-contemplate-ai"},{"key":"e_1_3_2_1_23_1","volume-title":"Double standards in social media content moderation","author":"D\u00edaz \u00c1ngel","year":"2021","unstructured":"\u00c1ngel D\u00edaz and Laura Hecht-Felella. 2021. Double standards in social media content moderation. Brennan Center for Justice, https:\/\/www.brennancenter.org\/our-work\/research-reports\/double-standards-social-media-content-moderation (2021)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3278721.3278729"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.3384\/cu.2000.1525.1792163"},{"key":"e_1_3_2_1_26_1","unstructured":"Inc. Fandom. 2023. Fandom. https:\/\/www.fandom.com\/"},{"key":"e_1_3_2_1_27_1","unstructured":"Alex Freedman. 2023. TV Calling. https:\/\/www.tv-calling.com\/"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.301"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3555088"},{"key":"e_1_3_2_1_30_1","volume-title":"Ghost work: How to stop Silicon Valley from building a new global underclass","author":"Gray L","unstructured":"Mary\u00a0L Gray and Siddharth Suri. 2019. Ghost work: How to stop Silicon Valley from building a new global underclass. Harper Business."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.234"},{"key":"e_1_3_2_1_32_1","unstructured":"IMDb. 2019. Top 100 most watched tv shows of all time. https:\/\/web.archive.org\/web\/20231104142125\/https:\/\/www.imdb.com\/list\/ls095964455\/"},{"key":"e_1_3_2_1_33_1","unstructured":"Scribd Inc.2023. Scribd. https:\/\/www.scribd.com\/home"},{"key":"e_1_3_2_1_34_1","unstructured":"8FLiX Institute. 2023. 8FLiX. https:\/\/8flix.com\/"},{"key":"e_1_3_2_1_35_1","unstructured":"Jigsaw. 2024. How it Works: Using Machine Learning to Reduce Toxicity Online. https:\/\/perspectiveapi.com\/how-it-works\/"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2702123.2702520"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-3823"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v27i1.8539"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1080\/1057610X.2023.2259195"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610209"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.2018.3093"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539147"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.nuse-1.5"},{"key":"e_1_3_2_1_44_1","volume-title":"What caused the Hollywood writers","author":"Malik Yuvraj","year":"2023","unstructured":"Yuvraj Malik and Zaheer Kachwala. 2023. What caused the Hollywood writers\u2019 strike and is it over?Reuters (Sept. 2023). https:\/\/www.reuters.com\/world\/us\/is-hollywood-writers-strike-over-2023-09-25\/"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i12.26752"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594109"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3449100"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1561\/1100000083"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872427.2883062"},{"key":"e_1_3_2_1_50_1","unstructured":"OpenAI. 2023. GPT-4 System Card. https:\/\/cdn.openai.com\/papers\/gpt-4-system-card.pdf"},{"key":"e_1_3_2_1_51_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. https:\/\/arxiv.org\/abs\/2303.08774v3"},{"key":"e_1_3_2_1_52_1","unstructured":"OpenAI. 2023. OpenAI API Platform. https:\/\/platform.openai.com"},{"key":"e_1_3_2_1_53_1","unstructured":"OpenAI. 2024. Terms of Use. https:\/\/openai.com\/policies\/terms-of-use"},{"key":"e_1_3_2_1_54_1","volume-title":"Training language models to follow instructions with human feedback. Advances in neural information processing systems 35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems 35 (2022), 27730\u201327744."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594078"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1177\/08944393211073169"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016883"},{"key":"e_1_3_2_1_58_1","volume-title":"Exclusive: OpenAI used Kenyan workers on less than $2 per hour to make ChatGPT less toxic. Time","author":"Perrigo Billy","year":"2023","unstructured":"Billy Perrigo. 2023. Exclusive: OpenAI used Kenyan workers on less than $2 per hour to make ChatGPT less toxic. Time (2023)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1080\/23268743.2022.2066566"},{"key":"e_1_3_2_1_60_1","volume-title":"Behind the Screen: Content Moderation in the Shadows of Social Media","author":"Roberts T","unstructured":"Sarah\u00a0T Roberts. 2019. Behind the Screen: Content Moderation in the Shadows of Social Media. Yale University Press."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.4"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.3390\/socsci12030148"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1163"},{"key":"e_1_3_2_1_64_1","unstructured":"The\u00a0Daily Script. 2023. The Daily Script. https:\/\/www.dailyscript.com\/index.html"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3548606.3560599"},{"key":"e_1_3_2_1_66_1","unstructured":"Script Slug. 2023. Script Slug. https:\/\/www.scriptslug.com\/"},{"key":"e_1_3_2_1_67_1","unstructured":"Spectrum. [n. d.]. TV and Movie Ratings with Descriptions. https:\/\/www.spectrum.net\/support\/tv\/tv-and-movie-ratings-descriptions"},{"key":"e_1_3_2_1_68_1","volume-title":"Proceedings of the 1st Conference on Fairness, Accountability and Transparency(Proceedings of Machine Learning Research, Vol.\u00a081)","author":"Speicher Till","year":"2018","unstructured":"Till Speicher, Muhammad Ali, Giridhari Venkatadri, Filipe\u00a0Nunes Ribeiro, George Arvanitakis, Fabr\u00edcio Benevenuto, Krishna\u00a0P. Gummadi, Patrick Loiseau, and Alan Mislove. 2018. Potential for Discrimination in Online Targeted Advertising. In Proceedings of the 1st Conference on Fairness, Accountability and Transparency(Proceedings of Machine Learning Research, Vol.\u00a081), Sorelle\u00a0A. Friedler and Christo Wilson (Eds.). PMLR, 5\u201319. https:\/\/proceedings.mlr.press\/v81\/speicher18a.html"},{"key":"e_1_3_2_1_69_1","volume-title":"Exploring Ethical Boundaries: Can ChatGPT Be Prompted to Give Advice on How to Cheat in University Assignments?","author":"Spennemann HR","year":"2023","unstructured":"Dirk\u00a0HR Spennemann. 2023. Exploring Ethical Boundaries: Can ChatGPT Be Prompted to Give Advice on How to Cheat in University Assignments? (2023)."},{"key":"e_1_3_2_1_70_1","first-page":"3008","article-title":"Learning to summarize with human feedback","volume":"33","author":"Stiennon Nisan","year":"2020","unstructured":"Nisan Stiennon, Long Ouyang, Jeffrey Wu, Daniel Ziegler, Ryan Lowe, Chelsea Voss, Alec Radford, Dario Amodei, and Paul\u00a0F Christiano. 2020. Learning to summarize with human feedback. Advances in Neural Information Processing Systems 33 (2020), 3008\u20133021.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","unstructured":"Latanya Sweeney. 2013. Discrimination in Online Ad Delivery. https:\/\/doi.org\/10.2139\/ssrn.2208240","DOI":"10.2139\/ssrn.2208240"},{"key":"e_1_3_2_1_72_1","volume-title":"Breaking Bad: Unraveling Influences and Risks of User Inputs to ChatGPT for Game Story Generation","author":"Taveekitworachai Pittawat","year":"2023","unstructured":"Pittawat Taveekitworachai, Febri Abdullah, Mustafa\u00a0Can Gursesli, Mury\u00a0F. Dewantoro, Siyuan Chen, Antonio Lanata, Andrea Guazzini, and Ruck Thawonmas. 2023. Breaking Bad: Unraveling Influences and Risks of User Inputs to ChatGPT for Game Story Generation. In Interactive Storytelling, Lissa Holloway-Attaway and John\u00a0T. Murray (Eds.). Springer Nature Switzerland, Cham, 285\u2013296."},{"key":"e_1_3_2_1_73_1","volume-title":"Ethical and social risks of harm from language models. arXiv preprint arXiv:2112.04359","author":"Weidinger Laura","year":"2021","unstructured":"Laura Weidinger, John Mellor, Maribeth Rauh, Conor Griffin, Jonathan Uesato, Po-Sen Huang, Myra Cheng, Mia Glaese, Borja Balle, Atoosa Kasirzadeh, 2021. Ethical and social risks of harm from language models. arXiv preprint arXiv:2112.04359 (2021), \u00a0."},{"key":"e_1_3_2_1_74_1","unstructured":"Writers Guild of America. 2023. Writers Guild of America Calls Strike Effective Tuesday May 2. https:\/\/www.wga.org\/news-events\/news\/press\/writers-guild-of-america-calls-strike-effective-tuesday-may-2"},{"key":"e_1_3_2_1_75_1","unstructured":"TV Writing. 2023. TV Writing. https:\/\/sites.google.com\/site\/tvwriting\/home?authuser=0"},{"key":"e_1_3_2_1_76_1","volume-title":"Defending ChatGPT against jailbreak attack via self-reminders. Nature Machine Intelligence \u00a0, \u00a0","author":"Xie Yueqi","year":"2023","unstructured":"Yueqi Xie, Jingwei Yi, Jiawei Shao, Justin Curl, Lingjuan Lyu, Qifeng Chen, Xing Xie, and Fangzhao Wu. 2023. Defending ChatGPT against jailbreak attack via self-reminders. Nature Machine Intelligence \u00a0, \u00a0 (2023), 1\u201311."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368555.3384448"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1064"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1323"}],"event":{"name":"FAccT '24: The 2024 ACM Conference on Fairness, Accountability, and Transparency","location":"Rio de Janeiro Brazil","acronym":"FAccT '24"},"container-title":["The 2024 ACM Conference on Fairness Accountability and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3658932","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3630106.3658932","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:32:34Z","timestamp":1755883954000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3658932"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":79,"alternative-id":["10.1145\/3630106.3658932","10.1145\/3630106"],"URL":"https:\/\/doi.org\/10.1145\/3630106.3658932","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}