{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:49:00Z","timestamp":1776109740565,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":98,"publisher":"ACM","funder":[{"name":"Apple, Inc."},{"name":"Renaissance Philanthropy"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,23]]},"DOI":"10.1145\/3715275.3732137","type":"proceedings-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T17:01:18Z","timestamp":1750698078000},"page":"2025-2039","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["A Framework for Auditing Chatbots for Dialect-Based Quality-of-Service Harms"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8453-4963","authenticated-orcid":false,"given":"Emma","family":"Harvey","sequence":"first","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6283-5546","authenticated-orcid":false,"given":"Rene F.","family":"Kizilcec","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6233-8256","authenticated-orcid":false,"given":"Allison","family":"Koenecke","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,23]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"1997. A Language Is a Dialect with an Army and a Navy. Language in Society 26 3 (1997) 469\u2013469. http:\/\/www.jstor.org\/stable\/4168793","DOI":"10.1017\/S0047404500019679"},{"key":"e_1_3_3_2_3_2","unstructured":"2025. Greenhouse Gas Emissions from a Typical Passenger Vehicle. United States Environmental Protection Agency. https:\/\/www.epa.gov\/greenvehicles\/greenhouse-gas-emissions-typical-passenger-vehicle"},{"key":"e_1_3_3_2_4_2","unstructured":"Khan Academy. 2024. Why We\u2019re Deeply Invested in Making AI Better at Math Tutoring (and What We\u2019ve Been Up to Lately). https:\/\/blog.khanacademy.org\/why-were-deeply-invested-in-making-ai-better-at-math-tutoring-and-what-weve-been-up-to-lately\/"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642703"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3659033"},{"key":"e_1_3_3_2_7_2","unstructured":"Matt Barnum. 2024. We Tested an AI Tutor for Kids. It Struggled With Basic Math. The Wall Street Journal. https:\/\/www.wsj.com\/tech\/ai\/ai-is-tutoring-students-but-still-struggles-with-basic-math-694e76d3"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Yoav Benjamini and Yosef Hochberg. 1995. Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing. Journal of the Royal Statistical Society. Series B (Methodological) 57 1 (1995) 289\u2013300. http:\/\/www.jstor.org\/stable\/2346101","DOI":"10.1111\/j.2517-6161.1995.tb02031.x"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/SaTML59370.2024.00037"},{"key":"e_1_3_3_2_11_2","volume-title":"Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track","author":"Birhane Abeba","year":"2023","unstructured":"Abeba Birhane, vinay\u00a0uday prabhu, Sanghyun Han, Vishnu Boddeti, and Sasha Luccioni. 2023. Into the LAION\u2019s Den: Investigating Hate in Multimodal Datasets. In Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track. https:\/\/openreview.net\/forum?id=6URyQ9QhYv"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533204"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.485"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1120"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","unstructured":"Mark Bovens. 2007. Analysing and Assessing Accountability: A Conceptual Framework. European Law Journal 13 4 (2007) 447\u2013468. 10.1111\/j.1468-0386.2007.00378.x arXiv:https:\/\/onlinelibrary.wiley.com\/doi\/pdf\/10.1111\/j.1468-0386.2007.00378.x","DOI":"10.1111\/j.1468-0386.2007.00378.x"},{"key":"e_1_3_3_2_16_2","unstructured":"Tom\u00a0B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel\u00a0M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. http:\/\/arxiv.org\/abs\/2005.14165 arXiv:https:\/\/arXiv.org\/abs\/2005.14165 [cs]."},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1002\/9780470996522.ch16"},{"key":"e_1_3_3_2_18_2","series-title":"(FAT*)","first-page":"77","volume-title":"Proceedings of the 1st Conference on Fairness, Accountability and Transparency","author":"Buolamwini Joy","year":"2018","unstructured":"Joy Buolamwini and Timnit Gebru. 2018. Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification. In Proceedings of the 1st Conference on Fairness, Accountability and Transparency(FAT*). PMLR, 77\u201391. https:\/\/proceedings.mlr.press\/v81\/buolamwini18a.html"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"Wallace Chafe and Deborah Tannen. 1987. The Relation between Written and Spoken Language. Annual Review of Anthropology 16 (1987) 383\u2013407. http:\/\/www.jstor.org\/stable\/2155877","DOI":"10.1146\/annurev.an.16.100187.002123"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.84"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.669"},{"key":"e_1_3_3_2_22_2","unstructured":"Trishul Chilimbi. 2024. How We Built Rufus Amazon\u2019s AI-Powered Shopping Assistant. IEEE Spectrum. https:\/\/spectrum.ieee.org\/amazon-rufus"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1610.07524"},{"key":"e_1_3_3_2_24_2","volume-title":"Advances in Neural Information Processing Systems","author":"Christiano Paul\u00a0F","year":"2017","unstructured":"Paul\u00a0F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep Reinforcement Learning from Human Preferences. In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.), Vol.\u00a030. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/d5e2c0adad503c91f91df240d0cd4e49-Paper.pdf"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533213"},{"key":"e_1_3_3_2_26_2","unstructured":"Scott\u00a0J. Crichton. 2017. STATE OF LOUISIANA VERSUS WARREN DEMESME. https:\/\/www.lasc.org\/opinions\/2017\/17KK0954.sjc.addconc.pdf"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","unstructured":"Tony Crowley. 2006. The Political Production of a Language. Journal of Linguistic Anthropology 16 1 (2006) 23\u201335. 10.1525\/jlin.2006.16.1.023 arXiv:https:\/\/anthrosource.onlinelibrary.wiley.com\/doi\/pdf\/10.1525\/jlin.2006.16.1.023","DOI":"10.1525\/jlin.2006.16.1.023"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.150"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.98"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Franz Faul Edgar Erdfelder Albert-Georg Lang and Axel Buchner. 2007. G*Power 3: a flexible statistical power analysis program for the social behavioral and biomedical sciences. Behav. Res. Methods 39 2 (May 2007) 175\u2013191.","DOI":"10.3758\/BF03193146"},{"key":"e_1_3_3_2_31_2","unstructured":"Eve Fleisig Genevieve Smith Madeline Bossi Ishita Rustagi Xavier Yin and Dan Klein. 2024. Linguistic Bias in ChatGPT: Language Models Reinforce Dialect Discrimination. http:\/\/arxiv.org\/abs\/2406.08818 arXiv:https:\/\/arXiv.org\/abs\/2406.08818 [cs]."},{"key":"e_1_3_3_2_32_2","unstructured":"Geoffrey\u00a0A. Fowler. 2024. TurboTax and H&R Block now use AI for tax advice. It\u2019s awful. The Washington Post. https:\/\/www.washingtonpost.com\/technology\/2024\/03\/04\/ai-taxes-turbotax-hrblock-chatbot\/"},{"key":"e_1_3_3_2_33_2","unstructured":"Susan Gal and Judith\u00a0T. Irvine. 1995. The Boundaries of Languages and Disciplines: How Ideologies Construct Difference. Social Research 62 4 (1995) 967\u20131001. http:\/\/www.jstor.org\/stable\/40971131"},{"key":"e_1_3_3_2_34_2","unstructured":"Isabel\u00a0O. Gallegos Ryan\u00a0A. Rossi Joe Barrow Md\u00a0Mehrab Tanjim Sungchul Kim Franck Dernoncourt Tong Yu Ruiyi Zhang and Nesreen\u00a0K. Ahmed. 2023. Bias and Fairness in Large Language Models: A Survey. http:\/\/arxiv.org\/abs\/2309.00770 arXiv:https:\/\/arXiv.org\/abs\/2309.00770 [cs]."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.301"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.473"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","unstructured":"Jeffrey Grogger. 2011. Speech Patterns and Racial Wage Inequality. Journal of Human Resources 46 1 (2011) 1\u201325. 10.3368\/jhr.46.1.1 arXiv:https:\/\/jhr.uwpress.org\/content\/46\/1\/1.full.pdf","DOI":"10.3368\/jhr.46.1.1"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658959"},{"key":"e_1_3_3_2_39_2","unstructured":"Kenneth\u00a0R Hammond. 1998. Ecological validity: Then and now."},{"key":"e_1_3_3_2_40_2","volume-title":"AI for Education: Bridging Innovation and Responsibility at the 38th AAAI Annual Conference on AI","author":"Harvey Emma","year":"2024","unstructured":"Emma Harvey, Allison Koenecke, and Rene\u00a0F Kizilcec. 2024. Towards an Educator-Centered Method for Measuring Bias in Large Language Model-Based Chatbot Tutors. In AI for Education: Bridging Innovation and Responsibility at the 38th AAAI Annual Conference on AI. https:\/\/openreview.net\/forum?id=REidmDpL9r"},{"key":"e_1_3_3_2_41_2","unstructured":"Emma Harvey Emily Sheng Su\u00a0Lin Blodgett Alexandra Chouldechova Jean Garcia-Gathright Alexandra Olteanu and Hanna Wallach. 2024. Gaps Between Research and Practice When Measuring Representational Harms Caused by LLM-Based Systems. arxiv:https:\/\/arXiv.org\/abs\/2411.15662\u00a0[cs.CY] https:\/\/arxiv.org\/abs\/2411.15662"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","unstructured":"Einar Haugen. 1996. Dialect Language Nation. American Anthropologist 68 4 (Aug. 1996) 922 \u2013 935. 10.1525\/aa.1966.68.4.02a00040","DOI":"10.1525\/aa.1966.68.4.02a00040"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","unstructured":"Valentin Hofmann Pratyusha\u00a0Ria Kalluri Dan Jurafsky and Sharese King. 2024. AI generates covertly racist decisions about people based on their dialect. Nature 633 8028 (01 Sep 2024) 147\u2013154. 10.1038\/s41586-024-07856-5","DOI":"10.1038\/s41586-024-07856-5"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","unstructured":"Gijs\u00a0A. Holleman Ignace T.\u00a0C. Hooge Chantal Kemner and Roy\u00a0S. Hessels. 2020. The \u2018Real-World Approach\u2019 and Its Problems: A Critique of the Term Ecological Validity. Frontiers in Psychology 11 (2020). 10.3389\/fpsyg.2020.00721","DOI":"10.3389\/fpsyg.2020.00721"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3714227"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445901"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","unstructured":"David Johnson and Lewis VanBrackle. 2012. Linguistic discrimination in writing assessment: How raters react to African American \u201cerrors \u201d ESL errors and standard English errors on a state-mandated writing exam. Assessing Writing 17 1 (2012) 35\u201354. 10.1016\/j.asw.2011.10.001","DOI":"10.1016\/j.asw.2011.10.001"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"crossref","unstructured":"Taylor Jones Jessica\u00a0Rose Kalbfeld Ryan Hancock and Robin Clark. 2019. Testifying while black: An experimental study of court reporter accuracy in transcription of African American English. Language 95 2 (2019) e216\u2013e252.","DOI":"10.1353\/lan.2019.0042"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","unstructured":"John\u00a0F. Kihlstrom. 2021. Ecological Validity and \u201cEcological Validity\u201d. Perspectives on Psychological Science 16 2 (2021) 466\u2013471. 10.1177\/1745691620966791 arXiv:10.1177\/1745691620966791 PMID: 33593121.","DOI":"10.1177\/1745691620966791"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658996"},{"key":"e_1_3_3_2_51_2","volume-title":"eWAVE","author":"Kortmann Bernd","year":"2020","unstructured":"Bernd Kortmann, Kerstin Lunkenheimer, and Katharina Ehret (Eds.). 2020. eWAVE. https:\/\/ewave-atlas.org\/"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.1145\/3582269.3615599"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139023832.008"},{"key":"e_1_3_3_2_54_2","unstructured":"Colin Lecher. 2024. NYC\u2019s AI Chatbot Tells Businesses to Break the Law. The Markup. https:\/\/themarkup.org\/news\/2024\/03\/29\/nycs-ai-chatbot-tells-businesses-to-break-the-law"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","unstructured":"Tamara\u00a0G.J. Leech Amy Irby-Shasanmi and Anne\u00a0L. Mitchell. 2019. \u201cAre you accepting new patients?\u201d A pilot field experiment on telephone-based gatekeeping and Black patients\u2019 access to pediatric care. Health Services Research 54 S1 (2019) 234\u2013242. 10.1111\/1475-6773.13089 arXiv:https:\/\/onlinelibrary.wiley.com\/doi\/pdf\/10.1111\/1475-6773.13089","DOI":"10.1111\/1475-6773.13089"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539147"},{"key":"e_1_3_3_2_57_2","first-page":"9459","volume-title":"Advances in Neural Information Processing Systems","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems, H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.), Vol.\u00a033. Curran Associates, Inc., 9459\u20139474. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/6b493230205f780e1bc26945df7481e5-Paper.pdf"},{"key":"e_1_3_3_2_58_2","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. arxiv:https:\/\/arXiv.org\/abs\/1907.11692\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1907.11692"},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-short.24"},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"publisher","DOI":"10.1145\/3715275.3732182"},{"key":"e_1_3_3_2_61_2","unstructured":"Rajiv Mehta. 2024. How customers are making more informed shopping decisions with Rufus Amazon\u2019s generative AI-powered shopping assistant. Amazon. https:\/\/www.aboutamazon.com\/news\/retail\/how-to-use-amazon-rufus"},{"key":"e_1_3_3_2_62_2","unstructured":"Rajiv Mehta and Trishul Chilimbi. 2024. Amazon announces Rufus a new generative AI-powered conversational shopping experience. Amazon. https:\/\/www.aboutamazon.com\/news\/retail\/amazon-rufus"},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"publisher","unstructured":"Pepper Miller and Kristen DiCerbo. 2024. LLM Based Math Tutoring: Challenges and Dataset. 10.35542\/osf.io\/5zwv3","DOI":"10.35542\/osf.io\/5zwv3"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"publisher","unstructured":"Jakob M\u00f6kander Jonas Schuett Hannah\u00a0Rose Kirk and Luciano Floridi. 2023. Auditing large language models: a three-layered approach. AI and Ethics (May 2023). 10.1007\/s43681-023-00289-2","DOI":"10.1007\/s43681-023-00289-2"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","unstructured":"Roberto Navigli Simone Conia and Bj\u00f6rn Ross. 2023. Biases in Large Language Models: Origins Inventory and Discussion. J. Data and Information Quality 15 2 Article 10 (June 2023) 21\u00a0pages. 10.1145\/3597307","DOI":"10.1145\/3597307"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"publisher","unstructured":"Ziad Obermeyer Brian Powers Christine Vogeli and Sendhil Mullainathan. 2019. Dissecting racial bias in an algorithm used to manage the health of populations. Science 366 6464 (Oct. 2019) 447\u2013453. 10.1126\/science.aax2342","DOI":"10.1126\/science.aax2342"},{"key":"e_1_3_3_2_67_2","unstructured":"OpenAI Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat Red Avila Igor Babuschkin Suchir Balaji Valerie Balcom Paul Baltescu Haiming Bao Mohammad Bavarian Jeff Belgum Irwan Bello Jake Berdine Gabriel Bernadett-Shapiro Christopher Berner Lenny Bogdonoff Oleg Boiko Madelaine Boyd Anna-Luisa Brakman Greg Brockman Tim Brooks Miles Brundage Kevin Button Trevor Cai Rosie Campbell Andrew Cann Brittany Carey Chelsea Carlson Rory Carmichael Brooke Chan Che Chang Fotis Chantzis Derek Chen Sully Chen Ruby Chen Jason Chen Mark Chen Ben Chess Chester Cho Casey Chu Hyung\u00a0Won Chung Dave Cummings Jeremiah Currier Yunxing Dai Cory Decareaux Thomas Degry Noah Deutsch Damien Deville Arka Dhar David Dohan Steve Dowling Sheila Dunning Adrien Ecoffet Atty Eleti Tyna Eloundou David Farhi Liam Fedus Niko Felix Sim\u00f3n\u00a0Posada Fishman Juston Forte Isabella Fulford Leo Gao Elie Georges Christian Gibson Vik Goel Tarun Gogineni Gabriel Goh Rapha Gontijo-Lopes Jonathan Gordon Morgan Grafstein Scott Gray Ryan Greene Joshua Gross Shixiang\u00a0Shane Gu Yufei Guo Chris Hallacy Jesse Han Jeff Harris Yuchen He Mike Heaton Johannes Heidecke Chris Hesse Alan Hickey Wade Hickey Peter Hoeschele Brandon Houghton Kenny Hsu Shengli Hu Xin Hu Joost Huizinga Shantanu Jain Shawn Jain Joanne Jang Angela Jiang Roger Jiang Haozhun Jin Denny Jin Shino Jomoto Billie Jonn Heewoo Jun Tomer Kaftan \u0141ukasz Kaiser Ali Kamali Ingmar Kanitscheider Nitish\u00a0Shirish Keskar Tabarak Khan Logan Kilpatrick Jong\u00a0Wook Kim Christina Kim Yongjik Kim Jan\u00a0Hendrik Kirchner Jamie Kiros Matt Knight Daniel Kokotajlo \u0141ukasz Kondraciuk Andrew Kondrich Aris Konstantinidis Kyle Kosic Gretchen Krueger Vishal Kuo Michael Lampe Ikai Lan Teddy Lee Jan Leike Jade Leung Daniel Levy Chak\u00a0Ming Li Rachel Lim Molly Lin Stephanie Lin Mateusz Litwin Theresa Lopez Ryan Lowe Patricia Lue Anna Makanju Kim Malfacini Sam Manning Todor Markov Yaniv Markovski Bianca Martin Katie Mayer Andrew Mayne Bob McGrew Scott\u00a0Mayer McKinney Christine McLeavey Paul McMillan Jake McNeil David Medina Aalok Mehta Jacob Menick Luke Metz Andrey Mishchenko Pamela Mishkin Vinnie Monaco Evan Morikawa Daniel Mossing Tong Mu Mira Murati Oleg Murk David M\u00e9ly Ashvin Nair Reiichiro Nakano Rajeev Nayak Arvind Neelakantan Richard Ngo Hyeonwoo Noh Long Ouyang Cullen O\u2019Keefe Jakub Pachocki Alex Paino Joe Palermo Ashley Pantuliano Giambattista Parascandolo Joel Parish Emy Parparita Alex Passos Mikhail Pavlov Andrew Peng Adam Perelman Filipe de Avila Belbute\u00a0Peres Michael Petrov Henrique\u00a0Ponde de Oliveira\u00a0Pinto Michael Pokorny Michelle Pokrass Vitchyr\u00a0H. Pong Tolly Powell Alethea Power Boris Power Elizabeth Proehl Raul Puri Alec Radford Jack Rae Aditya Ramesh Cameron Raymond Francis Real Kendra Rimbach Carl Ross Bob Rotsted Henri Roussez Nick Ryder Mario Saltarelli Ted Sanders Shibani Santurkar Girish Sastry Heather Schmidt David Schnurr John Schulman Daniel Selsam Kyla Sheppard Toki Sherbakov Jessica Shieh Sarah Shoker Pranav Shyam Szymon Sidor Eric Sigler Maddie Simens Jordan Sitkin Katarina Slama Ian Sohl Benjamin Sokolowsky Yang Song Natalie Staudacher Felipe\u00a0Petroski Such Natalie Summers Ilya Sutskever Jie Tang Nikolas Tezak Madeleine\u00a0B. Thompson Phil Tillet Amin Tootoonchian Elizabeth Tseng Preston Tuggle Nick Turley Jerry Tworek Juan Felipe\u00a0Cer\u00f3n Uribe Andrea Vallone Arun Vijayvergiya Chelsea Voss Carroll Wainwright Justin\u00a0Jay Wang Alvin Wang Ben Wang Jonathan Ward Jason Wei CJ Weinmann Akila Welihinda Peter Welinder Jiayi Weng Lilian Weng Matt Wiethoff Dave Willner Clemens Winter Samuel Wolrich Hannah Wong Lauren Workman Sherwin Wu Jeff Wu Michael Wu Kai Xiao Tao Xu Sarah Yoo Kevin Yu Qiming Yuan Wojciech Zaremba Rowan Zellers Chong Zhang Marvin Zhang Shengjia Zhao Tianhao Zheng Juntang Zhuang William Zhuk and Barret Zoph. 2024. GPT-4 Technical Report. arxiv:https:\/\/arXiv.org\/abs\/2303.08774\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"e_1_3_3_2_68_2","unstructured":"Long Ouyang Jeff Wu Xu Jiang Diogo Almeida Carroll\u00a0L. Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray John Schulman Jacob Hilton Fraser Kelton Luke Miller Maddie Simens Amanda Askell Peter Welinder Paul Christiano Jan Leike and Ryan Lowe. 2022. Training language models to follow instructions with human feedback. arxiv:https:\/\/arXiv.org\/abs\/2203.02155\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2203.02155"},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"publisher","unstructured":"Thomas Purnell William Idsardi and John Baugh. 1999. Perceptual and Phonetic Experiments on American English Dialect Identification. Journal of Language and Social Psychology 18 1 (1999) 10\u201330. 10.1177\/0261927X99018001002 arXiv:10.1177\/0261927X99018001002","DOI":"10.1177\/0261927X99018001002"},{"key":"e_1_3_3_2_70_2","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans and Ilya Sutskever. 2018. Improving Language Understanding by Generative Pre-Training. https:\/\/s3-us-west-2.amazonaws.com\/openai-assets\/research-covers\/language-unsupervised\/language_understanding_paper.pdf"},{"key":"e_1_3_3_2_71_2","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. https:\/\/d4mucfpksywv.cloudfront.net\/better-language-models\/language_models_are_unsupervised_multitask_learners.pdf"},{"key":"e_1_3_3_2_72_2","unstructured":"Colin Raffel Noam Shazeer Adam Roberts Katherine Lee Sharan Narang Michael Matena Yanqi Zhou Wei Li and Peter\u00a0J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21 140 (2020) 1\u201367. http:\/\/jmlr.org\/papers\/v21\/20-074.html"},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"publisher","DOI":"10.1145\/3306618.3314244"},{"key":"e_1_3_3_2_74_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533158"},{"key":"e_1_3_3_2_75_2","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372873"},{"key":"e_1_3_3_2_76_2","doi-asserted-by":"publisher","DOI":"10.1145\/3514094.3534181"},{"key":"e_1_3_3_2_77_2","doi-asserted-by":"publisher","DOI":"10.1145\/3514094.3534189"},{"key":"e_1_3_3_2_78_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581407"},{"key":"e_1_3_3_2_79_2","volume-title":"\"Data and Discrimination: Converting Critical Concerns into Productive Inquiry,\u201d a preconference at the 64th Annual Meeting of the International Communication Association","author":"Sandvig Christian","year":"2014","unstructured":"Christian Sandvig, Kevin Hamilton, K. Karahalios, and C\u00e9dric Langbort. 2014. Auditing Algorithms: Research Methods for Detecting Discrimination on Internet Platforms. In \"Data and Discrimination: Converting Critical Concerns into Productive Inquiry,\u201d a preconference at the 64th Annual Meeting of the International Communication Association. Seattle, WA. https:\/\/www.semanticscholar.org\/paper\/Auditing-Algorithms-%3A-Research-Methods-for-on-Sandvig-Hamilton\/b7227cbd34766655dea10d0437ab10df3a127396"},{"key":"e_1_3_3_2_80_2","unstructured":"Sander Schulhoff Michael Ilie Nishant Balepur Konstantine Kahadze Amanda Liu Chenglei Si Yinheng Li Aayush Gupta HyoJung Han Sevien Schulhoff Pranav\u00a0Sandeep Dulepet Saurav Vidyadhara Dayeon Ki Sweta Agrawal Chau Pham Gerson Kroiz Feileen Li Hudson Tao Ashay Srivastava Hevander\u00a0Da Costa Saloni Gupta Megan\u00a0L. Rogers Inna Goncearenco Giuseppe Sarli Igor Galynker Denis Peskoff Marine Carpuat Jules White Shyamal Anadkat Alexander Hoyle and Philip Resnik. 2024. The Prompt Report: A Systematic Survey of Prompting Techniques. arxiv:https:\/\/arXiv.org\/abs\/2406.06608\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2406.06608"},{"key":"e_1_3_3_2_81_2","unstructured":"Andrew\u00a0D Selbst and Solon Barocas. 2023. Unfair Artificial Intelligence: How FTC Intervention Can Overcome the Limitations of Discrimination Law. University of Pennsylvania Law Review 171 (2023)."},{"key":"e_1_3_3_2_82_2","doi-asserted-by":"publisher","DOI":"10.1145\/3600211.3604673"},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"publisher","unstructured":"Hong Shen Alicia DeVos Motahhare Eslami and Kenneth Holstein. 2021. Everyday Algorithm Auditing: Understanding the Power of Everyday Users in Surfacing Harmful Algorithmic Behaviors. Proceedings of the ACM on Human-Computer Interaction 5 CSCW2 (Oct. 2021) 1\u201329. 10.1145\/3479577","DOI":"10.1145\/3479577"},{"key":"e_1_3_3_2_84_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1339"},{"key":"e_1_3_3_2_85_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.330"},{"key":"e_1_3_3_2_86_2","volume-title":"Talkin and Testifyin: The Language of Black America","author":"Smitherman G.","year":"1986","unstructured":"G. Smitherman. 1986. Talkin and Testifyin: The Language of Black America. Wayne State University Press. https:\/\/books.google.com\/books?id=HXD7pYv80bUC"},{"key":"e_1_3_3_2_87_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.331"},{"key":"e_1_3_3_2_88_2","doi-asserted-by":"publisher","unstructured":"Latanya Sweeney. 2013. Discrimination in online ad delivery. Commun. ACM 56 5 (May 2013) 44\u201354. 10.1145\/2447976.2447990","DOI":"10.1145\/2447976.2447990"},{"key":"e_1_3_3_2_89_2","doi-asserted-by":"publisher","DOI":"10.1145\/3465416.3483294"},{"key":"e_1_3_3_2_90_2","first-page":"1324","volume-title":"Proceedings of the 29th International Conference on Computational Linguistics","author":"Venkit Pranav\u00a0Narayanan","year":"2022","unstructured":"Pranav\u00a0Narayanan Venkit, Mukund Srinath, and Shomir Wilson. 2022. A Study of Implicit Bias in Pretrained Language Models against People with Disabilities. In Proceedings of the 29th International Conference on Computational Linguistics, Nicoletta Calzolari, Chu-Ren Huang, Hansaem Kim, James Pustejovsky, Leo Wanner, Key-Sun Choi, Pum-Mo Ryu, Hsin-Hsi Chen, Lucia Donatelli, Heng Ji, Sadao Kurohashi, Patrizia Paggio, Nianwen Xue, Seokhwan Kim, Younggyun Hahm, Zhong He, Tony\u00a0Kyungil Lee, Enrico Santus, Francis Bond, and Seung-Hoon Na (Eds.). International Committee on Computational Linguistics, Gyeongju, Republic of Korea, 1324\u20131332. https:\/\/aclanthology.org\/2022.coling-1.113\/"},{"key":"e_1_3_3_2_91_2","doi-asserted-by":"publisher","unstructured":"Angelina Wang Jamie Morgenstern and John\u00a0P. Dickerson. 2025. Large language models that replace human participants can harmfully misportray and flatten identity groups. Nature Machine Intelligence (17 Feb 2025). 10.1038\/s42256-025-00986-z","DOI":"10.1038\/s42256-025-00986-z"},{"key":"e_1_3_3_2_92_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533088"},{"key":"e_1_3_3_2_93_2","doi-asserted-by":"publisher","DOI":"10.4324\/9781315060125"},{"key":"e_1_3_3_2_94_2","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372833"},{"key":"e_1_3_3_2_95_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658998"},{"key":"e_1_3_3_2_96_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533194"},{"key":"e_1_3_3_2_97_2","series-title":"(ICLR Workshops)","volume-title":"ICLR 2024 Workshop on Secure and Trustworthy Large Language Models","author":"Yu Jiahao","year":"2024","unstructured":"Jiahao Yu, Yuhang Wu, Dong Shu, Mingyu Jin, Sabrina Yang, and Xinyu Xing. 2024. Assessing Prompt Injection Risks in 200+ Custom GPTs. In ICLR 2024 Workshop on Secure and Trustworthy Large Language Models(ICLR Workshops). arXiv. http:\/\/arxiv.org\/abs\/2311.11538 arXiv:https:\/\/arXiv.org\/abs\/2311.11538 [cs]."},{"key":"e_1_3_3_2_98_2","unstructured":"Daniel\u00a0M. Ziegler Nisan Stiennon Jeffrey Wu Tom\u00a0B. Brown Alec Radford Dario Amodei Paul Christiano and Geoffrey Irving. 2020. Fine-Tuning Language Models from Human Preferences. arxiv:https:\/\/arXiv.org\/abs\/1909.08593\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1909.08593"},{"key":"e_1_3_3_2_99_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.44"}],"event":{"name":"FAccT '25: The 2025 ACM Conference on Fairness, Accountability, and Transparency","location":"Athens Greece","acronym":"FAccT '25"},"container-title":["Proceedings of the 2025 ACM Conference on Fairness, Accountability, and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3715275.3732137","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T11:21:57Z","timestamp":1750764117000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3715275.3732137"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,23]]},"references-count":98,"alternative-id":["10.1145\/3715275.3732137","10.1145\/3715275"],"URL":"https:\/\/doi.org\/10.1145\/3715275.3732137","relation":{},"subject":[],"published":{"date-parts":[[2025,6,23]]},"assertion":[{"value":"2025-06-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}