{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T21:48:55Z","timestamp":1776116935122,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":103,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,23]]},"DOI":"10.1145\/3715275.3732038","type":"proceedings-article","created":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T13:03:13Z","timestamp":1750683793000},"page":"573-598","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Position is Power: System Prompts as a Mechanism of Bias in Large Language Models (LLMs)"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-9672-8087","authenticated-orcid":false,"given":"Anna","family":"Neumann","sequence":"first","affiliation":[{"name":"University of Duisburg-Essen, Research Center Trust, UA Ruhr, Duisburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0680-8916","authenticated-orcid":false,"given":"Elisabeth","family":"Kirsten","sequence":"additional","affiliation":[{"name":"Ruhr University Bochum, Research Center Trust, UA Ruhr, Bochum, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8347-7813","authenticated-orcid":false,"given":"Muhammad Bilal","family":"Zafar","sequence":"additional","affiliation":[{"name":"Ruhr University Bochum, Research Center Trust, UA Ruhr, Bochum, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5102-6564","authenticated-orcid":false,"given":"Jatinder","family":"Singh","sequence":"additional","affiliation":[{"name":"University of Duisburg-Essen, Research Center Trust, UA Ruhr, Duisburg, Germany and University of Cambridge, Cambridge, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2025,6,23]]},"reference":[{"key":"e_1_3_3_3_2_2","unstructured":"2024. GPT-4o System Card. https:\/\/openai.com\/index\/gpt-4o-system-card\/"},{"key":"e_1_3_3_3_3_2","unstructured":"2024. Introducing the next generation of Claude. https:\/\/www.anthropic.com\/news\/claude-3-family"},{"key":"e_1_3_3_3_4_2","unstructured":"2024. Memory and new controls for ChatGPT. https:\/\/openai.com\/index\/memory-and-new-controls-for-chatgpt\/"},{"key":"e_1_3_3_3_5_2","unstructured":"2024. Model Spec (2024\/05\/08). https:\/\/cdn.openai.com\/spec\/model-spec-2024-05-08.html\/#follow-the-chain-of-command"},{"key":"e_1_3_3_3_6_2","unstructured":"2025. Gemini API. https:\/\/ai.google.dev\/gemini-api\/docs"},{"key":"e_1_3_3_3_7_2","doi-asserted-by":"publisher","unstructured":"Mohsen Abbasi Sorelle\u00a0A. Friedler C. Scheidegger and Suresh Venkatasubramanian. 2019. Fairness in representation: quantifying stereotyping as a representational harm. (2019) 801\u2013809. https:\/\/doi.org\/10.1137\/1.9781611975673.90","DOI":"10.1137\/1.9781611975673.90"},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"publisher","DOI":"10.1093\/oxfordhb\/9780197579329.013.65"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"publisher","unstructured":"Amith Ananthram Elias Stengel-Eskin Carl Vondrick Mohit Bansal and Kathleen McKeown. 2024. See It from My Perspective: Diagnosing the Western Cultural Bias of Large Vision-Language Models in Image Understanding. https:\/\/doi.org\/10.48550\/arXiv.2406.11665","DOI":"10.48550\/arXiv.2406.11665"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"publisher","unstructured":"Sodiq\u00a0Odetunde Babatunde Opeyemi\u00a0Abayomi Odejide Tolulope\u00a0Esther Edunjobi and Damilola\u00a0Oluwaseun Ogundipe. 2024. THE ROLE OF AI IN MARKETING PERSONALIZATION: A THEORETICAL EXPLORATION OF CONSUMER ENGAGEMENT STRATEGIES. International Journal of Management & Entrepreneurship Research 6 3 (March 2024) 936\u2013949. https:\/\/doi.org\/10.51594\/ijmer.v6i3.964","DOI":"10.51594\/ijmer.v6i3.964"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713787"},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.148"},{"key":"e_1_3_3_3_13_2","volume-title":"9th Annual conference of the special interest group for computing, information and society","author":"Barocas Solon","year":"2017","unstructured":"Solon Barocas, Kate Crawford, Aaron Shapiro, and Hanna Wallach. 2017. The problem with bias: Allocative versus representational harms in machine learning. In 9th Annual conference of the special interest group for computing, information and society. New York, NY."},{"key":"e_1_3_3_3_14_2","volume-title":"Fairness and Machine Learning: Limitations and Opportunities","author":"Barocas Solon","year":"2023","unstructured":"Solon Barocas, Moritz Hardt, and Arvind Narayanan. 2023. Fairness and Machine Learning: Limitations and Opportunities. MIT Press."},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"publisher","unstructured":"Solon Barocas and Andrew\u00a0D. Selbst. 2016. Big Data\u2019s Disparate Impact. https:\/\/doi.org\/10.2139\/ssrn.2477899","DOI":"10.2139\/ssrn.2477899"},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"publisher","unstructured":"Rick Battle and Teja Gollapudi. 2024. The Unreasonable Effectiveness of Eccentric Automatic Prompts. https:\/\/doi.org\/10.48550\/arXiv.2402.10949","DOI":"10.48550\/arXiv.2402.10949"},{"key":"e_1_3_3_3_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"publisher","unstructured":"Agata Blasiak Jeffrey Khong and Theodore Kee. 2020. CURATE.AI: Optimizing Personalized Medicine with Artificial Intelligence. SLAS TECHNOLOGY: Translating Life Sciences Innovation 25 2 (April 2020) 95\u2013105. https:\/\/doi.org\/10.1177\/2472630319890316","DOI":"10.1177\/2472630319890316"},{"key":"e_1_3_3_3_19_2","doi-asserted-by":"publisher","unstructured":"Su\u00a0Lin Blodgett Solon Barocas Hal\u00a0Daum\u00e9 III and Hanna Wallach. 2020. Language (Technology) is Power: A Critical Survey of \"Bias\" in NLP. https:\/\/doi.org\/10.48550\/arXiv.2005.14050","DOI":"10.48550\/arXiv.2005.14050"},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"publisher","unstructured":"Rishi Bommasani Drew\u00a0A. Hudson Ehsan Adeli Russ Altman Simran Arora Sydney\u00a0von Arx Michael\u00a0S. Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill Erik Brynjolfsson Shyamal Buch Dallas Card Rodrigo Castellon Niladri Chatterji Annie Chen Kathleen Creel Jared\u00a0Quincy Davis Dora Demszky Chris Donahue Moussa Doumbouya Esin Durmus Stefano Ermon John Etchemendy Kawin Ethayarajh Li Fei-Fei Chelsea Finn Trevor Gale Lauren Gillespie Karan Goel Noah Goodman Shelby Grossman Neel Guha Tatsunori Hashimoto Peter Henderson John Hewitt Daniel\u00a0E. Ho Jenny Hong Kyle Hsu Jing Huang Thomas Icard Saahil Jain Dan Jurafsky Pratyusha Kalluri Siddharth Karamcheti Geoff Keeling Fereshte Khani Omar Khattab Pang\u00a0Wei Koh Mark Krass Ranjay Krishna Rohith Kuditipudi Ananya Kumar Faisal Ladhak Mina Lee Tony Lee Jure Leskovec Isabelle Levent Xiang\u00a0Lisa Li Xuechen Li Tengyu Ma Ali Malik Christopher\u00a0D. Manning Suvir Mirchandani Eric Mitchell Zanele Munyikwa Suraj Nair Avanika Narayan Deepak Narayanan Ben Newman Allen Nie Juan\u00a0Carlos Niebles Hamed Nilforoshan Julian Nyarko Giray Ogut Laurel Orr Isabel Papadimitriou Joon\u00a0Sung Park Chris Piech Eva Portelance Christopher Potts Aditi Raghunathan Rob Reich Hongyu Ren Frieda Rong Yusuf Roohani Camilo Ruiz Jack Ryan Christopher R\u00e9 Dorsa Sadigh Shiori Sagawa Keshav Santhanam Andy Shih Krishnan Srinivasan Alex Tamkin Rohan Taori Armin\u00a0W. Thomas Florian Tram\u00e8r Rose\u00a0E. Wang William Wang Bohan Wu Jiajun Wu Yuhuai Wu Sang\u00a0Michael Xie Michihiro Yasunaga Jiaxuan You Matei Zaharia Michael Zhang Tianyi Zhang Xikun Zhang Yuhui Zhang Lucia Zheng Kaitlyn Zhou and Percy Liang. 2022. On the Opportunities and Risks of Foundation Models. https:\/\/doi.org\/10.48550\/arXiv.2108.07258","DOI":"10.48550\/arXiv.2108.07258"},{"key":"e_1_3_3_3_21_2","unstructured":"Tom\u00a0B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel\u00a0M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. https:\/\/arxiv.org\/abs\/2005.14165"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658946"},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"publisher","unstructured":"Jennifer Cobbe and Jatinder Singh. 2021. Artificial intelligence as a service: Legal responsibilities liabilities and policy challenges. Computer Law & Security Review 42 (Sept. 2021) 105573. https:\/\/doi.org\/10.1016\/j.clsr.2021.105573","DOI":"10.1016\/j.clsr.2021.105573"},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594073"},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533213"},{"key":"e_1_3_3_3_26_2","unstructured":"Kate Crawford. 2016. Opinion | Artificial Intelligence\u2019s White Guy Problem. The New York Times (June 2016). https:\/\/www.nytimes.com\/2016\/06\/26\/opinion\/sunday\/artificial-intelligences-white-guy-problem.html"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3534627"},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445924"},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3659047"},{"key":"e_1_3_3_3_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3652988.3673967"},{"key":"e_1_3_3_3_31_2","doi-asserted-by":"publisher","unstructured":"Sourojit Ghosh Pranav\u00a0Narayanan Venkit Sanjana Gautam Shomir Wilson and Aylin Caliskan. 2024. Do Generative AI Models Output Harm while Representing Non-Western Cultures: Evidence from A Community-Centered Approach. Proceedings of the AAAI\/ACM Conference on AI Ethics and Society 7 (Oct. 2024) 476\u2013489. https:\/\/doi.org\/10.1609\/aies.v7i1.31651","DOI":"10.1609\/aies.v7i1.31651"},{"key":"e_1_3_3_3_32_2","doi-asserted-by":"publisher","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan Amy Yang Angela Fan Anirudh Goyal Anthony Hartshorn Aobo Yang Archi Mitra Archie Sravankumar Artem Korenev Arthur Hinsvark Arun Rao Aston Zhang Aurelien Rodriguez Austen Gregerson Ava Spataru Baptiste Roziere Bethany Biron Binh Tang Bobbie Chern Charlotte Caucheteux Chaya Nayak Chloe Bi Chris Marra Chris McConnell Christian Keller Christophe Touret Chunyang Wu Corinne Wong Cristian\u00a0Canton Ferrer Cyrus Nikolaidis Damien Allonsius Daniel Song Danielle Pintz Danny Livshits Danny Wyatt David Esiobu Dhruv Choudhary Dhruv Mahajan Diego Garcia-Olano Diego Perino Dieuwke Hupkes Egor Lakomkin et\u00a0al. 2024. The Llama 3 Herd of Models. https:\/\/doi.org\/10.48550\/arXiv.2407.21783","DOI":"10.48550\/arXiv.2407.21783"},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"publisher","unstructured":"Shashank Gupta Vaishnavi Shrivastava Ameet Deshpande Ashwin Kalyan Peter Clark Ashish Sabharwal and Tushar Khot. 2024. Bias Runs Deep: Implicit Reasoning Biases in Persona-Assigned LLMs. https:\/\/doi.org\/10.48550\/arXiv.2311.04892","DOI":"10.48550\/arXiv.2311.04892"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3659017"},{"key":"e_1_3_3_3_35_2","unstructured":"Moritz Hardt Eric Price and Nathan Srebro. 2016. Equality of Opportunity in Supervised Learning. https:\/\/arxiv.org\/abs\/1610.02413"},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"publisher","unstructured":"Ruidan He Linlin Liu Hai Ye Qingyu Tan Bosheng Ding Liying Cheng Jia-Wei Low Lidong Bing and Luo Si. 2021. On the Effectiveness of Adapter-based Tuning for Pretrained Language Model Adaptation. https:\/\/doi.org\/10.48550\/arXiv.2106.03164","DOI":"10.48550\/arXiv.2106.03164"},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3375674"},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"publisher","unstructured":"Maurice Jakesch Jeffrey\u00a0T. Hancock and Mor Naaman. 2023. Human heuristics for AI-generated language are flawed. Proceedings of the National Academy of Sciences 120 11 (2023). https:\/\/doi.org\/10.1073\/pnas.2208839120","DOI":"10.1073\/pnas.2208839120"},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3461702.3462566"},{"key":"e_1_3_3_3_40_2","series-title":"(NIPS \u201923)","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Jiang Guangyuan","year":"2023","unstructured":"Guangyuan Jiang, Manjie Xu, Song-Chun Zhu, Wenjuan Han, Chi Zhang, and Yixin Zhu. 2023. Evaluating and inducing personality in pre-trained language models. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS \u201923). Curran Associates Inc., Red Hook, NY, USA, Article 466, 22\u00a0pages."},{"key":"e_1_3_3_3_41_2","doi-asserted-by":"publisher","unstructured":"Zhifeng Jiang Zhihua Jin and Guoliang He. 2025. Safeguarding System Prompts for LLMs. https:\/\/doi.org\/10.48550\/arXiv.2412.13426","DOI":"10.48550\/arXiv.2412.13426"},{"key":"e_1_3_3_3_42_2","doi-asserted-by":"publisher","unstructured":"Jared Katzman Angelina Wang Morgan Scheuerman Su\u00a0Lin Blodgett Kristen Laird Hanna Wallach and Solon Barocas. 2023. Taxonomizing and Measuring Representational Harms: A Look at Image Tagging. Proceedings of the AAAI Conference on Artificial Intelligence 37 12 (June 2023) 14277\u201314285. https:\/\/doi.org\/10.1609\/aaai.v37i12.26670","DOI":"10.1609\/aaai.v37i12.26670"},{"key":"e_1_3_3_3_43_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-long.91"},{"key":"e_1_3_3_3_44_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.29"},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","unstructured":"Adriano Koshiyama Emre Kazim Philip Treleaven Pete Rai Lukasz Szpruch Giles Pavey Ghazi Ahamat Franziska Leutner Randy Goebel Andrew Knight Janet Adams Christina Hitrova Jeremy Barnett Parashkev Nachev David Barber Tomas Chamorro-Premuzic Konstantin Klemmer Miro Gregorovic Shakeel Khan Elizabeth Lomas Airlie Hilliard and Siddhant Chatterjee. 2024. Towards algorithm auditing: managing legal ethical and technological risks of AI ML and associated algorithms. Royal Society Open Science 11 5 (May 2024) 230859. https:\/\/doi.org\/10.1098\/rsos.230859","DOI":"10.1098\/rsos.230859"},{"key":"e_1_3_3_3_46_2","doi-asserted-by":"crossref","unstructured":"Bushra Kundi Christo El\u00a0Morr Rachel Gorman and Ena Dua. 2023. Artificial intelligence and bias: a scoping review. AI and Society (2023) 199\u2013215.","DOI":"10.1201\/9781003261247-15"},{"key":"e_1_3_3_3_47_2","doi-asserted-by":"publisher","unstructured":"Nils K\u00f6bis and Luca\u00a0D. Mossink. 2021. Artificial intelligence versus Maya Angelou: Experimental evidence that people cannot differentiate AI-generated from human-written poetry. Computers in Human Behavior 114 (2021) 106553. https:\/\/doi.org\/10.1016\/j.chb.2020.106553","DOI":"10.1016\/j.chb.2020.106553"},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"publisher","unstructured":"Ehsan Latif and Xiaoming Zhai. 2024. Fine-tuning ChatGPT for automatic scoring. Computers and Education: Artificial Intelligence 6 (June 2024) 100210. https:\/\/doi.org\/10.1016\/j.caeai.2024.100210","DOI":"10.1016\/j.caeai.2024.100210"},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658975"},{"key":"e_1_3_3_3_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445261"},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"publisher","unstructured":"Seongyun Lee Sue\u00a0Hyun Park Seungone Kim and Minjoon Seo. 2024. Aligning to Thousands of Preferences via System Message Generalization. https:\/\/doi.org\/10.48550\/arXiv.2405.17977","DOI":"10.48550\/arXiv.2405.17977"},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594062"},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"crossref","unstructured":"Alina Leidinger and Richard Rogers. 2024. How Are LLMs Mitigating Stereotyping Harms? Learning from Search Engine Studies. https:\/\/arxiv.org\/abs\/2407.11733","DOI":"10.1609\/aies.v7i1.31684"},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581463"},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"publisher","unstructured":"Kenneth Li Tianle Liu Naomi Bashkansky David Bau Fernanda Vi\u00e9gas Hanspeter Pfister and Martin Wattenberg. 2024. Measuring and Controlling Instruction (In)Stability in Language Model Dialogs. https:\/\/doi.org\/10.48550\/arXiv.2402.10962","DOI":"10.48550\/arXiv.2402.10962"},{"key":"e_1_3_3_3_56_2","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. https:\/\/arxiv.org\/abs\/1907.11692"},{"key":"e_1_3_3_3_57_2","unstructured":"Chandler May Alex Wang Shikha Bordia Samuel\u00a0R. Bowman and Rachel Rudinger. 2019. On Measuring Social Biases in Sentence Encoders. https:\/\/arxiv.org\/abs\/1903.10561"},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594109"},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658967"},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"publisher","unstructured":"Norman Mu Sarah Chen Zifan Wang Sizhe Chen David Karamardian Lulwa Aljeraisy Basel Alomair Dan Hendrycks and David Wagner. 2024. Can LLMs Follow Simple Rules?https:\/\/doi.org\/10.48550\/arXiv.2311.04235","DOI":"10.48550\/arXiv.2311.04235"},{"key":"e_1_3_3_3_61_2","doi-asserted-by":"publisher","unstructured":"Mir Murtaza Yamna Ahmed Jawwad\u00a0Ahmed Shamsi Fahad Sherwani and Mariam Usman. 2022. AI-Based Personalized E-Learning Systems: Issues Challenges and Solutions. IEEE Access 10 (2022) 81323\u201381342. https:\/\/doi.org\/10.1109\/ACCESS.2022.3193938","DOI":"10.1109\/ACCESS.2022.3193938"},{"key":"e_1_3_3_3_62_2","first-page":"1","volume-title":"ACIS 2020 Proceedings","author":"Nadeem Ayesha","year":"2020","unstructured":"Ayesha Nadeem, Babak Abedin, and Olivera Marjanovic. 2020. Gender bias in AI: a review of contributing factors and mitigating strategies. In ACIS 2020 Proceedings. AIS Electronic Library (AISeL), 1\u201312. https:\/\/www.acis2020.org\/"},{"key":"e_1_3_3_3_63_2","unstructured":"Maayan Nahmias Yifat\u00a0Perel. 2021. The Oversight of Content Moderation by AI: Impact Assessments and Their Limitations. Harvard Journal on Legislation 58 (2021) 145."},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","unstructured":"Long Ouyang Jeff Wu Xu Jiang Diogo Almeida Carroll\u00a0L. Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray John Schulman Jacob Hilton Fraser Kelton Luke Miller Maddie Simens Amanda Askell Peter Welinder Paul Christiano Jan Leike and Ryan Lowe. 2022. Training language models to follow instructions with human feedback. https:\/\/doi.org\/10.48550\/arXiv.2203.02155","DOI":"10.48550\/arXiv.2203.02155"},{"key":"e_1_3_3_3_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594078"},{"key":"e_1_3_3_3_66_2","doi-asserted-by":"publisher","unstructured":"Sinead O\u2019Connor and Helen Liu. 2024. Gender bias\u00a0perpetuation and mitigation in AI technologies: challenges and opportunities. AI & SOCIETY 39 4 (Aug. 2024) 2045\u20132057. https:\/\/doi.org\/10.1007\/s00146-023-01675-4","DOI":"10.1007\/s00146-023-01675-4"},{"key":"e_1_3_3_3_67_2","doi-asserted-by":"publisher","unstructured":"Ye\u00a0Sul Park. 2024. White Default: Examining Racialized Biases Behind AI-Generated Images. Art Education 77 4 (July 2024) 36\u201345. https:\/\/doi.org\/10.1080\/00043125.2024.2330340","DOI":"10.1080\/00043125.2024.2330340"},{"key":"e_1_3_3_3_68_2","unstructured":"Parliamant and Council of the European Union. 2016. Regulation (EU) 2016\/679 of the European Parliament and of the Council. https:\/\/eur-lex.europa.eu\/legal-content\/EN\/TXT\/HTML\/?uri=CELEX:32016R0679&from=EN#d1e2051-1-1."},{"key":"e_1_3_3_3_69_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.186"},{"key":"e_1_3_3_3_70_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658969"},{"key":"e_1_3_3_3_71_2","doi-asserted-by":"publisher","unstructured":"Yanzhao Qin Tao Zhang Tao Zhang Yanjun Shen Wenjing Luo Haoze Sun Yan Zhang Yujing Qiao Weipeng Chen Zenan Zhou Wentao Zhang and Bin Cui. 2024. SysBench: Can Large Language Models Follow System Messages?https:\/\/doi.org\/10.48550\/arXiv.2408.10943","DOI":"10.48550\/arXiv.2408.10943"},{"key":"e_1_3_3_3_72_2","doi-asserted-by":"publisher","DOI":"10.1145\/3306618.3314244"},{"key":"e_1_3_3_3_73_2","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372873"},{"key":"e_1_3_3_3_74_2","doi-asserted-by":"publisher","unstructured":"Brianna Richardson and Juan\u00a0E. Gilbert. 2021. A Framework for Fairness: A Systematic Review of Existing Fair AI Solutions. https:\/\/doi.org\/10.48550\/arXiv.2112.05700","DOI":"10.48550\/arXiv.2112.05700"},{"key":"e_1_3_3_3_75_2","doi-asserted-by":"publisher","unstructured":"Shahnewaz\u00a0Karim Sakib and Anindya\u00a0Bijoy Das. 2024. Challenging Fairness: A Comprehensive Exploration of Bias in LLM-Based Recommendations. https:\/\/doi.org\/10.48550\/arXiv.2409.10825","DOI":"10.48550\/arXiv.2409.10825"},{"key":"e_1_3_3_3_76_2","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287598"},{"key":"e_1_3_3_3_77_2","unstructured":"Mrinank Sharma Meg Tong Tomasz Korbak David Duvenaud Amanda Askell Samuel\u00a0R. Bowman Newton Cheng Esin Durmus Zac Hatfield-Dodds Scott\u00a0R. Johnston Shauna Kravec Timothy Maxwell Sam McCandlish Kamal Ndousse Oliver Rausch Nicholas Schiefer Da Yan Miranda Zhang and Ethan Perez. 2023. Towards Understanding Sycophancy in Language Models. https:\/\/arxiv.org\/abs\/2310.13548"},{"key":"e_1_3_3_3_78_2","doi-asserted-by":"publisher","DOI":"10.1145\/3600211.3604673"},{"key":"e_1_3_3_3_79_2","unstructured":"Tianhao Shen Renren Jin Yufei Huang Chuang Liu Weilong Dong Zishan Guo Xinwei Wu Yan Liu and Deyi Xiong. 2023. Large Language Model Alignment: A Survey. https:\/\/arxiv.org\/abs\/2309.15025"},{"key":"e_1_3_3_3_80_2","doi-asserted-by":"crossref","unstructured":"Emily Sheng Kai-Wei Chang Premkumar Natarajan and Nanyun Peng. 2019. The Woman Worked as a Babysitter: On Biases in Language Generation. https:\/\/arxiv.org\/abs\/1909.01326","DOI":"10.18653\/v1\/D19-1339"},{"key":"e_1_3_3_3_81_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.eacl-long.111"},{"key":"e_1_3_3_3_82_2","doi-asserted-by":"publisher","unstructured":"Bangzhao Shu Lechen Zhang Minje Choi Lavinia Dunagan Lajanugen Logeswaran Moontae Lee Dallas Card and David Jurgens. 2024. You don\u2019t need a personality test to know these models are unreliable: Assessing the Reliability of Large Language Models on Psychometric Instruments. https:\/\/doi.org\/10.48550\/arXiv.2311.09718","DOI":"10.48550\/arXiv.2311.09718"},{"key":"e_1_3_3_3_83_2","doi-asserted-by":"publisher","unstructured":"Jatinder Singh Jennifer Cobbe and Chris Norval. 2019. Decision Provenance: Harnessing Data Flow for Accountable Systems. IEEE Access 7 (2019) 6562\u20136574. https:\/\/doi.org\/10.1109\/ACCESS.2018.2887201","DOI":"10.1109\/ACCESS.2018.2887201"},{"key":"e_1_3_3_3_84_2","doi-asserted-by":"publisher","unstructured":"Eric\u00a0Michael Smith Melissa Hall Melanie Kambadur Eleonora Presani and Adina Williams. 2022. \"I\u2019m sorry to hear that\": Finding New Biases in Language Models with a Holistic Descriptor Dataset. https:\/\/doi.org\/10.48550\/arXiv.2205.09209","DOI":"10.48550\/arXiv.2205.09209"},{"key":"e_1_3_3_3_85_2","unstructured":"Nathalie\u00a0A. Smuha. 2021. Beyond the Individual: Governing AI\u2019s Societal Harm. https:\/\/papers.ssrn.com\/abstract=3941956"},{"key":"e_1_3_3_3_86_2","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220046"},{"key":"e_1_3_3_3_87_2","doi-asserted-by":"publisher","DOI":"10.1145\/3465416.3483305"},{"key":"e_1_3_3_3_88_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658992"},{"key":"e_1_3_3_3_89_2","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372837"},{"key":"e_1_3_3_3_90_2","unstructured":"Alex Tamkin Amanda Askell Liane Lovitt Esin Durmus Nicholas Joseph Shauna Kravec Karina Nguyen Jared Kaplan and Deep Ganguli. 2023. Evaluating and Mitigating Discrimination in Language Model Decisions. http:\/\/arxiv.org\/abs\/2312.03689"},{"key":"e_1_3_3_3_91_2","doi-asserted-by":"publisher","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian\u00a0Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit\u00a0Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric\u00a0Michael Smith Ranjan Subramanian Xiaoqing\u00a0Ellen Tan Binh Tang Ross Taylor Adina Williams Jian\u00a0Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. https:\/\/doi.org\/10.48550\/arXiv.2307.09288","DOI":"10.48550\/arXiv.2307.09288"},{"key":"e_1_3_3_3_92_2","unstructured":"Eric Wallace Kai Xiao Reimar Leike Lilian Weng Johannes Heidecke and Alex Beutel. 2024. The Instruction Hierarchy: Training LLMs to Prioritize Privileged Instructions. http:\/\/arxiv.org\/abs\/2404.13208"},{"key":"e_1_3_3_3_93_2","unstructured":"Boxin Wang Weixin Chen Hengzhi Pei Chulin Xie Mintong Kang Chenhui Zhang Chejian Xu Zidi Xiong Ritik Dutta Rylan Schaeffer Sang\u00a0T. Truong Simran Arora Mantas Mazeika Dan Hendrycks Zinan Lin Yu Cheng Sanmi Koyejo Dawn Song and Bo Li. 2024. DecodingTrust: A Comprehensive Assessment of Trustworthiness in GPT Models. http:\/\/arxiv.org\/abs\/2306.11698"},{"key":"e_1_3_3_3_94_2","doi-asserted-by":"crossref","unstructured":"Yuan Wang Xuyang Wu Hsin-Tai Wu Zhiqiang Tao and Yi Fang. 2024. Do Large Language Models Rank Fairly? An Empirical Study on the Fairness of LLMs as Rankers. https:\/\/arxiv.org\/abs\/2404.03192","DOI":"10.18653\/v1\/2024.naacl-long.319"},{"key":"e_1_3_3_3_95_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533088"},{"key":"e_1_3_3_3_96_2","doi-asserted-by":"crossref","unstructured":"David\u00a0Gray Widder and Dawn Nafus. 2023. Dislocated accountabilities in the \u201cAI supply chain\u201d: Modularity and developers\u2019 notions of responsibility. Big Data Soc. 10 1 (Jan. 2023).","DOI":"10.1177\/20539517231177620"},{"key":"e_1_3_3_3_97_2","doi-asserted-by":"publisher","unstructured":"Bowen Xu Shaoyu Wu Kai Liu and Lulu Hu. 2024. Mixture-of-Instructions: Comprehensive Alignment of a Large Language Model through the Mixture of Diverse System Prompting Instructions. https:\/\/doi.org\/10.48550\/arXiv.2404.18410","DOI":"10.48550\/arXiv.2404.18410"},{"key":"e_1_3_3_3_98_2","unstructured":"Muhammad\u00a0Bilal Zafar Isabel Valera Manuel\u00a0Gomez Rodriguez Krishna\u00a0P. Gummadi and Adrian Weller. 2017. From Parity to Preference-based Notions of Fairness in Classification. https:\/\/arxiv.org\/abs\/1707.00010"},{"key":"e_1_3_3_3_99_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581388"},{"key":"e_1_3_3_3_100_2","unstructured":"Zhehao Zhang Ryan\u00a0A. Rossi Branislav Kveton Yijia Shao Diyi Yang Hamed Zamani Franck Dernoncourt Joe Barrow Tong Yu Sungchul Kim Ruiyi Zhang Jiuxiang Gu Tyler Derr Hongjie Chen Junda Wu Xiang Chen Zichao Wang Subrata Mitra Nedim Lipka Nesreen Ahmed and Yu Wang. 2024. Personalization of Large Language Models: A Survey. https:\/\/arxiv.org\/abs\/2411.00027"},{"key":"e_1_3_3_3_101_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01456"},{"key":"e_1_3_3_3_102_2","doi-asserted-by":"publisher","unstructured":"Mingqian Zheng Jiaxin Pei and David Jurgens. 2023. Is \"A Helpful Assistant\" the Best Role for Large Language Models? A Systematic Evaluation of Social Roles in System Prompts. https:\/\/doi.org\/10.48550\/arXiv.2311.10054","DOI":"10.48550\/arXiv.2311.10054"},{"key":"e_1_3_3_3_103_2","doi-asserted-by":"publisher","unstructured":"Ce Zhou Qian Li Chen Li Jun Yu Yixin Liu Guangjing Wang Kai Zhang Cheng Ji Qiben Yan Lifang He Hao Peng Jianxin Li Jia Wu Ziwei Liu Pengtao Xie Caiming Xiong Jian Pei Philip\u00a0S. Yu and Lichao Sun. 2024. A comprehensive survey on pretrained foundation models: a history from BERT to ChatGPT. International Journal of Machine Learning and Cybernetics (Nov. 2024). https:\/\/doi.org\/10.1007\/s13042-024-02443-6","DOI":"10.1007\/s13042-024-02443-6"},{"key":"e_1_3_3_3_104_2","doi-asserted-by":"publisher","unstructured":"Lei Zhu Xinjiang Wang Wayne Zhang and Rynson W.\u00a0H. Lau. 2024. RelayAttention for Efficient Large Language Model Serving with Long System Prompts. https:\/\/doi.org\/10.48550\/arXiv.2402.14808","DOI":"10.48550\/arXiv.2402.14808"}],"event":{"name":"FAccT '25: The 2025 ACM Conference on Fairness, Accountability, and Transparency","location":"Athens Greece","acronym":"FAccT '25"},"container-title":["Proceedings of the 2025 ACM Conference on Fairness, Accountability, and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3715275.3732038","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T06:06:08Z","timestamp":1759903568000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3715275.3732038"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,23]]},"references-count":103,"alternative-id":["10.1145\/3715275.3732038","10.1145\/3715275"],"URL":"https:\/\/doi.org\/10.1145\/3715275.3732038","relation":{},"subject":[],"published":{"date-parts":[[2025,6,23]]},"assertion":[{"value":"2025-06-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}