{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:28:05Z","timestamp":1775665685767,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":77,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Bill & Melinda Gates Foundation","award":["OPP1144"],"award-info":[{"award-number":["OPP1144"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1145\/3696410.3714705","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:47:11Z","timestamp":1745362031000},"page":"3204-3218","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["C3AI: Crafting and Evaluating Constitutions for Constitutional AI"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0636-5046","authenticated-orcid":false,"given":"Yara","family":"Kyrychenko","sequence":"first","affiliation":[{"name":"University of Cambridge, Cambridge, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7177-9152","authenticated-orcid":false,"given":"Ke","family":"Zhou","sequence":"additional","affiliation":[{"name":"Nokia Bell Labs, Cambridge, United Kingdom and University of Nottingham, Nottingham, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8774-2386","authenticated-orcid":false,"given":"Edyta","family":"Bogucka","sequence":"additional","affiliation":[{"name":"Nokia Bell Labs, Cambridge, United Kingdom and University of Cambridge, Cambridge, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9461-5804","authenticated-orcid":false,"given":"Daniele","family":"Quercia","sequence":"additional","affiliation":[{"name":"Nokia Bell Labs, Cambridge, United Kingdom and Politecnico di Torino, Torino, Italy"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3461702.3462624"},{"key":"e_1_3_2_1_2_1","volume-title":"Public Constitutional AI. arXiv:2406.16696","author":"Abiri Gilad","year":"2024","unstructured":"Gilad Abiri. 2024. Public Constitutional AI. arXiv:2406.16696 (2024)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1080\/00273171.2023.2194606"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s43681-021-00077-w"},{"key":"e_1_3_2_1_5_1","volume-title":"Erik Jenner, Stephen Casper, Oliver Sourbut, et al.","author":"Anwar Usman","year":"2024","unstructured":"Usman Anwar, Abulhair Saparov, Javier Rando, Daniel Paleka, Miles Turpin, Peter Hase, Ekdeep Singh Lubana, Erik Jenner, Stephen Casper, Oliver Sourbut, et al. 2024. Foundational Challenges in Assuring Alignment and Safety of Large Language Models. arXiv:2404.09932 (2024)."},{"key":"e_1_3_2_1_6_1","volume-title":"Constitutional and Administrative Law","author":"Bradley A.W.","unstructured":"Bradley A.W., Ewing D., and Christopher Knight. 2020. Constitutional and Administrative Law. Princeton University Press."},{"key":"e_1_3_2_1_7_1","unstructured":"Yuntao Bai Andy Jones Kamal Ndousse Amanda Askell Anna Chen Nova DasSarma Dawn Drain Stanislav Fort Deep Ganguli Tom Henighan et al. 2022. Training a Helpful and Harmless Assistant With Reinforcement Learning From Human Feedback. arXiv:2204.05862 (2022)."},{"key":"e_1_3_2_1_8_1","volume-title":"Constitutional AI: Harmlessness From AI Feedback. arXiv:2212.08073","author":"Bai Yuntao","year":"2022","unstructured":"Yuntao Bai, Saurav Kadavath, Sandipan Kundu, Amanda Askell, Jackson Kernion, Andy Jones, Anna Chen, Anna Goldie, Azalia Mirhoseini, Cameron McKinnon, et al. 2022. Constitutional AI: Harmlessness From AI Feedback. arXiv:2212.08073 (2022)."},{"key":"e_1_3_2_1_9_1","volume-title":"Hal Daum\u00e9 III, and Hanna Wallach","author":"Blodgett Su Lin","year":"2020","unstructured":"Su Lin Blodgett, Solon Barocas, Hal Daum\u00e9 III, and Hanna Wallach. 2020. Language (Technology) Is Power: A Critical Survey Of ''Bias'' in NLP. arXiv preprint arXiv:2005.14050 (2020)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.81"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_12_1","volume-title":"Dahl","author":"Bowman Samuel R.","year":"2021","unstructured":"Samuel R. Bowman and George E. Dahl. 2021. What Will It Take To Fix Benchmarking in Natural Language Understanding? arXiv:2104.02145 (2021)."},{"key":"e_1_3_2_1_13_1","unstructured":"Samuel R. Bowman Jeeyoon Hyun Ethan Perez Edwin Chen Craig Pettit Scott Heiner Kamil\u0117 Luko\u0161i\u016bt\u0117 Amanda Askell Andy Jones Anna Chen et al. 2022. Measuring Progress on Scalable Oversight for Large Language Models. arXiv:2211.03540 (2022)."},{"key":"e_1_3_2_1_14_1","volume-title":"J\u00e9r\u00e9my Scheurer, Javier Rando, Rachel Freedman, Tomasz Korbak, David Lindner, Pedro Freire, et al.","author":"Casper Stephen","year":"2023","unstructured":"Stephen Casper, Xander Davies, Claudia Shi, Thomas Krendl Gilbert, J\u00e9r\u00e9my Scheurer, Javier Rando, Rachel Freedman, Tomasz Korbak, David Lindner, Pedro Freire, et al. 2023. Open Problems and Fundamental Limitations of Reinforcement Learning From Human Feedback. arXiv:2307.15217 (2023)."},{"key":"e_1_3_2_1_15_1","volume-title":"ITERALIGN: Iterative Constitutional Alignment of Large Language Models. arXiv:2403.18341","author":"Chen Xiusi","year":"2024","unstructured":"Xiusi Chen, Hongzhi Wen, Sreyashi Nag, Chen Luo, Qingyu Yin, Ruirui Li, Zheng Li, and Wei Wang. 2024. ITERALIGN: Iterative Constitutional Alignment of Large Language Models. arXiv:2403.18341 (2024)."},{"key":"e_1_3_2_1_16_1","volume-title":"Training Verifiers to Solve Math Word Problems. ArXiv abs\/2110.14168","author":"Cobbe Karl","year":"2021","unstructured":"Karl Cobbe, Vineet Kosaraju, Mohammad Bavarian, Mark Chen, Heewoo Jun, Lukasz Kaiser, Matthias Plappert, Jerry Tworek, Jacob Hilton, Reiichiro Nakano, Christopher Hesse, and John Schulman. 2021. Training Verifiers to Solve Math Word Problems. ArXiv abs\/2110.14168 (2021). https:\/\/api.semanticscholar.org\/ CorpusID:239998651"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the International Conference on Machine Learning (ICML).","author":"Conitzer Vincent","year":"2024","unstructured":"Vincent Conitzer, Rachel Freedman, Jobst Heitzig, Wesley H Holliday, Bob M Jacobs, Nathan Lambert, Milan Moss\u00e9, Eric Pacuit, Stuart Russell, Hailey Schoelkopf, et al. 2024. Social Choice Should Guide AI Alignment in Dealing with Diverse Human Feedback. In Proceedings of the International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_18_1","volume-title":"Investigating Data Contamination in Modern Benchmarks for Large Language Models. arXiv:2311.09783","author":"Deng Chunyuan","year":"2023","unstructured":"Chunyuan Deng, Yilun Zhao, Xiangru Tang, Mark Gerstein, and Arman Cohan. 2023. Investigating Data Contamination in Modern Benchmarks for Large Language Models. arXiv:2311.09783 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"Toxicity in ChatGPT: Analyzing Persona-Assigned Language Models. arXiv:2304.05335","author":"Deshpande Ameet","year":"2023","unstructured":"Ameet Deshpande, Vishvak Murahari, Tanmay Rajpurohit, Ashwin Kalyan, and Karthik Narasimhan. 2023. Toxicity in ChatGPT: Analyzing Persona-Assigned Language Models. arXiv:2304.05335 (2023)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1097\/01.mlr.0000245426.10853.30"},{"key":"e_1_3_2_1_21_1","unstructured":"Esin Durmus Karina Nguyen Thomas I Liao Nicholas Schiefer Amanda Askell Anton Bakhtin Carol Chen Zac Hatfield-Dodds Danny Hernandez Nicholas Joseph et al. 2023. Towards Measuring the Representation of Subjective Global Opinions in Language Models. arXiv:2306.16388 (2023)."},{"key":"e_1_3_2_1_22_1","volume-title":"Reise","author":"Embretson Susan E.","year":"2013","unstructured":"Susan E. Embretson and Steven P. Reise. 2013. Item Response Theory. Psychology Press."},{"key":"e_1_3_2_1_23_1","volume-title":"Understanding Dataset Difficulty with Usable Information. In International Conference on Machine Learning (ICML). PMLR, 5988--6008","author":"Ethayarajh Kawin","year":"2022","unstructured":"Kawin Ethayarajh, Yejin Choi, and Swabha Swayamdipta. 2022. Understanding Dataset Difficulty with Usable Information. In International Conference on Machine Learning (ICML). PMLR, 5988--6008."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1093\/jlb"},{"key":"e_1_3_2_1_25_1","volume-title":"Wegener","author":"Fabrigar Leandre R.","year":"2012","unstructured":"Leandre R. Fabrigar and Duane T. Wegener. 2012. Exploratory Factor Analysis. Oxford University Press."},{"key":"e_1_3_2_1_26_1","volume-title":"Inverse Constitutional AI: Compressing Preferences into Principles. arXiv:2406.06560","author":"Findeis Arduin","year":"2024","unstructured":"Arduin Findeis, Timo Kaufmann, Eyke H\u00fcllermeier, Samuel Albanie, and Robert Mullins. 2024. Inverse Constitutional AI: Compressing Preferences into Principles. arXiv:2406.06560 (2024)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11023-020-09539-2"},{"key":"e_1_3_2_1_28_1","unstructured":"Deep Ganguli Amanda Askell Nicholas Schiefer Thomas I Liao Kamil\u0117 Luko\u0161i\u016bt\u0117 Anna Chen Anna Goldie Azalia Mirhoseini Catherine Olsson Danny Hernandez et al. 2023. The Capacity for Moral Self-Correction in Large Language Models. arXiv:2302.07459 (2023)."},{"key":"e_1_3_2_1_29_1","unstructured":"Deep Ganguli Liane Lovitt Jackson Kernion Amanda Askell Yuntao Bai Saurav Kadavath Ben Mann Ethan Perez Nicholas Schiefer Kamal Ndousse et al. 2022. Red Teaming Language Models To Reduce Harms: Methods Scaling Behaviors and Lessons Learned. arXiv:2209.07858 (2022)."},{"key":"e_1_3_2_1_30_1","unstructured":"Deep Ganguli Nicholas Schiefer Marina Favaro and Jack Clark. 2023. Challenges in Evaluating AI Systems. https:\/\/www.anthropic.com\/index\/evaluatingai-systems"},{"key":"e_1_3_2_1_31_1","volume-title":"Exploratory Graph Analysis in Context. Revista Psicologia: Teoria e Pr\u00e1tica 24, 3","author":"Golino Hudson","year":"2022","unstructured":"Hudson Golino, Alexander P. Christensen, and Luis Eduardo Garrido. 2022. Exploratory Graph Analysis in Context. Revista Psicologia: Teoria e Pr\u00e1tica 24, 3 (2022)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1037\/met0000255"},{"key":"e_1_3_2_1_33_1","volume-title":"Golino and Sacha Epskamp","author":"Hudson","year":"2017","unstructured":"Hudson F. Golino and Sacha Epskamp. 2017. Exploratory Graph Analysis: A New Approach for Estimating the Number of Dimensions in Psychological Research. PloS one 12, 6 (2017)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1287\/isre.2021.1058"},{"key":"e_1_3_2_1_35_1","volume-title":"Measuring Massive Multitask Language Understanding. arXiv:2009.03300","author":"Hendrycks Dan","year":"2020","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2020. Measuring Massive Multitask Language Understanding. arXiv:2009.03300 (2020)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","unstructured":"Jiwoo Hong Noah Lee and James Thorne. 2024. ORPO: Monolithic Preference Optimization without Reference Model. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing Yaser Al-Onaizan Mohit Bansal and Yun-Nung Chen (Eds.). Association for Computational Linguistics 11170--11189. doi:10.18653\/v1\/2024.emnlp-main.626","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1038\/s43588-024-00741-1"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658979"},{"key":"e_1_3_2_1_39_1","unstructured":"Jiaming Ji Tianyi Qiu Boyuan Chen Borong Zhang Hantao Lou Kaile Wang Yawen Duan Zhonghao He Jiayi Zhou Zhaowei Zhang et al. 2023. AI Alignment: A Comprehensive Survey. arXiv:2310.19852 (2023)."},{"key":"e_1_3_2_1_40_1","unstructured":"Jigsaw. 2024. Announcing Experimental Bridging Attributes in Perspective API. https:\/\/medium.com\/jigsaw\/announcing-experimental-bridging-attributesin-perspective-api-578a9d59ac37"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-019-0088-2"},{"key":"e_1_3_2_1_42_1","volume-title":"Hale","author":"Kirk Hannah Rose","year":"2024","unstructured":"Hannah Rose Kirk, Bertie Vidgen, Paul R\u00f6ttger, and Scott A. Hale. 2024. The Benefits, Risks and Bounds of Personalizing the Alignment of Large Language Models to Individuals. Nature Machine Intelligence (2024), 1--10."},{"key":"e_1_3_2_1_43_1","unstructured":"Hannah Rose Kirk Alexander Whitefield Paul R\u00f6ttger Andrew Bean Katerina Margatina Juan Ciro Rafael Mosquera Max Bartolo Adina Williams He He et al. 2024. The PRISM Alignment Project: What Participatory Representative and Individualised Human Feedback Reveals About the Subjective and Multicultural Alignment of Large Language Models. arXiv:2404.16019 (2024)."},{"key":"e_1_3_2_1_44_1","unstructured":"Sandipan Kundu Yuntao Bai Saurav Kadavath Amanda Askell Andrew Callahan Anna Chen Anna Goldie Avital Balwit Azalia Mirhoseini Brayden McLean et al. 2023. Specific Versus General Principles for Constitutional AI. arXiv:2310.13798 (2023)."},{"key":"e_1_3_2_1_45_1","unstructured":"Maxime Labonne. 2024. Fine-Tune Llama 3 With ORPO. https:\/\/huggingface.co\/blog\/mlabonne\/orpo-llama-3"},{"key":"e_1_3_2_1_46_1","volume-title":"Khyathi Chandu, Nouha Dziri, Sachin Kumar, Tom Zick, Yejin Choi, et al.","author":"Lambert Nathan","year":"2024","unstructured":"Nathan Lambert, Valentina Pyatkin, Jacob Morrison, LJ Miranda, Bill Yuchen Lin, Khyathi Chandu, Nouha Dziri, Sachin Kumar, Tom Zick, Yejin Choi, et al. 2024. RewardBench: Evaluating Reward Models for Language Modeling. arXiv:2403.13787 (2024)."},{"key":"e_1_3_2_1_47_1","volume-title":"Privacy in Large Language Models: Attacks, Defenses and Future Directions. arXiv:2310.10383","author":"Li Haoran","year":"2023","unstructured":"Haoran Li, Yulin Chen, Jinglong Luo, Yan Kang, Xiaojin Zhang, Qi Hu, Chunkit Chan, and Yangqiu Song. 2023. Privacy in Large Language Models: Attacks, Defenses and Future Directions. arXiv:2310.10383 (2023)."},{"key":"e_1_3_2_1_48_1","volume-title":"Aligning with Human Judgement: The Role of Pairwise Preference in Large Language Model Evaluators. arXiv:2403.16950","author":"Liu Yinhong","year":"2024","unstructured":"Yinhong Liu, Han Zhou, Zhijiang Guo, Ehsan Shareghi, Ivan Vulic, Anna Korhonen, and Nigel Collier. 2024. Aligning with Human Judgement: The Role of Pairwise Preference in Large Language Model Evaluators. arXiv:2403.16950 (2024)."},{"key":"e_1_3_2_1_49_1","unstructured":"Dan Milmo. 2023. ChatGPT Reaches 100 Million Users Two Months After Launch. https:\/\/www.theguardian.com\/technology\/2023\/feb\/02\/chatgpt-100- million-users-open-ai-fastest-growing-app"},{"key":"e_1_3_2_1_50_1","first-page":"27730","article-title":"Training Language Models To Follow Instructions With Human Feedback","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, et al. 2022. Training Language Models To Follow Instructions With Human Feedback. Advances in Neural Information Processing Systems 35 (2022), 27730--27744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_51_1","volume-title":"Bridging Systems: Open Problems for Countering Destructive Divisiveness Across Ranking, Recommenders, and Governance. arXiv:2301.09976","author":"Ovadya Aviv","year":"2023","unstructured":"Aviv Ovadya and Luke Thorburn. 2023. Bridging Systems: Open Problems for Countering Destructive Divisiveness Across Ranking, Recommenders, and Governance. arXiv:2301.09976 (2023)."},{"key":"e_1_3_2_1_52_1","volume-title":"Phu Mon Htut, and Samuel R Bowman","author":"Parrish Alicia","year":"2021","unstructured":"Alicia Parrish, Angelica Chen, Nikita Nangia, Vishakh Padmakumar, Jason Phang, Jana Thompson, Phu Mon Htut, and Samuel R Bowman. 2021. BBQ: A Hand-Built Bias Benchmark for Question Answering. arXiv:2110.08193 (2021)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","unstructured":"Ethan Perez Sam Ringer Kamile Lukosiute Karina Nguyen Edwin Chen Scott Heiner et al. 2023. Discovering Language Model Behaviors with Model-Written Evaluations. In Findings of the Association for Computational Linguistics Anna Rogers Jordan Boyd-Graber and Naoaki Okazaki (Eds.). Association for Computational Linguistics 13387--13434. doi:10.18653\/v1\/2023.findings-acl.847","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3640543.3645144"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"crossref","unstructured":"Zhen Qin Rolf Jagerman Kai Hui Honglei Zhuang Junru Wu Le Yan Jiaming Shen Tianqi Liu Jialu Liu Donald Metzler et al. 2023. Large Language Models Are Effective Text Rankers With Pairwise Ranking Prompting. arXiv:2306.17563 (2023).","DOI":"10.18653\/v1\/2024.findings-naacl.97"},{"key":"e_1_3_2_1_56_1","volume-title":"Direct Preference Optimization: Your Language Model Is Secretly a Reward Model. Advances in Neural Information Processing Systems 36","author":"Rafailov Rafael","year":"2024","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Christopher D. Manning, Stefano Ermon, and Chelsea Finn. 2024. Direct Preference Optimization: Your Language Model Is Secretly a Reward Model. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642810"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2017.00025"},{"key":"e_1_3_2_1_59_1","volume-title":"International Conference on Machine Learning (ICML). 29971--30004","author":"Santurkar Shibani","year":"2023","unstructured":"Shibani Santurkar, Esin Durmus, Faisal Ladhak, Cinoo Lee, Percy Liang, and Tatsunori Hashimoto. 2023. Whose Opinions Do Language Models Reflect?. In International Conference on Machine Learning (ICML). 29971--30004."},{"key":"e_1_3_2_1_60_1","volume-title":"Universals in the Content and Structure of Values: Theoretical Advances and Empirical Tests in 20 Countries. Advances in Experimental Social Psychology\/Academic Press","author":"Schwartz Shalom H","year":"1992","unstructured":"Shalom H Schwartz. 1992. Universals in the Content and Structure of Values: Theoretical Advances and Empirical Tests in 20 Countries. Advances in Experimental Social Psychology\/Academic Press (1992)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1177\/1073191121998760"},{"key":"e_1_3_2_1_62_1","unstructured":"Hua Shen Tiffany Knearem Reshmi Ghosh Kenan Alkiek Kundan Krishna Yachuan Liu Ziqiao Ma Savvas Petridis Yi-Hao Peng Li Qiwei et al. 2024. Towards Bidirectional Human-AI Alignment: A Systematic Review for Clarifications Framework and Future Directions. arXiv:2406.09264 (2024)."},{"key":"e_1_3_2_1_63_1","unstructured":"Toby Shevlane Sebastian Farquhar Ben Garfinkel Mary Phuong Jess Whittlestone Jade Leung Daniel Kokotajlo Nahema Marchal Markus Anderljung Noam Kolt et al. 2023. Model Evaluation for Extreme Risks. arXiv:2305.15324 (2023)."},{"key":"e_1_3_2_1_64_1","volume-title":"Basic Rights: Subsistence, Affluence, and US Foreign Policy","author":"Shue Henry","year":"2020","unstructured":"Henry Shue. 2020. Basic Rights: Subsistence, Affluence, and US Foreign Policy. Princeton University Press."},{"key":"e_1_3_2_1_65_1","volume-title":"Andre Ye, Liwei Jiang, Ximing Lu, Nouha Dziri, et al.","author":"Sorensen Taylor","year":"2024","unstructured":"Taylor Sorensen, Jared Moore, Jillian Fisher, Mitchell Gordon, Niloofar Mireshghallah, Christopher Michael Rytting, Andre Ye, Liwei Jiang, Ximing Lu, Nouha Dziri, et al. 2024. A Roadmap to Pluralistic Alignment. arXiv:2402.05070 (2024)."},{"key":"e_1_3_2_1_66_1","volume-title":"Number of Artificial Intelligence (AI) Tool Users Globally From 2020 to","year":"2030","unstructured":"Statista. 2024. Number of Artificial Intelligence (AI) Tool Users Globally From 2020 to 2030. https:\/\/www.statista.com\/forecasts\/1449844\/ai-tool-users-worldwide"},{"key":"e_1_3_2_1_67_1","unstructured":"Lichao Sun Yue Huang Haoran Wang Siyuan Wu Qihui Zhang Chujie Gao Yixin Huang Wenhan Lyu Yixuan Zhang Xiner Li et al. 2024. TrustLLM: Trustworthiness in Large Language Models. arXiv:2401.05561 (2024)."},{"key":"e_1_3_2_1_68_1","volume-title":"Salmon: Self-Alignment With Principle-Following Reward Models. arXiv:2310.05910","author":"Sun Zhiqing","year":"2023","unstructured":"Zhiqing Sun, Yikang Shen, Hongxin Zhang, Qinhong Zhou, Zhenfang Chen, David Cox, Yiming Yang, and Chuang Gan. 2023. Salmon: Self-Alignment With Principle-Following Reward Models. arXiv:2310.05910 (2023)."},{"key":"e_1_3_2_1_69_1","volume-title":"Superglue: A Stickier Benchmark for General-Purpose Language Understanding Systems. Advances in Neural Information Processing Systems 32","author":"Wang Alex","year":"2019","unstructured":"Alex Wang, Yada Pruksachatkun, Nikita Nangia, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel Bowman. 2019. Superglue: A Stickier Benchmark for General-Purpose Language Understanding Systems. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_70_1","volume-title":"Bowman","author":"Wang Alex","year":"2018","unstructured":"Alex Wang, Amanpreet Singh, Julian Michael, Felix Hill, Omer Levy, and Samuel R. Bowman. 2018. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. arXiv:1804.07461 (2018)."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.newsum-1.1"},{"key":"e_1_3_2_1_72_1","volume-title":"Large Language Models Are Not Fair Evaluators. ArXiv abs\/2305.17926","author":"Wang Peiyi","year":"2023","unstructured":"Peiyi Wang, Lei Li, Liang Chen, Dawei Zhu, Binghuai Lin, Yunbo Cao, Qi Liu, Tianyu Liu, and Zhifang Sui. 2023. Large Language Models Are Not Fair Evaluators. ArXiv abs\/2305.17926 (2023)."},{"key":"e_1_3_2_1_73_1","volume-title":"The Stanford Encyclopedia of Philosophy (Spring 2023 ed.), Edward N","author":"Wenar Leif","unstructured":"Leif Wenar. 2023. Rights. In The Stanford Encyclopedia of Philosophy (Spring 2023 ed.), Edward N. Zalta and Uri Nodelman (Eds.). Metaphysics Research Lab, Stanford University."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1080\/13678860110057638"},{"key":"e_1_3_2_1_75_1","unstructured":"Lianmin Zheng Wei-Lin Chiang Ying Sheng Tianle Li Siyuan Zhuang Zhanghao Wu Yonghao Zhuang Zhuohan Li Zi Lin Eric. P Xing Joseph E. Gonzalez Ion Stoica and Hao Zhang. 2023. LMSYS-Chat-1M: A Large-Scale Real-World LLM Conversation Dataset. arXiv:2309.11998"},{"key":"e_1_3_2_1_76_1","volume-title":"Fairer Preferences Elicit Improved Human-Aligned Large Language Model Judgments. arXiv preprint arXiv:2406.11370","author":"Zhou Han","year":"2024","unstructured":"Han Zhou, Xingchen Wan, Yinhong Liu, Nigel Collier, Ivan Vuli\u0107, and Anna Korhonen. 2024. Fairer Preferences Elicit Improved Human-Aligned Large Language Model Judgments. arXiv preprint arXiv:2406.11370 (2024)."},{"key":"e_1_3_2_1_77_1","volume-title":"Instruction-Following Evaluation for Large Language Models. arXiv:2311.07911","author":"Zhou Jeffrey","year":"2023","unstructured":"Jeffrey Zhou, Tianjian Lu, Swaroop Mishra, Siddhartha Brahma, Sujoy Basu, Yi Luan, Denny Zhou, and Le Hou. 2023. Instruction-Following Evaluation for Large Language Models. arXiv:2311.07911 (2023)."}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714705","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714705","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:57Z","timestamp":1750295937000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714705"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":77,"alternative-id":["10.1145\/3696410.3714705","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714705","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}