{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T15:13:16Z","timestamp":1780326796764,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3630106.3658979","type":"proceedings-article","created":{"date-parts":[[2024,6,5]],"date-time":"2024-06-05T13:14:21Z","timestamp":1717593261000},"page":"1395-1417","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":47,"title":["Collective Constitutional AI: Aligning a Language Model with Public Input"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-2654-5961","authenticated-orcid":false,"given":"Saffron","family":"Huang","sequence":"first","affiliation":[{"name":"Collective Intelligence Project, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7073-6728","authenticated-orcid":false,"given":"Divya","family":"Siddarth","sequence":"additional","affiliation":[{"name":"Collective Intelligence Project, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2105-5612","authenticated-orcid":false,"given":"Liane","family":"Lovitt","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3936-1391","authenticated-orcid":false,"given":"Thomas I.","family":"Liao","sequence":"additional","affiliation":[{"name":"Unaffiliated, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7331-8160","authenticated-orcid":false,"given":"Esin","family":"Durmus","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0007-3746","authenticated-orcid":false,"given":"Alex","family":"Tamkin","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9435-3817","authenticated-orcid":false,"given":"Deep","family":"Ganguli","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,6,5]]},"reference":[{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies. 107\u2013112","author":"Alm Cecilia\u00a0Ovesdotter","year":"2011","unstructured":"Cecilia\u00a0Ovesdotter Alm. 2011. Subjective Natural Language Problems: Motivations, Applications, Characterizations, and Implications. In Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies. 107\u2013112."},{"key":"e_1_3_2_1_3_1","volume-title":"Retrieved","year":"2023","unstructured":"Anthropic. 2023. Claude\u2019s Constitution. Retrieved Dec 23, 2023 from https:\/\/www.anthropic.com\/index\/claudes-constitution"},{"key":"e_1_3_2_1_4_1","unstructured":"Anthropic. 2023. Model Card and Evaluations for Claude Models. https:\/\/www-files.anthropic.com\/production\/images\/Model-Card-Claude-2.pdf"},{"key":"e_1_3_2_1_5_1","first-page":"1","volume-title":"Retrieved","year":"2023","unstructured":"Anthropic. 2023. Releasing Claude Instant 1.2. Retrieved Dec 23, 2023 from https:\/\/www.anthropic.com\/index\/releasing-claude-instant-1-2"},{"key":"e_1_3_2_1_6_1","volume-title":"Social Choice and Individual Values. Vol.\u00a012","author":"Arrow J","unstructured":"Kenneth\u00a0J Arrow. 2012. Social Choice and Individual Values. Vol.\u00a012. Yale University Press."},{"key":"e_1_3_2_1_7_1","volume-title":"Retrieved","author":"Askell Amanda","year":"2021","unstructured":"Amanda Askell, Yuntao Bai, Anna Chen, Dawn Drain, Deep Ganguli, Tom Henighan, Andy Jones, Nicholas Joseph, Ben Mann, Nova DasSarma, 2021. A General Language Assistant as a Laboratory for Alignment. arXiv:2112.00861 (2021). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2112.00861"},{"key":"e_1_3_2_1_8_1","volume-title":"Retrieved","author":"Bai Yuntao","year":"2022","unstructured":"Yuntao Bai, Andy Jones, Kamal Ndousse, Amanda Askell, Anna Chen, Nova DasSarma, Dawn Drain, Stanislav Fort, Deep Ganguli, Tom Henighan, 2022. Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback. arXiv:2204.05862 (2022). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2204.05862"},{"key":"e_1_3_2_1_9_1","volume-title":"Retrieved","author":"Bai Yuntao","year":"2022","unstructured":"Yuntao Bai, Saurav Kadavath, Sandipan Kundu, Amanda Askell, Jackson Kernion, Andy Jones, Anna Chen, Anna Goldie, Azalia Mirhoseini, Cameron McKinnon, 2022. Constitutional AI: Harmlessness from AI Feedback. arXiv:2212.08073 (2022). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2212.08073"},{"key":"e_1_3_2_1_10_1","volume-title":"Algorithmic injustice: a relational ethics approach. Patterns 2, 2","author":"Birhane Abeba","year":"2021","unstructured":"Abeba Birhane. 2021. Algorithmic injustice: a relational ethics approach. Patterns 2, 2 (2021)."},{"key":"e_1_3_2_1_11_1","volume-title":"Power to the People? Opportunities and Challenges for Participatory AI. Equity and Access in Algorithms, Mechanisms, and Optimization","author":"Birhane Abeba","year":"2022","unstructured":"Abeba Birhane, William Isaac, Vinodkumar Prabhakaran, Mark Diaz, Madeleine\u00a0Clare Elish, Iason Gabriel, and Shakir Mohamed. 2022. Power to the People? Opportunities and Challenges for Participatory AI. Equity and Access in Algorithms, Mechanisms, and Optimization (2022), 1\u20138."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533083"},{"key":"e_1_3_2_1_13_1","volume-title":"Retrieved","author":"Blodgett Su\u00a0Lin","year":"2020","unstructured":"Su\u00a0Lin Blodgett, Solon Barocas, Hal Daum\u00e9\u00a0III, and Hanna Wallach. 2020. Language (Technology) is Power: A Critical Survey of\" Bias\" in NLP. arXiv:2005.14050 (2020). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2005.14050"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.81"},{"key":"e_1_3_2_1_15_1","volume-title":"Retrieved","author":"Bowman R","year":"2021","unstructured":"Samuel\u00a0R Bowman and George\u00a0E Dahl. 2021. What Will it Take to Fix Benchmarking in Natural Language Understanding?arXiv:2104.02145 (2021). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2104.02145"},{"key":"e_1_3_2_1_16_1","first-page":"324","article-title":"Rank Analysis of Incomplete Block Designs","volume":"39","author":"Bradley Ralph\u00a0Allan","year":"1952","unstructured":"Ralph\u00a0Allan Bradley and Milton\u00a0E Terry. 1952. Rank Analysis of Incomplete Block Designs: I. The Method of Paired Comparisons. Biometrika 39, 3\/4 (1952), 324\u2013345.","journal-title":"I. The Method of Paired Comparisons. Biometrika"},{"key":"e_1_3_2_1_17_1","volume-title":"Deep Reinforcement Learning from Human Preferences. Advances in Neural Information Processing Systems 30","author":"Christiano F","year":"2017","unstructured":"Paul\u00a0F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep Reinforcement Learning from Human Preferences. Advances in Neural Information Processing Systems 30 (2017)."},{"key":"e_1_3_2_1_18_1","volume-title":"Retrieved","author":"Cobbe Karl","year":"2021","unstructured":"Karl Cobbe, Vineet Kosaraju, Mohammad Bavarian, Mark Chen, Heewoo Jun, Lukasz Kaiser, Matthias Plappert, Jerry Tworek, Jacob Hilton, Reiichiro Nakano, 2021. Training Verifiers to Solve Math Word Problems. arXiv:2110.14168 (2021). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2110.14168"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3617694.3623261"},{"key":"e_1_3_2_1_20_1","volume-title":"Retrieved","author":"Durmus Esin","year":"2023","unstructured":"Esin Durmus, Karina Nyugen, Thomas\u00a0I Liao, Nicholas Schiefer, Amanda Askell, Anton Bakhtin, Carol Chen, Zac Hatfield-Dodds, Danny Hernandez, Nicholas Joseph, 2023. Towards Measuring the Representation of Subjective Global Opinions in Language Models. arXiv:2306.16388 (2023). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2306.16388"},{"key":"e_1_3_2_1_21_1","volume-title":"Retrieved","author":"Feng KJ","year":"2023","unstructured":"KJ Feng, Quan Ze, Inyoung Cheong, King Xia, Amy\u00a0X Zhang, 2023. Case Repositories: Towards Case-Based Reasoning for AI Alignment. arXiv:2311.10934 (2023). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2311.10934"},{"key":"e_1_3_2_1_22_1","volume-title":"Artificial Intelligence, Values, and Alignment. Minds and machines 30, 3","author":"Gabriel Iason","year":"2020","unstructured":"Iason Gabriel. 2020. Artificial Intelligence, Values, and Alignment. Minds and machines 30, 3 (2020), 411\u2013437."},{"key":"e_1_3_2_1_23_1","volume-title":"Retrieved","author":"Ganguli Deep","year":"2023","unstructured":"Deep Ganguli, Amanda Askell, Nicholas Schiefer, Thomas Liao, Kamil\u0117 Luko\u0161i\u016bt\u0117, Anna Chen, Anna Goldie, Azalia Mirhoseini, Catherine Olsson, Danny Hernandez, 2023. The Capacity for Moral Self-Correction in Large Language Models. arXiv:2302.07459 (2023). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2302.07459"},{"key":"e_1_3_2_1_24_1","volume-title":"arXiv:2209.07858","author":"Ganguli Deep","year":"2022","unstructured":"Deep Ganguli, Liane Lovitt, Jackson Kernion, Amanda Askell, Yuntao Bai, Saurav Kadavath, Ben Mann, Ethan Perez, Nicholas Schiefer, Kamal Ndousse, 2022. Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned. arXiv:2209.07858 (2022). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2209.07858"},{"key":"e_1_3_2_1_25_1","volume-title":"Retrieved","author":"Google Gemini\u00a0Team","year":"2023","unstructured":"Gemini\u00a0Team Google. 2023. Gemini: A Family of Highly Capable Multimodal Models. arXiv:2312.11805 (2023). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2312.11805"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502004"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594071"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Amy Gutmann and Dennis\u00a0F Thompson. 2004. Why Deliberative Democracy?Princeton University Press.","DOI":"10.1515\/9781400826339"},{"key":"e_1_3_2_1_29_1","volume-title":"Retrieved","author":"Hao Karen","year":"2022","unstructured":"Karen Hao. 2022. Artificial Intelligence for the People. MIT Technology Review (2022). Retrieved Dec 23, 2023 from https:\/\/www.technologyreview.com\/2022\/04\/22\/1050394\/artificial-intelligence-for-the-people\/"},{"key":"e_1_3_2_1_30_1","volume-title":"Retrieved","author":"Hendrycks Dan","year":"2020","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2020. Measuring Massive Multitask Language Understanding. arXiv:2009.03300 (2020). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2009.03300"},{"key":"e_1_3_2_1_31_1","volume-title":"Retrieved","author":"Huang Saffron","year":"2023","unstructured":"Saffron Huang and Divya Siddarth. 2023. Generative AI and the Digital Commons. arXiv:2303.11074 (2023). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2303.11074"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445901"},{"key":"e_1_3_2_1_33_1","volume-title":"Retrieved","author":"Jiang Liwei","year":"2021","unstructured":"Liwei Jiang, Jena\u00a0D Hwang, Chandra Bhagavatula, Ronan\u00a0Le Bras, Jenny Liang, Jesse Dodge, Keisuke Sakaguchi, Maxwell Forbes, Jon Borchardt, Saadia Gabriel, 2021. Can Machines Learn Morality? The Delphi Experiment. arXiv:2110.07574 (2021). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2110.07574"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_36_1","volume-title":"Retrieved","author":"Kundu Sandipan","year":"2023","unstructured":"Sandipan Kundu, Yuntao Bai, Saurav Kadavath, Amanda Askell, Andrew Callahan, Anna Chen, Anna Goldie, Avital Balwit, Azalia Mirhoseini, Brayden McLean, 2023. Specific versus General Principles for Constitutional AI. arXiv:2310.13798 (2023). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2310.13798"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1007\/978-3-319-51532-8_10","article-title":"Learning Dynamic Robot-to-Human Object Handover from Human Feedback","volume":"1","author":"Kupcsik Andras","year":"2018","unstructured":"Andras Kupcsik, David Hsu, and Wee\u00a0Sun Lee. 2018. Learning Dynamic Robot-to-Human Object Handover from Human Feedback. Robotics Research: Volume 1 (2018), 161\u2013176.","journal-title":"Robotics Research"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3555625"},{"key":"e_1_3_2_1_39_1","volume-title":"Retrieved","author":"Liang Percy","year":"2022","unstructured":"Percy Liang, Rishi Bommasani, Tony Lee, Dimitris Tsipras, Dilara Soylu, Michihiro Yasunaga, Yian Zhang, Deepak Narayanan, Yuhuai Wu, Ananya Kumar, 2022. Holistic Evaluation of Language Models. arXiv:2211.09110 (2022). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2211.09110"},{"key":"e_1_3_2_1_40_1","unstructured":"Alexandra Mateescu and Madeleine Elish. 2019. AI in Context: The Labor of Integrating New Technologies. (2019)."},{"key":"e_1_3_2_1_41_1","volume-title":"Retrieved","author":"Nekoto Wilhelmina","year":"2020","unstructured":"Wilhelmina Nekoto, Vukosi Marivate, Tshinondiwa Matsila, Timi Fasubaa, Tajudeen Kolawole, Taiwo Fagbohungbe, Solomon\u00a0Oluwole Akinola, Shamsuddeen\u00a0Hassan Muhammad, Salomon Kabongo, Salomey Osei, 2020. Participatory Research for Low-resourced Machine Translation: A Case Study in African Languages. arXiv:2010.02353 (2020). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2010.02353"},{"key":"e_1_3_2_1_42_1","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, 2022. Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems 35 (2022), 27730\u201327744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_43_1","volume-title":"Retrieved","author":"Parrish Alicia","year":"2021","unstructured":"Alicia Parrish, Angelica Chen, Nikita Nangia, Vishakh Padmakumar, Jason Phang, Jana Thompson, Phu\u00a0Mon Htut, and Samuel\u00a0R Bowman. 2021. BBQ: A Hand-Built Bias Benchmark for Question Answering. arXiv:2110.08193 (2021). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2110.08193"},{"key":"e_1_3_2_1_44_1","volume-title":"International Conference on Machine Learning. PMLR, 27630\u201327641","author":"Peng Andi","year":"2023","unstructured":"Andi Peng, Aviv Netanyahu, Mark\u00a0K Ho, Tianmin Shu, Andreea Bobu, Julie Shah, and Pulkit Agrawal. 2023. Diagnosis, feedback, adaptation: A human-in-the-loop framework for test-time policy adaptation. In International Conference on Machine Learning. PMLR, 27630\u201327641."},{"key":"e_1_3_2_1_45_1","volume-title":"Majority of Americans have heard of ChatGPT, but few have tried it. (24","author":"Pew Research Center","year":"2023","unstructured":"Pew Research Center. 2023. Majority of Americans have heard of ChatGPT, but few have tried it. (24 May 2023). https:\/\/www.pewresearch.org\/short-reads\/2023\/05\/24\/a-majority-of-americans-have-heard-of-chatgpt-but-few-have-tried-it-themselves\/"},{"key":"e_1_3_2_1_46_1","volume-title":"Moderation Policy - Crowd Wisdom Project. Retrieved","author":"Project Crowd\u00a0Wisdom","year":"2024","unstructured":"Crowd\u00a0Wisdom Project. 2024. Moderation Policy - Crowd Wisdom Project. Retrieved Apr 8, 2024 from https:\/\/www.crowdwisdomproject.org\/moderation-policy\/"},{"key":"e_1_3_2_1_47_1","volume-title":"The Computational Democracy Project - Moderation. Retrieved","author":"Computational\u00a0Democracy Project The","year":"2024","unstructured":"The Computational\u00a0Democracy Project. 2024. The Computational Democracy Project - Moderation. Retrieved Apr 8, 2024 from https:\/\/compdemocracy.org\/Moderation\/"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594016"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594134"},{"key":"e_1_3_2_1_50_1","volume-title":"Direct Preference Optimization: Your Language Model is Secretly a Reward Model. Advances in Neural Information Processing Systems 36","author":"Rafailov Rafael","year":"2024","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Christopher\u00a0D Manning, Stefano Ermon, and Chelsea Finn. 2024. Direct Preference Optimization: Your Language Model is Secretly a Reward Model. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0202)","author":"Santurkar Shibani","year":"2023","unstructured":"Shibani Santurkar, Esin Durmus, Faisal Ladhak, Cinoo Lee, Percy Liang, and Tatsunori Hashimoto. 2023. Whose Opinions Do Language Models Reflect?. In Proceedings of the 40th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 29971\u201330004. https:\/\/proceedings.mlr.press\/v202\/santurkar23a.html"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3593987"},{"key":"e_1_3_2_1_53_1","volume-title":"Polis: Scaling Deliberation by Mapping High Dimensional Opinion Spaces. Recerca: revista de pensament i an\u00e0lisi 26, 2","author":"Small Christopher","year":"2021","unstructured":"Christopher Small, Michael Bjorkegren, Timo Erkkil\u00e4, Lynette Shaw, and Colin Megill. 2021. Polis: Scaling Deliberation by Mapping High Dimensional Opinion Spaces. Recerca: revista de pensament i an\u00e0lisi 26, 2 (2021)."},{"key":"e_1_3_2_1_54_1","volume-title":"Juries and Deliberative Democracy. Political studies 48, 1","author":"Smith Graham","year":"2000","unstructured":"Graham Smith and Corinne Wales. 2000. Citizens\u2019 Juries and Deliberative Democracy. Political studies 48, 1 (2000), 51\u201365."},{"key":"e_1_3_2_1_55_1","first-page":"5861","article-title":"Process for Adapting Language Models to Society (PALMS) with Values-Targeted Datasets","volume":"34","author":"Solaiman Irene","year":"2021","unstructured":"Irene Solaiman and Christy Dennison. 2021. Process for Adapting Language Models to Society (PALMS) with Values-Targeted Datasets. Advances in Neural Information Processing Systems 34 (2021), 5861\u20135873.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_56_1","volume-title":"Retrieved","author":"Sorensen Taylor","year":"2023","unstructured":"Taylor Sorensen, Liwei Jiang, Jena Hwang, Sydney Levine, Valentina Pyatkin, Peter West, Nouha Dziri, Ximing Lu, Kavel Rao, Chandra Bhagavatula, [n. d.]. Value Kaleidoscope: Engaging AI with Pluralistic Human Values, Rights, and Duties. arXiv:2309.00779 ([n. d.]). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2309.00779"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_58_1","first-page":"3008","article-title":"Learning to summarize with human feedback","volume":"33","author":"Stiennon Nisan","year":"2020","unstructured":"Nisan Stiennon, Long Ouyang, Jeffrey Wu, Daniel Ziegler, Ryan Lowe, Chelsea Voss, Alec Radford, Dario Amodei, and Paul\u00a0F Christiano. 2020. Learning to summarize with human feedback. Advances in Neural Information Processing Systems 33 (2020), 3008\u20133021.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_59_1","volume-title":"The Ethics of Nanotechnology, Geoengineering, and Clean Energy","author":"Stilgoe Jack","unstructured":"Jack Stilgoe, Richard Owen, and Phil Macnaghten. 2020. Developing a framework for responsible innovation. In The Ethics of Nanotechnology, Geoengineering, and Clean Energy. Routledge, 347\u2013359."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533132"},{"key":"e_1_3_2_1_61_1","volume-title":"Assembly.","author":"Warren E","year":"2008","unstructured":"Mark\u00a0E Warren and Hilary Pearse. 2008. Designing Deliberative Democracy: The British Columbia Citizens\u2019 Assembly. (2008)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533088"},{"key":"e_1_3_2_1_63_1","volume-title":"Retrieved","year":"2023","unstructured":"Wikipedia. 2023. Wiki survey. Retrieved Dec 23, 2023 from https:\/\/en.wikipedia.org\/wiki\/Wiki_survey"},{"key":"e_1_3_2_1_64_1","volume-title":"Proceedings of the 2023 ACM Conference on Fairness, Accountability, and Transparency. 593\u2013602","author":"Tze-Inn Wu Stephen","year":"2023","unstructured":"Stephen Tze-Inn Wu, Daniel Demetriou, and Rudwan\u00a0Ali Husain. 2023. Honor Ethics: The Challenge of Globalizing Value Alignment in AI. In Proceedings of the 2023 ACM Conference on Fairness, Accountability, and Transparency. 593\u2013602."},{"key":"e_1_3_2_1_65_1","volume-title":"Proceedings of the 5th Workshop on Online Abuse and Harms (WOAH","author":"Xenos Alexandros","year":"2021","unstructured":"Alexandros Xenos, John Pavlopoulos, and Ion Androutsopoulos. 2021. Context sensitivity estimation in toxicity detection. In Proceedings of the 5th Workshop on Online Abuse and Harms (WOAH 2021). 140\u2013145."},{"key":"e_1_3_2_1_66_1","volume-title":"Retrieved","author":"Yamagata Taku","year":"2021","unstructured":"Taku Yamagata, Ryan McConville, and Raul Santos-Rodriguez. 2021. Reinforcement learning with feedback from multiple humans with diverse skills. arXiv:2111.08596 (2021). Retrieved Dec 23, 2023 from https:\/\/arxiv.org\/abs\/2111.08596"}],"event":{"name":"FAccT '24: The 2024 ACM Conference on Fairness, Accountability, and Transparency","location":"Rio de Janeiro Brazil","acronym":"FAccT '24"},"container-title":["The 2024 ACM Conference on Fairness, Accountability, and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3658979","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3630106.3658979","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:50:58Z","timestamp":1750287058000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3658979"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":65,"alternative-id":["10.1145\/3630106.3658979","10.1145\/3630106"],"URL":"https:\/\/doi.org\/10.1145\/3630106.3658979","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}