{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T12:53:24Z","timestamp":1781873604834,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,21]],"date-time":"2026-06-21T00:00:00Z","timestamp":1782000000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/legalcode"}],"funder":[{"name":"Notre Dame-IBM Technology Ethics Lab","award":["2025 Collaborative Projects"],"award-info":[{"award-number":["2025 Collaborative Projects"]}]},{"DOI":"10.13039\/100004316","name":"International Business Machines Corporation","doi-asserted-by":"publisher","award":["2025 Ph.D. Fellowship"],"award-info":[{"award-number":["2025 Ph.D. Fellowship"]}],"id":[{"id":"10.13039\/100004316","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000879","name":"Alfred P. Sloan Foundation","doi-asserted-by":"publisher","award":["G-2024-22427"],"award-info":[{"award-number":["G-2024-22427"]}],"id":[{"id":"10.13039\/100000879","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2426395"],"award-info":[{"award-number":["CNS-2426395"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Google Research","award":["Scholar Award"],"award-info":[{"award-number":["Scholar Award"]}]},{"DOI":"10.13039\/100007065","name":"Nvidia","doi-asserted-by":"publisher","award":["Academic Hardware Grant"],"award-info":[{"award-number":["Academic Hardware Grant"]}],"id":[{"id":"10.13039\/100007065","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Amazon Science","award":["2023 Award"],"award-info":[{"award-number":["2023 Award"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,22]]},"DOI":"10.1145\/3808045.3808093","type":"proceedings-article","created":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T11:44:52Z","timestamp":1781869492000},"page":"1-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["MultEval: Supporting Collaborative Alignment for LLM-as-a-Judge Evaluation Criteria"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6079-4355","authenticated-orcid":false,"given":"Charles","family":"Chiang","sequence":"first","affiliation":[{"name":"University of Notre Dame, Notre Dame, Indiana, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1772-6065","authenticated-orcid":false,"given":"Simret","family":"Gebreegziabher","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, Indiana, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5472-282X","authenticated-orcid":false,"given":"Annalisa","family":"Szymanski","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, Indiana, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8297-6792","authenticated-orcid":false,"given":"Hyo","family":"Jin Do","sequence":"additional","affiliation":[{"name":"IBM Research, Cambridge, Massachusetts, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0686-7911","authenticated-orcid":false,"given":"Zahra","family":"Ashktorab","sequence":"additional","affiliation":[{"name":"IBM Research, Yorktown Heights, New York, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4699-5026","authenticated-orcid":false,"given":"Werner","family":"Geyer","sequence":"additional","affiliation":[{"name":"IBM Research, Cambridge, Massachusetts, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7902-7625","authenticated-orcid":false,"given":"Toby","family":"Jia-Jun Li","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, Indiana, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4609-6293","authenticated-orcid":false,"given":"Diego","family":"Gomez-Zara","sequence":"additional","affiliation":[{"name":"University of Notre Dame, Notre Dame, Indiana, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,21]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"[n. d.]. Secure & reliable LLMs | Promptfoo. https:\/\/www.promptfoo.dev\/"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642016"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","unstructured":"Zahra Ashktorab Michael Desmond Qian Pan James\u00a0M. Johnson Martin\u00a0Santillan Cooper Elizabeth\u00a0M. Daly Rahul Nair Tejaswini Pedapati Hyo\u00a0Jin Do and Werner Geyer. 2025. Aligning Human and LLM Judgments: Insights from EvalAssist on Task-Specific Evaluations and AI-assisted Assessment Strategy Preferences. arXiv:2410.00873 (Aug. 2025). 10.48550\/arXiv.2410.00873","DOI":"10.48550\/arXiv.2410.00873"},{"key":"e_1_3_3_2_5_2","unstructured":"Yuntao Bai Saurav Kadavath Sandipan Kundu Amanda Askell Jackson Kernion Andy Jones Anna Chen Anna Goldie Azalia Mirhoseini Cameron McKinnon et\u00a0al. 2022. Constitutional ai: Harmlessness from ai feedback. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.08073 (2022)."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Virginia Braun and Victoria Clarke. 2006. Using thematic analysis in psychology. Qualitative research in psychology 3 2 (2006) 77\u2013101.","DOI":"10.1191\/1478088706qp063oa"},{"key":"e_1_3_3_2_7_2","unstructured":"Robert\u00a0O Briggs Gwendolyn\u00a0L Kolfschoten and Gert-Jan\u00a0de Vreede. 2005. Toward a theoretical model of consensus building. AMCIS 2005 Proceedings (2005) 12."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","unstructured":"Chi-Min Chan Weize Chen Yusheng Su Jianxuan Yu Wei Xue Shanghang Zhang Jie Fu and Zhiyuan Liu. 2023. ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate. arXiv:2308.07201 (Aug. 2023). 10.48550\/arXiv.2308.07201","DOI":"10.48550\/arXiv.2308.07201"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Herbert\u00a0H Clark and Susan\u00a0E Brennan. 1991. Grounding in communication. (1991).","DOI":"10.1037\/10096-006"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640544.3645216"},{"key":"e_1_3_3_2_11_2","unstructured":"Yann Dubois Bal\u00e1zs Galambosi Percy Liang and Tatsunori\u00a0B Hashimoto. 2024. Length-controlled alpacaeval: A simple way to debias automatic evaluators. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.04475 (2024)."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Thomas Erickson and Wendy\u00a0A Kellogg. 2000. Social translucence: an approach to designing systems that support social processes. ACM transactions on computer-human interaction (TOCHI) 7 1 (2000) 59\u201383.","DOI":"10.1145\/344949.345004"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3772318.3791689"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-007-7844-3_4"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Iason Gabriel. 2020. Artificial intelligence values and alignment. Minds and machines 30 3 (2020) 411\u2013437.","DOI":"10.1007\/s11023-020-09539-2"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642002"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3729176.3729199"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","unstructured":"Katy\u00a0Ilonka Gero Chelse Swoopes Ziwei Gu Jonathan\u00a0K. Kummerfeld and Elena\u00a0L. Glassman. 2024. Supporting Sensemaking of Large Language Model Outputs at Scale. arXiv:2401.13726 (Jan. 2024). 10.48550\/arXiv.2401.13726","DOI":"10.48550\/arXiv.2401.13726"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376654"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502004"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/2856767.2856776"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","unstructured":"Jiawei Gu Xuhui Jiang Zhichao Shi Hexiang Tan Xuehao Zhai Chengjin Xu Wei Li Yinghan Shen Shengjie Ma Honghao Liu Saizhuo Wang Kun Zhang Yuanzhuo Wang Wen Gao Lionel Ni and Jian Guo. 2025. A Survey on LLM-as-a-Judge. arXiv:2411.15594 (March 2025). 10.48550\/arXiv.2411.15594","DOI":"10.48550\/arXiv.2411.15594"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"Yu-Tang Hsiao Shu-Yang Lin Audrey Tang Darshana Narayanan and Claudina Sarahe. 2018. vTaiwan: An empirical study of open consultation process in Taiwan. Taiwan: Center for Open Science (2018).","DOI":"10.31235\/osf.io\/xyhft"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658979"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Karen\u00a0A Jehn Gregory\u00a0B Northcraft and Margaret\u00a0A Neale. 1999. Why differences make a difference: A field study of diversity conflict and performance in workgroups. Administrative science quarterly 44 4 (1999) 741\u2013763.","DOI":"10.2307\/2667054"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"crossref","unstructured":"Tae\u00a0Soo Kim Nitesh Goyal Jeongyeon Kim Juho Kim and Sungsoo\u00a0Ray Hong. 2021. Supporting collaborative sequencing of small groups through visual awareness. Proceedings of the ACM on Human-Computer Interaction 5 CSCW1 (2021) 1\u201329.","DOI":"10.1145\/3449250"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","unstructured":"Tae\u00a0Soo Kim Heechan Lee Yoonjoo Lee Joseph Seering and Juho Kim. 2025. Evalet: Evaluating Large Language Models by Fragmenting Outputs into Functions. arXiv:2509.11206 (2025). 10.48550\/arXiv.2509.11206","DOI":"10.48550\/arXiv.2509.11206"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642216"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746059.3747680"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/1753326.1753543"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"crossref","unstructured":"Bidhan\u00a0L Parmar R\u00a0Edward Freeman Jeffrey\u00a0S Harrison Andrew\u00a0C Wicks Lauren Purnell and Simone De\u00a0Colle. 2010. Stakeholder theory: The state of the art. Academy of Management Annals 4 1 (2010) 403\u2013445.","DOI":"10.5465\/19416520.2010.495581"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287567"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676450"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"crossref","unstructured":"Atousa Soltani Kasun Hewage Bahareh Reza and Rehan Sadiq. 2015. Multiple stakeholders in multi-criteria decision-making in the context of municipal solid waste management: a review. Waste Management 35 (2015) 318\u2013328.","DOI":"10.1016\/j.wasman.2014.09.010"},{"key":"e_1_3_3_2_35_2","unstructured":"Stax. [n. d.]. Stax - The complete toolkit for AI evaluation. https:\/\/stax.withgoogle.com\/landing"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517537"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","unstructured":"Annalisa Szymanski Simret\u00a0Araya Gebreegziabher Oghenemaro Anuyah Ronald\u00a0A. Metoyer and Toby Jia-Jun Li. 2024. Comparing Criteria Development Across Domain Experts Lay Users and Models in Large Language Model Evaluation. arXiv:2410.02054 (Oct. 2024). 10.48550\/arXiv.2410.02054","DOI":"10.48550\/arXiv.2410.02054"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","unstructured":"Michael Terry Chinmay Kulkarni Martin Wattenberg Lucas Dixon and Meredith\u00a0Ringel Morris. 2024. Interactive AI Alignment: Specification Process and Evaluation Alignment. arXiv:2311.00710 (2024). 10.48550\/arXiv.2311.00710","DOI":"10.48550\/arXiv.2311.00710"},{"key":"e_1_3_3_2_39_2","unstructured":"Michael Williams and Tami Moser. 2019. The art of coding and thematic exploration in qualitative research. International management review 15 1 (2019) 45\u201355."},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640794.3665534"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"crossref","unstructured":"Lianmin Zheng Wei-Lin Chiang Ying Sheng Siyuan Zhuang Zhanghao Wu Yonghao Zhuang Zi Lin Zhuohan Li Dacheng Li Eric Xing et\u00a0al. 2023. Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in neural information processing systems 36 (2023) 46595\u201346623.","DOI":"10.52202\/075280-2020"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Tan Zhi-Xuan Micah Carroll Matija Franklin and Hal Ashton. 2025. Beyond Preferences in AI Alignment: T. Zhi-Xuan et al. Philosophical Studies 182 7 (2025) 1813\u20131863.","DOI":"10.1007\/s11098-024-02249-w"}],"event":{"name":"CHIWORK '26: Proceedings of the 5th Annual Symposium on Human-Computer Interaction for Work","location":"Linz Austria","acronym":"CHIWORK '26"},"container-title":["Proceedings of the 5th Annual Symposium on Human-Computer Interaction for Work"],"original-title":[],"deposited":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T11:57:21Z","timestamp":1781870241000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3808045.3808093"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,21]]},"references-count":41,"alternative-id":["10.1145\/3808045.3808093","10.1145\/3808045"],"URL":"https:\/\/doi.org\/10.1145\/3808045.3808093","relation":{},"subject":[],"published":{"date-parts":[[2026,6,21]]},"assertion":[{"value":"2026-06-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}