{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T07:57:22Z","timestamp":1781337442582,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T00:00:00Z","timestamp":1781222400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/100005959","name":"John S. and James L. Knight Foundation","doi-asserted-by":"publisher","award":["GR-2023-67586"],"award-info":[{"award-number":["GR-2023-67586"]}],"id":[{"id":"10.13039\/100005959","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,13]]},"DOI":"10.1145\/3800645.3812938","type":"proceedings-article","created":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T18:41:17Z","timestamp":1781289677000},"page":"1263-1281","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Real-World Validity in Generative AI Benchmarks: Understanding and Designing Domain-Centered Evaluations for Journalism Practitioners"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8914-3717","authenticated-orcid":false,"given":"Charlotte","family":"Li","sequence":"first","affiliation":[{"name":"Northwestern University, Evanston, Illinois, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5110-3737","authenticated-orcid":false,"given":"Nick","family":"Hagar","sequence":"additional","affiliation":[{"name":"Northwestern University, Evanston, Illinois, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6192-6091","authenticated-orcid":false,"given":"Sachita","family":"Nishal","sequence":"additional","affiliation":[{"name":"Northwestern University, Evanston, Illinois, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2585-0972","authenticated-orcid":false,"given":"Jeremy","family":"Gilbert","sequence":"additional","affiliation":[{"name":"Medill School, Northwestern University, Evanston, Illinois, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5005-6123","authenticated-orcid":false,"given":"Nicholas","family":"Diakopoulos","sequence":"additional","affiliation":[{"name":"Northwestern University, Evanston, Illinois, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,12]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","unstructured":"Elisabeth\u00a0Muth Andersen. 2025. The Learnability Hierarchy of News Values: What Makes Some Journalistic Concepts Harder to Classify? Anthology of Computers and the Humanities 3 (2025) 367\u2013381. 10.63744\/svxDtDD45mvw","DOI":"10.63744\/svxDtDD45mvw"},{"key":"e_1_3_3_2_3_2","unstructured":"Anthropic. 2025. Introducing Claude 4. https:\/\/www.anthropic.com\/news\/claude-4. Accessed: 2025-08-22."},{"key":"e_1_3_3_2_4_2","unstructured":"Jacob Austin Augustus Odena Maxwell Nye Maarten Bosma Henryk Michalewski David Dohan Ellen Jiang Carrie Cai Michael Terry Quoc Le and Charles Sutton. 2021. Program Synthesis with Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2108.07732\u00a0[cs.PL] https:\/\/arxiv.org\/abs\/2108.07732"},{"key":"e_1_3_3_2_5_2","volume-title":"News Integrity in AI Assistants: An International PSM Study","year":"2025","unstructured":"BBC and European Broadcasting Union. 2025. News Integrity in AI Assistants: An International PSM Study. Technical Report. BBC and European Broadcasting Union (EBU). https:\/\/www.ebu.ch\/files\/live\/sites\/ebu\/files\/Publications\/MIS\/open\/EBU-MIS-BBC_News_Integrity_in_AI_Assistants_Report_2025.pdf"},{"key":"e_1_3_3_2_6_2","unstructured":"Stella Biderman Hailey Schoelkopf Lintang Sutawika Leo Gao Jonathan Tow Baber Abbasi Alham\u00a0Fikri Aji Pawan\u00a0Sasanka Ammanamanchi Sidney Black Jordan Clive Anthony DiPofi Julen Etxaniz Benjamin Fattori Jessica\u00a0Zosa Forde Charles Foster Jeffrey Hsu Mimansa Jaiswal Wilson\u00a0Y. Lee Haonan Li Charles Lovering Niklas Muennighoff Ellie Pavlick Jason Phang Aviya Skowron Samson Tan Xiangru Tang Kevin\u00a0A. Wang Genta\u00a0Indra Winata Fran\u00e7ois Yvon and Andy Zou. 2024. Lessons from the Trenches on Reproducible Evaluation of Language Models. arxiv:https:\/\/arXiv.org\/abs\/2405.14782\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2405.14782"},{"key":"e_1_3_3_2_7_2","volume-title":"AAAI Conference on Artificial Intelligence","author":"Bisk Yonatan","year":"2019","unstructured":"Yonatan Bisk, Rowan Zellers, Ronan\u00a0Le Bras, Jianfeng Gao, and Yejin Choi. 2019. PIQA: Reasoning about Physical Commonsense in Natural Language. In AAAI Conference on Artificial Intelligence. https:\/\/api.semanticscholar.org\/CorpusID:208290939"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Virginia Braun and Victoria Clarke. 2006. Using thematic analysis in psychology. Qualitative research in psychology 3 2 (2006) 77\u2013101.","DOI":"10.1191\/1478088706qp063oa"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","unstructured":"Suzanne Campbell Mike Greenwood Sarah Prior Tom Shearer Kathryn Walkem Sally Young David Bywaters and Kate Walker. 2020. Purposive Sampling: Complex or Simple? Research Case Examples. Journal of Research in Nursing 25 8 (2020) 652\u2013661. 10.1177\/1744987120927206","DOI":"10.1177\/1744987120927206"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","unstructured":"Colleen Cheek Elizabeth Austin Lieke Richardson Luke Testa Natalia Ransolin Emilie Francis-Auton Mariam Safi Margaret Murphy Aaron De\u00a0Los Santos Matthew Vukasovic and Robyn Clay-Williams. 2024. Non-Participant Observations in Experience-Based Codesign: An example using a Case Study Research approach to explore Emergency Department Care. International Journal of Qualitative Methods 23 (2024) 16094069241289278. arXiv:10.1177\/1609406924128927810.1177\/16094069241289278","DOI":"10.1177\/16094069241289278"},{"key":"e_1_3_3_2_11_2","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique\u00a0Ponde de Oliveira\u00a0Pinto Jared Kaplan Harri Edwards Yuri Burda Nicholas Joseph Greg Brockman Alex Ray Raul Puri Gretchen Krueger Michael Petrov Heidy Khlaaf Girish Sastry Pamela Mishkin Brooke Chan Scott Gray Nick Ryder Mikhail Pavlov Alethea Power Lukasz Kaiser Mohammad Bavarian Clemens Winter Philippe Tillet Felipe\u00a0Petroski Such Dave Cummings Matthias Plappert Fotios Chantzis Elizabeth Barnes Ariel Herbert-Voss William\u00a0Hebgen Guss Alex Nichol Alex Paino Nikolas Tezak Jie Tang Igor Babuschkin Suchir Balaji Shantanu Jain William Saunders Christopher Hesse Andrew\u00a0N. Carr Jan Leike Josh Achiam Vedant Misra Evan Morikawa Alec Radford Matthew Knight Miles Brundage Mira Murati Katie Mayer Peter Welinder Bob McGrew Dario Amodei Sam McCandlish Ilya Sutskever and Wojciech Zaremba. 2021. Evaluating Large Language Models Trained on Code. arxiv:https:\/\/arXiv.org\/abs\/2107.03374\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2107.03374"},{"key":"e_1_3_3_2_12_2","unstructured":"Wei-Lin Chiang Lianmin Zheng Ying Sheng Anastasios\u00a0Nikolas Angelopoulos Tianle Li Dacheng Li Hao Zhang Banghua Zhu Michael Jordan Joseph\u00a0E. Gonzalez and Ion Stoica. 2024. Chatbot Arena: An Open Platform for Evaluating LLMs by Human Preference. arxiv:https:\/\/arXiv.org\/abs\/2403.04132\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2403.04132"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-65442-3_2"},{"key":"e_1_3_3_2_14_2","unstructured":"Karl Cobbe Vineet Kosaraju Mohammad Bavarian Mark Chen Heewoo Jun Lukasz Kaiser Matthias Plappert Jerry Tworek Jacob Hilton Reiichiro Nakano Christopher Hesse and John Schulman. 2021. Training Verifiers to Solve Math Word Problems. arxiv:https:\/\/arXiv.org\/abs\/2110.14168\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2110.14168"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","unstructured":"Hannes Cools and Nicholas Diakopoulos. 2024. Uses of Generative AI in the Newsroom: Mapping Journalists\u2019 Perceptions of Perils and Possibilities. Journalism Practice ahead-of-print ahead-of-print (2024) 1\u201319. 10.1080\/17512786.2024.2394558","DOI":"10.1080\/17512786.2024.2394558"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","unstructured":"Ernest Davis. 2023. Benchmarks for Automated Commonsense Reasoning: A Survey. ACM Comput. Surv. 56 4 Article 81 (Oct. 2023) 41\u00a0pages. 10.1145\/3615355","DOI":"10.1145\/3615355"},{"key":"e_1_3_3_2_17_2","unstructured":"DeepSeek-AI Aixin Liu Aoxue Mei Bangcai Lin Bing Xue Bingxuan Wang Bingzheng Xu Bochao Wu Bowei Zhang Chaofan Lin Chen Dong Chengda Lu Chenggang Zhao Chengqi Deng Chenhao Xu Chong Ruan Damai Dai Daya Guo Dejian Yang Deli Chen Erhang Li Fangqi Zhou Fangyun Lin Fucong Dai Guangbo Hao Guanting Chen Guowei Li H. Zhang Hanwei Xu Hao Li Haofen Liang Haoran Wei Haowei Zhang Haowen Luo Haozhe Ji Honghui Ding Hongxuan Tang Huanqi Cao Huazuo Gao Hui Qu Hui Zeng Jialiang Huang Jiashi Li Jiaxin Xu Jiewen Hu Jingchang Chen Jingting Xiang Jingyang Yuan Jingyuan Cheng Jinhua Zhu Jun Ran Junguang Jiang Junjie Qiu Junlong Li Junxiao Song Kai Dong Kaige Gao Kang Guan Kexin Huang Kexing Zhou Kezhao Huang Kuai Yu Lean Wang Lecong Zhang Lei Wang Liang Zhao Liangsheng Yin Lihua Guo Lingxiao Luo Linwang Ma Litong Wang Liyue Zhang M.\u00a0S. Di M.\u00a0Y Xu Mingchuan Zhang Minghua Zhang Minghui Tang Mingxu Zhou Panpan Huang Peixin Cong Peiyi Wang Qiancheng Wang Qihao Zhu Qingyang Li Qinyu Chen Qiushi Du Ruiling Xu Ruiqi Ge Ruisong Zhang Ruizhe Pan Runji Wang Runqiu Yin Runxin Xu Ruomeng Shen Ruoyu Zhang S.\u00a0H. Liu Shanghao Lu Shangyan Zhou Shanhuang Chen Shaofei Cai Shaoyuan Chen Shengding Hu Shengyu Liu Shiqiang Hu Shirong Ma Shiyu Wang Shuiping Yu Shunfeng Zhou Shuting Pan Songyang Zhou Tao Ni Tao Yun Tian Pei Tian Ye Tianyuan Yue Wangding Zeng Wen Liu Wenfeng Liang Wenjie Pang Wenjing Luo Wenjun Gao Wentao Zhang Xi Gao Xiangwen Wang Xiao Bi Xiaodong Liu Xiaohan Wang Xiaokang Chen Xiaokang Zhang Xiaotao Nie Xin Cheng Xin Liu Xin Xie Xingchao Liu Xingkai Yu Xingyou Li Xinyu Yang Xinyuan Li Xu Chen Xuecheng Su Xuehai Pan Xuheng Lin Xuwei Fu Y.\u00a0Q. Wang Yang Zhang Yanhong Xu Yanru Ma Yao Li Yao Li Yao Zhao Yaofeng Sun Yaohui Wang Yi Qian Yi Yu Yichao Zhang Yifan Ding Yifan Shi Yiliang Xiong Ying He Ying Zhou Yinmin Zhong Yishi Piao Yisong Wang Yixiao Chen Yixuan Tan Yixuan Wei Yiyang Ma Yiyuan Liu Yonglun Yang Yongqiang Guo Yongtong Wu Yu Wu Yuan Cheng Yuan Ou Yuanfan Xu Yuduan Wang Yue Gong Yuhan Wu Yuheng Zou Yukun Li Yunfan Xiong Yuxiang Luo Yuxiang You Yuxuan Liu Yuyang Zhou Z.\u00a0F. Wu Z.\u00a0Z. Ren Zehua Zhao Zehui Ren Zhangli Sha Zhe Fu Zhean Xu Zhenda Xie Zhengyan Zhang Zhewen Hao Zhibin Gou Zhicheng Ma Zhigang Yan Zhihong Shao Zhixian Huang Zhiyu Wu Zhuoshu Li Zhuping Zhang Zian Xu Zihao Wang Zihui Gu Zijia Zhu Zilin Li Zipeng Zhang Ziwei Xie Ziyi Gao Zizheng Pan Zongqing Yao Bei Feng Hui Li J.\u00a0L. Cai Jiaqi Ni Lei Xu Meng Li Ning Tian R.\u00a0J. Chen R.\u00a0L. Jin S.\u00a0S. Li Shuang Zhou Tianyu Sun X.\u00a0Q. Li Xiangyue Jin Xiaojin Shen Xiaosha Chen Xinnan Song Xinyi Zhou Y.\u00a0X. Zhu Yanping Huang Yaohui Li Yi Zheng Yuchen Zhu Yunxian Ma Zhen Huang Zhipeng Xu Zhongyu Zhang Dongjie Ji Jian Liang Jianzhong Guo Jin Chen Leyi Xia Miaojun Wang Mingming Li Peng Zhang Ruyi Chen Shangmian Sun Shaoqing Wu Shengfeng Ye T. Wang W.\u00a0L. Xiao Wei An Xianzu Wang Xiaowen Sun Xiaoxiang Wang Ying Tang Yukun Zha Zekai Zhang Zhe Ju Zhen Zhang and Zihua Qu. 2025. DeepSeek-V3.2: Pushing the Frontier of Open Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2512.02556\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2512.02556"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","unstructured":"M Deuze. 2005. What is journalism?: Professional identity and ideology of journalists reconsidered. Journalism 6 4 (11 2005) 442 \u2013 464. 10.1177\/1464884905056815","DOI":"10.1177\/1464884905056815"},{"key":"e_1_3_3_2_19_2","unstructured":"Ruchira Dhar Danae\u00a0Sanchez Villegas Antonia Karamolegkou Alice Schiavone Yifei Yuan Xinyi Chen Jiaang Li Stella Frank Laura\u00a0De Grazia Monorama Swain Stephanie Brandl Daniel Hershcovich Anders S\u00f8gaard and Desmond Elliott. 2025. EvalCards: A Framework for Standardized Evaluation Reporting. arxiv:https:\/\/arXiv.org\/abs\/2511.21695\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2511.21695"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.31540.05765"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","unstructured":"Nicholas Diakopoulos Christoph Trattner Dietmar Jannach Irene\u00a0Costera Meijer and Enrico Motta. 2023. Leveraging Professional Ethics for Responsible AI. Commun. ACM (2023). 10.1145\/3625252","DOI":"10.1145\/3625252"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","unstructured":"Tom\u00e1s Dodds Valeria Res\u00e9ndez Gerret von Nordheim Theo Araujo and Judith Moeller. 2024. Collaborative Coding Cultures: How Journalists Use GitHub as a Trading Zone. Digital Journalism 12 7 (2024) 1030\u20131051. arXiv:10.1080\/21670811.2024.234246810.1080\/21670811.2024.2342468","DOI":"10.1080\/21670811.2024.2342468"},{"key":"e_1_3_3_2_23_2","unstructured":"Dheeru Dua Yizhong Wang Pradeep Dasigi Gabriel Stanovsky Sameer Singh and Matt Gardner. 2019. DROP: A Reading Comprehension Benchmark Requiring Discrete Reasoning Over Paragraphs. arxiv:https:\/\/arXiv.org\/abs\/1903.00161\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1903.00161"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","unstructured":"Stephanie D\u2019haeseleer Kristin\u00a0Van Damme Hannes Cools Sarah\u00a0Van Leuven and Tom Evens. 2025. AI Divides in Newsrooms? How Journalists in the Low Countries Use and Perceive Generative AI. Journalism Practice 0 0 (2025) 1\u201328. arXiv:10.1080\/17512786.2025.253812010.1080\/17512786.2025.2538120","DOI":"10.1080\/17512786.2025.2538120"},{"key":"e_1_3_3_2_25_2","unstructured":"Kawin Ethayarajh and Dan Jurafsky. 2021. Utility is in the Eye of the User: A Critique of NLP Leaderboards. arxiv:https:\/\/arXiv.org\/abs\/2009.13888\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2009.13888"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","unstructured":"Batya Friedman David\u00a0G. Hendry and Alan Borning. 2017. A Survey of Value Sensitive Design Methods. Foundations and Trends\u00ae in Human\u2013Computer Interaction 11 2 (2017) 63\u2013125. 10.1561\/1100000015","DOI":"10.1561\/1100000015"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Neel Guha Julian Nyarko Daniel\u00a0E. Ho Christopher R\u00e9 Adam Chilton Aditya Narayana Alex Chohlas-Wood Austin Peters Brandon Waldon Daniel\u00a0N. Rockmore Diego Zambrano Dmitry Talisman Enam Hoque Faiz Surani Frank Fagan Galit Sarfaty Gregory\u00a0M. Dickinson Haggai Porat Jason Hegland Jessica Wu Joe Nudell Joel Niklaus John Nay Jonathan\u00a0H. Choi Kevin Tobia Margaret Hagan Megan Ma Michael Livermore Nikon Rasumov-Rahe Nils Holzenberger Noam Kolt Peter Henderson Sean Rehaag Sharad Goel Shang Gao Spencer Williams Sunny Gandhi Tom Zur Varun Iyer and Zehua Li. 2023. LegalBench: A Collaboratively Built Benchmark for Measuring Legal Reasoning in Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2308.11462\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2308.11462","DOI":"10.2139\/ssrn.4583531"},{"key":"e_1_3_3_2_28_2","unstructured":"Hamna Gayatri Bhat Sourabrata Mukherjee Faisal Lalani Evan Hadfield Divya Siddarth Kalika Bali and Sunayana Sitaram. 2025. Building Benchmarks from the Ground Up: Community-Centered Evaluation of LLMs in Healthcare Chatbot Settings. arxiv:https:\/\/arXiv.org\/abs\/2509.24506\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2509.24506"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","unstructured":"Thomas Hanitzsch. 2007. Deconstructing Journalism Culture: Toward a Universal Theory. Communication theory 17 4 (11 2007) 367 \u2013 385. 10.1111\/j.1468-2885.2007.00303.x","DOI":"10.1111\/j.1468-2885.2007.00303.x"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","unstructured":"Tony Harcup and Deirdre O\u2019Neill. 2016. What is news? News values revisited (again). Journalism Studies 23 1 (03 2016) 1 \u2013 19. 10.1080\/1461670x.2016.1150193","DOI":"10.1080\/1461670x.2016.1150193"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3708359.3712152"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-08-051574-8.50082-0"},{"key":"e_1_3_3_2_33_2","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Saurav Kadavath, Akul Arora, Steven Basart, Eric Tang, Dawn Song, and Jacob Steinhardt. 2021. Measuring Mathematical Problem Solving With the MATH Dataset. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2). https:\/\/openreview.net\/forum?id=7Bywt2mQsCe"},{"key":"e_1_3_3_2_34_2","unstructured":"Dongfu Jiang Max Ku Tianle Li Yuansheng Ni Shizhuo Sun Rongqi Fan and Wenhu Chen. 2024. GenAI Arena: An Open Evaluation Platform for Generative Models. arxiv:https:\/\/arXiv.org\/abs\/2406.04485\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2406.04485"},{"key":"e_1_3_3_2_35_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Jimenez Carlos\u00a0E","year":"2024","unstructured":"Carlos\u00a0E Jimenez, John Yang, Alexander Wettig, Shunyu Yao, Kexin Pei, Ofir Press, and Karthik\u00a0R Narasimhan. 2024. SWE-bench: Can Language Models Resolve Real-world Github Issues?. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=VTF8yNQM66"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","unstructured":"Vijay Khatri and Carol\u00a0V. Brown. 2010. Designing data governance. Commun. ACM 53 1 (Jan. 2010) 148\u2013152. 10.1145\/1629175.1629210","DOI":"10.1145\/1629175.1629210"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642278"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1082"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","unstructured":"Miao Li Ming-Bin Chen Bo Tang ShengbinHou ShengbinHou Pengyu Wang Haiying Deng Zhiyu Li Feiyu Xiong Keming Mao Cheng Peng and Yi Luo. 2024. NewsBench: A Systematic Evaluation Framework for Assessing Editorial Capabilities of Large Language Models in Chinese Journalism. Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) (2024) 9993\u201310014. 10.18653\/v1\/2024.acl-long.538","DOI":"10.18653\/v1\/2024.acl-long.538"},{"key":"e_1_3_3_2_40_2","unstructured":"Percy Liang Rishi Bommasani Tony Lee Dimitris Tsipras Dilara Soylu Michihiro Yasunaga Yian Zhang Deepak Narayanan Yuhuai Wu Ananya Kumar Benjamin Newman Binhang Yuan Bobby Yan Ce Zhang Christian Cosgrove Christopher\u00a0D. Manning Christopher R\u00e9 Diana Acosta-Navas Drew\u00a0A. Hudson Eric Zelikman Esin Durmus Faisal Ladhak Frieda Rong Hongyu Ren Huaxiu Yao Jue Wang Keshav Santhanam Laurel Orr Lucia Zheng Mert Yuksekgonul Mirac Suzgun Nathan Kim Neel Guha Niladri Chatterji Omar Khattab Peter Henderson Qian Huang Ryan Chi Sang\u00a0Michael Xie Shibani Santurkar Surya Ganguli Tatsunori Hashimoto Thomas Icard Tianyi Zhang Vishrav Chaudhary William Wang Xuechen Li Yifan Mai Yuhui Zhang and Yuta Koreeda. 2023. Holistic Evaluation of Language Models. arxiv:https:\/\/arXiv.org\/abs\/2211.09110\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2211.09110"},{"key":"e_1_3_3_2_41_2","unstructured":"Q.\u00a0Vera Liao and Ziang Xiao. 2025. Rethinking Model Evaluation as Narrowing the Socio-Technical Gap. arxiv:https:\/\/arXiv.org\/abs\/2306.03100\u00a0[cs.HC] https:\/\/arxiv.org\/abs\/2306.03100"},{"key":"e_1_3_3_2_42_2","unstructured":"Yu\u00a0Lu Liu Su\u00a0Lin Blodgett Jackie Chi\u00a0Kit Cheung Q.\u00a0Vera Liao Alexandra Olteanu and Ziang Xiao. 2024. ECBD: Evidence-Centered Benchmark Design for NLP. arxiv:https:\/\/arXiv.org\/abs\/2406.08723\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2406.08723"},{"key":"e_1_3_3_2_43_2","unstructured":"Alexandre Matton Tom Sherborne Dennis Aumiller Elena Tommasone Milad Alizadeh Jingyi He Raymond Ma Maxime Voisin Ellen Gilsenan-McMahon and Matthias Gall\u00e9. 2024. On Leakage of Code Generation Evaluation Datasets. arxiv:https:\/\/arXiv.org\/abs\/2407.07565\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2407.07565"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","unstructured":"Sachita Nishal and Nicholas Diakopoulos. 2024. Envisioning the Applications and Implications of Generative AI for News Media. arXiv (2024). arXiv:https:\/\/arXiv.org\/abs\/2402.1883510.48550\/arxiv.2402.18835","DOI":"10.48550\/arxiv.2402.18835"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3715336.3735717"},{"key":"e_1_3_3_2_46_2","volume-title":"The Florida AI Research Society","author":"O\u2019Hara Keith\u00a0J.","year":"2015","unstructured":"Keith\u00a0J. O\u2019Hara, Douglas\u00a0S. Blank, and James\u00a0B. Marshall. 2015. Computational Notebooks for AI Education. In The Florida AI Research Society. https:\/\/api.semanticscholar.org\/CorpusID:1772160"},{"key":"e_1_3_3_2_47_2","unstructured":"OpenAI. 2025. Introducing GPT-5. https:\/\/openai.com\/index\/introducing-gpt-5\/. Published August 7 2025; Accessed: 2025-08-22."},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3659012"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","unstructured":"Sora Park Caroline Fisher Edson TandocJr Uwe Dulleck Shengnan\u00a0Pinker Yao and William Lukamto. 0. The relationship between news trust mistrust and audience disengagement. Journalism 0 0 (0) 14648849241299775. arXiv:10.1177\/1464884924129977510.1177\/14648849241299775","DOI":"10.1177\/14648849241299775"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"crossref","unstructured":"Tejal Patwardhan Rachel Dias Elizabeth Proehl Grace Kim Michele Wang Olivia Watkins Sim\u00f3n\u00a0Posada Fishman Marwan Aljubeh Phoebe Thacker Laurance Fauconnet Natalie\u00a0S. Kim Patrick Chao Samuel Miserendino Gildas Chabot David Li Michael Sharman Alexandra Barr Amelia Glaese and Jerry Tworek. 2025. GDPval: Evaluating AI Model Performance on Real-World Economically Valuable Tasks. arxiv:https:\/\/arXiv.org\/abs\/2510.04374\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2510.04374","DOI":"10.70777\/si.v2i4.17197"},{"key":"e_1_3_3_2_51_2","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)","author":"Raji Inioluwa\u00a0Deborah","year":"2021","unstructured":"Inioluwa\u00a0Deborah Raji, Emily Denton, Emily\u00a0M. Bender, Alex Hanna, and Amandalynne Paullada. 2021. AI and the Everything in the Whole Wide World Benchmark. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2). https:\/\/openreview.net\/forum?id=j6NxpQbREA1"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-2124"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","unstructured":"Sandeep Reddy Wendy Rogers Ville-Petteri Makinen Enrico Coiera Pieta Brown Markus Wenzel Eva Weicken Saba Ansari Piyush Mathur Aaron Casey and Blair Kelly. 2021. Evaluation Framework to Guide Implementation of AI Systems into Healthcare Settings. BMJ Health & Care Informatics 28 1 (2021) e100444. 10.1136\/bmjhci-2021-100444","DOI":"10.1136\/bmjhci-2021-100444"},{"key":"e_1_3_3_2_54_2","unstructured":"David Rein Betty\u00a0Li Hou Asa\u00a0Cooper Stickland Jackson Petty Richard\u00a0Yuanzhe Pang Julien Dirani Julian Michael and Samuel\u00a0R. Bowman. 2023. GPQA: A Graduate-Level Google-Proof Q&A Benchmark. arxiv:https:\/\/arXiv.org\/abs\/2311.12022\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2311.12022"},{"key":"e_1_3_3_2_55_2","unstructured":"Anka Reuel Amelia Hardy Chandler Smith Max Lamparth Malcolm Hardy and Mykel\u00a0J. Kochenderfer. 2024. BetterBench: Assessing AI Benchmarks Uncovering Issues and Establishing Best Practices. arxiv:https:\/\/arXiv.org\/abs\/2411.12990\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2411.12990"},{"key":"e_1_3_3_2_56_2","unstructured":"Jenna Russell Marzena Karpinska Destiny Akinode Katherine Thai Bradley Emi Max Spero and Mohit Iyyer. 2025. AI use in American newspapers is widespread uneven and rarely disclosed. arxiv:https:\/\/arXiv.org\/abs\/2510.18774\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2510.18774"},{"key":"e_1_3_3_2_57_2","unstructured":"Paul R\u00f6ttger Musashi Hinck Valentin Hofmann Kobi Hackenburg Valentina Pyatkin Faeze Brahman and Dirk Hovy. 2025. IssueBench: Millions of Realistic Prompts for Measuring Issue Bias in LLM Writing Assistance. arxiv:https:\/\/arXiv.org\/abs\/2502.08395\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2502.08395"},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"publisher","unstructured":"Malak Sadek Rafael\u00a0A. Calvo and C\u00e9line Mougenot. 2023. Designing value-sensitive AI: a critical review and recommendations for socio-technical design processes. AI and Ethics 4 4 (2023) 949\u2013967. 10.1007\/s43681-023-00373-7","DOI":"10.1007\/s43681-023-00373-7"},{"key":"e_1_3_3_2_59_2","unstructured":"Keisuke Sakaguchi Ronan\u00a0Le Bras Chandra Bhagavatula and Yejin Choi. 2019. WinoGrande: An Adversarial Winograd Schema Challenge at Scale. arxiv:https:\/\/arXiv.org\/abs\/1907.10641\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1907.10641"},{"key":"e_1_3_3_2_60_2","unstructured":"Michael Saxon Ari Holtzman Peter West William\u00a0Yang Wang and Naomi Saphra. 2024. Benchmarks as Microscopes: A Call for Model Metrology. arxiv:https:\/\/arXiv.org\/abs\/2407.16711\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2407.16711"},{"key":"e_1_3_3_2_61_2","unstructured":"Reva Schwartz Rumman Chowdhury Akash Kundu Heather Frase Marzieh Fadaee Tom David Gabriella Waters Afaf Taik Morgan Briggs Patrick Hall Shomik Jain Kyra Yee Spencer Thomas Sundeep Bhandari Paul Duncan Andrew Thompson Maya Carlyle Qinghua Lu Matthew Holmes and Theodora Skeadas. 2025. Reality Check: A New Evaluation Ecosystem Is Necessary to Understand AI\u2019s Real World Effects. arxiv:https:\/\/arXiv.org\/abs\/2505.18893\u00a0[cs.CY] https:\/\/arxiv.org\/abs\/2505.18893"},{"key":"e_1_3_3_2_62_2","unstructured":"Aryan Shrivastava and Paula\u00a0Akemi Aoyagui. 2025. DICE: A Framework for Dimensional and Contextual Evaluation of Language Models. arxiv:https:\/\/arXiv.org\/abs\/2504.10359\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2504.10359"},{"key":"e_1_3_3_2_63_2","unstructured":"Aaditya Singh Adam Fry Adam Perelman Adam Tart Adi Ganesh Ahmed El-Kishky Aidan McLaughlin Aiden Low AJ Ostrow Akhila Ananthram Akshay Nathan Alan Luo Alec Helyar Aleksander Madry Aleksandr Efremov Aleksandra Spyra Alex Baker-Whitcomb Alex Beutel Alex Karpenko Alex Makelov Alex Neitz Alex Wei Alexandra Barr Alexandre Kirchmeyer Alexey Ivanov Alexi Christakis Alistair Gillespie Allison Tam Ally Bennett Alvin Wan Alyssa Huang Amy\u00a0McDonald Sandjideh Amy Yang Ananya Kumar Andre Saraiva Andrea Vallone Andrei Gheorghe Andres\u00a0Garcia Garcia Andrew Braunstein Andrew Liu Andrew Schmidt Andrey Mereskin Andrey Mishchenko Andy Applebaum Andy Rogerson Ann Rajan Annie Wei Anoop Kotha Anubha Srivastava Anushree Agrawal Arun Vijayvergiya Ashley Tyra Ashvin Nair Avi Nayak Ben Eggers Bessie Ji Beth Hoover Bill Chen Blair Chen Boaz Barak Borys Minaiev Botao Hao Bowen Baker Brad Lightcap Brandon McKinzie Brandon Wang Brendan Quinn Brian Fioca Brian Hsu Brian Yang Brian Yu Brian Zhang Brittany Brenner Callie\u00a0Riggins Zetino Cameron Raymond Camillo Lugaresi Carolina Paz Cary Hudson Cedric Whitney Chak Li Charles Chen Charlotte Cole Chelsea Voss Chen Ding Chen Shen Chengdu Huang Chris Colby Chris Hallacy Chris Koch Chris Lu Christina Kaplan Christina Kim CJ Minott-Henriques Cliff Frey Cody Yu Coley Czarnecki Colin Reid Colin Wei Cory Decareaux Cristina Scheau Cyril Zhang Cyrus Forbes Da Tang Dakota Goldberg Dan Roberts Dana Palmie Daniel Kappler Daniel Levine Daniel Wright Dave Leo David Lin David Robinson Declan Grabb Derek Chen Derek Lim Derek Salama Dibya Bhattacharjee Dimitris Tsipras Dinghua Li Dingli Yu DJ Strouse Drew Williams Dylan Hunn Ed Bayes Edwin Arbus Ekin Akyurek Elaine\u00a0Ya Le Elana Widmann Eli Yani Elizabeth Proehl Enis Sert Enoch Cheung Eri Schwartz Eric Han Eric Jiang Eric Mitchell Eric Sigler Eric Wallace Erik Ritter Erin Kavanaugh Evan Mays Evgenii Nikishin Fangyuan Li Felipe\u00a0Petroski Such Filipe de Avila Belbute\u00a0Peres Filippo Raso Florent Bekerman Foivos Tsimpourlas Fotis Chantzis Francis Song Francis Zhang Gaby Raila Garrett McGrath Gary Briggs Gary Yang Giambattista Parascandolo Gildas Chabot Grace Kim Grace Zhao Gregory Valiant Guillaume Leclerc Hadi Salman Hanson Wang Hao Sheng Haoming Jiang Haoyu Wang Haozhun Jin Harshit Sikchi Heather Schmidt Henry Aspegren Honglin Chen Huida Qiu Hunter Lightman Ian Covert Ian Kivlichan Ian Silber Ian Sohl Ibrahim Hammoud Ignasi Clavera Ikai Lan Ilge Akkaya Ilya Kostrikov Irina Kofman Isak Etinger Ishaan Singal Jackie Hehir Jacob Huh Jacqueline Pan Jake Wilczynski Jakub Pachocki James Lee James Quinn Jamie Kiros Janvi Kalra Jasmyn Samaroo Jason Wang Jason Wolfe Jay Chen Jay Wang Jean Harb Jeffrey Han Jeffrey Wang Jennifer Zhao Jeremy Chen Jerene Yang Jerry Tworek Jesse Chand Jessica Landon Jessica Liang Ji Lin Jiancheng Liu Jianfeng Wang Jie Tang Jihan Yin Joanne Jang Joel Morris Joey Flynn Johannes Ferstad Johannes Heidecke John Fishbein John Hallman Jonah Grant Jonathan Chien Jonathan Gordon Jongsoo Park Jordan Liss Jos Kraaijeveld Joseph Guay Joseph Mo Josh Lawson Josh McGrath Joshua Vendrow Joy Jiao Julian Lee Julie Steele Julie Wang Junhua Mao Kai Chen Kai Hayashi Kai Xiao Kamyar Salahi Kan Wu Karan Sekhri Karan Sharma Karan Singhal Karen Li Kenny Nguyen Keren Gu-Lemberg Kevin King Kevin Liu Kevin Stone Kevin Yu Kristen Ying Kristian Georgiev Kristie Lim Kushal Tirumala Kyle Miller Lama Ahmad Larry Lv Laura Clare Laurance Fauconnet Lauren Itow Lauren Yang Laurentia Romaniuk Leah Anise Lee Byron Leher Pathak Leon Maksin Leyan Lo Leyton Ho Li Jing Liang Wu Liang Xiong Lien Mamitsuka Lin Yang Lindsay McCallum Lindsey Held Liz Bourgeois Logan Engstrom Lorenz Kuhn Louis Feuvrier Lu Zhang Lucas Switzer Lukas Kondraciuk Lukasz Kaiser Manas Joglekar Mandeep Singh Mandip Shah Manuka Stratta Marcus Williams Mark Chen Mark Sun Marselus Cayton Martin Li Marvin Zhang Marwan Aljubeh Matt Nichols Matthew Haines Max Schwarzer Mayank Gupta Meghan Shah Melody Huang Meng Dong Mengqing Wang Mia Glaese Micah Carroll Michael Lampe Michael Malek Michael Sharman Michael Zhang Michele Wang Michelle Pokrass Mihai Florian Mikhail Pavlov Miles Wang Ming Chen Mingxuan Wang Minnia Feng Mo Bavarian Molly Lin Moose Abdool Mostafa Rohaninejad Nacho Soto Natalie Staudacher Natan LaFontaine Nathan Marwell Nelson Liu Nick Preston Nick Turley Nicklas Ansman Nicole Blades Nikil Pancha Nikita Mikhaylin Niko Felix Nikunj Handa Nishant Rai Nitish Keskar Noam Brown Ofir Nachum Oleg Boiko Oleg Murk Olivia Watkins Oona Gleeson Pamela Mishkin Patryk Lesiewicz Paul Baltescu Pavel Belov Peter Zhokhov Philip Pronin Phillip Guo Phoebe Thacker Qi Liu Qiming Yuan Qinghua Liu Rachel Dias Rachel Puckett Rahul Arora Ravi\u00a0Teja Mullapudi Raz Gaon Reah Miyara Rennie Song Rishabh Aggarwal RJ Marsan Robel Yemiru Robert Xiong Rohan Kshirsagar Rohan Nuttall Roman Tsiupa Ronen Eldan Rose Wang Roshan James Roy Ziv Rui Shu Ruslan Nigmatullin Saachi Jain Saam Talaie Sam Altman Sam Arnesen Sam Toizer Sam Toyer Samuel Miserendino Sandhini Agarwal Sarah Yoo Savannah Heon Scott Ethersmith Sean Grove Sean Taylor Sebastien Bubeck Sever Banesiu Shaokyi Amdo Shengjia Zhao Sherwin Wu Shibani Santurkar Shiyu Zhao Shraman\u00a0Ray Chaudhuri Shreyas Krishnaswamy Shuaiqi Xia Shuyang Cheng Shyamal Anadkat Sim\u00f3n\u00a0Posada Fishman Simon Tobin Siyuan Fu Somay Jain Song Mei Sonya Egoian Spencer Kim Spug Golden SQ Mah Steph Lin Stephen Imm Steve Sharpe Steve Yadlowsky Sulman Choudhry Sungwon Eum Suvansh Sanjeev Tabarak Khan Tal Stramer Tao Wang Tao Xin Tarun Gogineni Taya Christianson Ted Sanders Tejal Patwardhan Thomas Degry Thomas Shadwell Tianfu Fu Tianshi Gao Timur Garipov Tina Sriskandarajah Toki Sherbakov Tomer Kaftan Tomo Hiratsuka Tongzhou Wang Tony Song Tony Zhao Troy Peterson Val Kharitonov Victoria Chernova Vineet Kosaraju Vishal Kuo Vitchyr Pong Vivek Verma Vlad Petrov Wanning Jiang Weixing Zhang Wenda Zhou Wenlei Xie Wenting Zhan Wes McCabe Will DePue Will Ellsworth Wulfie Bain Wyatt Thompson Xiangning Chen Xiangyu Qi Xin Xiang Xinwei Shi Yann Dubois Yaodong Yu Yara Khakbaz Yifan Wu Yilei Qian Yin\u00a0Tat Lee Yinbo Chen Yizhen Zhang Yizhong Xiong Yonglong Tian Young Cha Yu Bai Yu Yang Yuan Yuan Yuanzhi Li Yufeng Zhang Yuguang Yang Yujia Jin Yun Jiang Yunyun Wang Yushi Wang Yutian Liu Zach Stubenvoll Zehao Dou Zheng Wu and Zhigang Wang. 2025. OpenAI GPT-5 System Card. arxiv:https:\/\/arXiv.org\/abs\/2601.03267\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2601.03267"},{"key":"e_1_3_3_2_64_2","unstructured":"Aarohi Srivastava Abhinav Rastogi Abhishek Rao Abu Awal\u00a0Md Shoeb Abubakar Abid Adam Fisch Adam\u00a0R. Brown Adam Santoro Aditya Gupta Adri\u00e0 Garriga-Alonso Agnieszka Kluska Aitor Lewkowycz Akshat Agarwal Alethea Power Alex Ray Alex Warstadt Alexander\u00a0W. Kocurek Ali Safaya Ali Tazarv Alice Xiang Alicia Parrish Allen Nie Aman Hussain Amanda Askell Amanda Dsouza Ambrose Slone Ameet Rahane Anantharaman\u00a0S. Iyer Anders Andreassen Andrea Madotto Andrea Santilli Andreas Stuhlm\u00fcller Andrew Dai Andrew La Andrew Lampinen Andy Zou Angela Jiang Angelica Chen Anh Vuong Animesh Gupta Anna Gottardi Antonio Norelli Anu Venkatesh Arash Gholamidavoodi Arfa Tabassum Arul Menezes Arun Kirubarajan Asher Mullokandov Ashish Sabharwal Austin Herrick Avia Efrat Aykut Erdem Ayla Karaka\u015f B.\u00a0Ryan Roberts Bao\u00a0Sheng Loe Barret Zoph Bart\u0142omiej Bojanowski Batuhan \u00d6zyurt Behnam Hedayatnia Behnam Neyshabur Benjamin Inden Benno Stein Berk Ekmekci Bill\u00a0Yuchen Lin Blake Howald Bryan Orinion Cameron Diao Cameron Dour Catherine Stinson Cedrick Argueta C\u00e9sar\u00a0Ferri Ram\u00edrez Chandan Singh Charles Rathkopf Chenlin Meng Chitta Baral Chiyu Wu Chris Callison-Burch Chris Waites Christian Voigt Christopher\u00a0D. Manning Christopher Potts Cindy Ramirez Clara\u00a0E. Rivera Clemencia Siro Colin Raffel Courtney Ashcraft Cristina Garbacea Damien Sileo Dan Garrette Dan Hendrycks Dan Kilman Dan Roth Daniel Freeman Daniel Khashabi Daniel Levy Daniel\u00a0Mosegu\u00ed Gonz\u00e1lez Danielle Perszyk Danny Hernandez Danqi Chen Daphne Ippolito Dar Gilboa David Dohan David Drakard David Jurgens Debajyoti Datta Deep Ganguli Denis Emelin Denis Kleyko Deniz Yuret Derek Chen Derek Tam Dieuwke Hupkes Diganta Misra Dilyar Buzan Dimitri\u00a0Coelho Mollo Diyi Yang Dong-Ho Lee Dylan Schrader Ekaterina Shutova Ekin\u00a0Dogus Cubuk Elad Segal Eleanor Hagerman Elizabeth Barnes Elizabeth Donoway Ellie Pavlick Emanuele Rodola Emma Lam Eric Chu Eric Tang Erkut Erdem Ernie Chang Ethan\u00a0A. Chi Ethan Dyer Ethan Jerzak Ethan Kim Eunice\u00a0Engefu Manyasi Evgenii Zheltonozhskii Fanyue Xia Fatemeh Siar Fernando Mart\u00ednez-Plumed Francesca Happ\u00e9 Francois Chollet Frieda Rong Gaurav Mishra Genta\u00a0Indra Winata Gerard de Melo Germ\u00e1n Kruszewski Giambattista Parascandolo Giorgio Mariani Gloria Wang Gonzalo Jaimovitch-L\u00f3pez Gregor Betz Guy Gur-Ari Hana Galijasevic Hannah Kim Hannah Rashkin Hannaneh Hajishirzi Harsh Mehta Hayden Bogar Henry Shevlin Hinrich Sch\u00fctze Hiromu Yakura Hongming Zhang Hugh\u00a0Mee Wong Ian Ng Isaac Noble Jaap Jumelet Jack Geissinger Jackson Kernion Jacob Hilton Jaehoon Lee Jaime\u00a0Fern\u00e1ndez Fisac James\u00a0B. Simon James Koppel James Zheng James Zou Jan Koco\u0144 Jana Thompson Janelle Wingfield Jared Kaplan Jarema Radom Jascha Sohl-Dickstein Jason Phang Jason Wei Jason Yosinski Jekaterina Novikova Jelle Bosscher Jennifer Marsh Jeremy Kim Jeroen Taal Jesse Engel Jesujoba Alabi Jiacheng Xu Jiaming Song Jillian Tang Joan Waweru John Burden John Miller John\u00a0U. Balis Jonathan Batchelder Jonathan Berant J\u00f6rg Frohberg Jos Rozen Jose Hernandez-Orallo Joseph Boudeman Joseph Guerr Joseph Jones Joshua\u00a0B. Tenenbaum Joshua\u00a0S. Rule Joyce Chua Kamil Kanclerz Karen Livescu Karl Krauth Karthik Gopalakrishnan Katerina Ignatyeva Katja Markert Kaustubh\u00a0D. Dhole Kevin Gimpel Kevin Omondi Kory Mathewson Kristen Chiafullo Ksenia Shkaruta Kumar Shridhar Kyle McDonell Kyle Richardson Laria Reynolds Leo Gao Li Zhang Liam Dugan Lianhui Qin Lidia Contreras-Ochando Louis-Philippe Morency Luca Moschella Lucas Lam Lucy Noble Ludwig Schmidt Luheng He Luis\u00a0Oliveros Col\u00f3n Luke Metz L\u00fctfi\u00a0Kerem \u015eenel Maarten Bosma Maarten Sap Maartje ter Hoeve Maheen Farooqi Manaal Faruqui Mantas Mazeika Marco Baturan Marco Marelli Marco Maru Maria Jose\u00a0Ram\u00edrez Quintana Marie Tolkiehn Mario Giulianelli Martha Lewis Martin Potthast Matthew\u00a0L. Leavitt Matthias Hagen M\u00e1ty\u00e1s Schubert Medina\u00a0Orduna Baitemirova Melody Arnaud Melvin McElrath Michael\u00a0A. Yee Michael Cohen Michael Gu Michael Ivanitskiy Michael Starritt Michael Strube Micha\u0142 Sw\u0119drowski Michele Bevilacqua Michihiro Yasunaga Mihir Kale Mike Cain Mimee Xu Mirac Suzgun Mitch Walker Mo Tiwari Mohit Bansal Moin Aminnaseri Mor Geva Mozhdeh Gheini Mukund\u00a0Varma T Nanyun Peng Nathan\u00a0A. Chi Nayeon Lee Neta Gur-Ari Krakover Nicholas Cameron Nicholas Roberts Nick Doiron Nicole Martinez Nikita Nangia Niklas Deckers Niklas Muennighoff Nitish\u00a0Shirish Keskar Niveditha\u00a0S. Iyer Noah Constant Noah Fiedel Nuan Wen Oliver Zhang Omar Agha Omar Elbaghdadi Omer Levy Owain Evans Pablo Antonio\u00a0Moreno Casares Parth Doshi Pascale Fung Paul\u00a0Pu Liang Paul Vicol Pegah Alipoormolabashi Peiyuan Liao Percy Liang Peter Chang Peter Eckersley Phu\u00a0Mon Htut Pinyu Hwang Piotr Mi\u0142kowski Piyush Patil Pouya Pezeshkpour Priti Oli Qiaozhu Mei Qing Lyu Qinlang Chen Rabin Banjade Rachel\u00a0Etta Rudolph Raefer Gabriel Rahel Habacker Ramon Risco Rapha\u00ebl Milli\u00e8re Rhythm Garg Richard Barnes Rif\u00a0A. Saurous Riku Arakawa Robbe Raymaekers Robert Frank Rohan Sikand Roman Novak Roman Sitelew Ronan LeBras Rosanne Liu Rowan Jacobs Rui Zhang Ruslan Salakhutdinov Ryan Chi Ryan Lee Ryan Stovall Ryan Teehan Rylan Yang Sahib Singh Saif\u00a0M. Mohammad Sajant Anand Sam Dillavou Sam Shleifer Sam Wiseman Samuel Gruetter Samuel\u00a0R. Bowman Samuel\u00a0S. Schoenholz Sanghyun Han Sanjeev Kwatra Sarah\u00a0A. Rous Sarik Ghazarian Sayan Ghosh Sean Casey Sebastian Bischoff Sebastian Gehrmann Sebastian Schuster Sepideh Sadeghi Shadi Hamdan Sharon Zhou Shashank Srivastava Sherry Shi Shikhar Singh Shima Asaadi Shixiang\u00a0Shane Gu Shubh Pachchigar Shubham Toshniwal Shyam Upadhyay Shyamolima Debnath Siamak Shakeri Simon Thormeyer Simone Melzi Siva Reddy Sneha\u00a0Priscilla Makini Soo-Hwan Lee Spencer Torene Sriharsha Hatwar Stanislas Dehaene Stefan Divic Stefano Ermon Stella Biderman Stephanie Lin Stephen Prasad Steven\u00a0T. Piantadosi Stuart\u00a0M. Shieber Summer Misherghi Svetlana Kiritchenko Swaroop Mishra Tal Linzen Tal Schuster Tao Li Tao Yu Tariq Ali Tatsu Hashimoto Te-Lin Wu Th\u00e9o Desbordes Theodore Rothschild Thomas Phan Tianle Wang Tiberius Nkinyili Timo Schick Timofei Kornev Titus Tunduny Tobias Gerstenberg Trenton Chang Trishala Neeraj Tushar Khot Tyler Shultz Uri Shaham Vedant Misra Vera Demberg Victoria Nyamai Vikas Raunak Vinay Ramasesh Vinay\u00a0Uday Prabhu Vishakh Padmakumar Vivek Srikumar William Fedus William Saunders William Zhang Wout Vossen Xiang Ren Xiaoyu Tong Xinran Zhao Xinyi Wu Xudong Shen Yadollah Yaghoobzadeh Yair Lakretz Yangqiu Song Yasaman Bahri Yejin Choi Yichi Yang Yiding Hao Yifu Chen Yonatan Belinkov Yu Hou Yufang Hou Yuntao Bai Zachary Seid Zhuoye Zhao Zijian Wang Zijie\u00a0J. Wang Zirui Wang and Ziyi Wu. 2023. Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models. arxiv:https:\/\/arXiv.org\/abs\/2206.04615\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2206.04615"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","unstructured":"Marc Steen. 2013. Co-Design as a Process of Joint Inquiry and Imagination. Design Issues 29 2 (04 2013) 16\u201328. arXiv:https:\/\/direct.mit.edu\/desi\/article-pdf\/29\/2\/16\/1715163\/desi_a_00207.pdf10.1162\/DESI_a_00207","DOI":"10.1162\/DESI_a_00207"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658992"},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"crossref","unstructured":"Emily Tseng Meg Young Marianne Aubin\u00a0Le Qu\u00e9r\u00e9 Aimee Rinehart and Harini Suresh. 2025. \"Ownership Not Just Happy Talk\": Co-Designing a Participatory Large Language Model for Journalism. arXiv (2025). arXiv:https:\/\/arXiv.org\/abs\/2501.17299","DOI":"10.1145\/3715275.3732198"},{"key":"e_1_3_3_2_68_2","doi-asserted-by":"crossref","unstructured":"Yuxia Wang Revanth\u00a0Gangi Reddy Zain\u00a0Muhammad Mujahid Arnav Arora Aleksandr Rubashevskii Jiahui Geng Osama\u00a0Mohammed Afzal Liangming Pan Nadav Borenstein Aditya Pillai Isabelle Augenstein Iryna Gurevych and Preslav Nakov. 2024. Factcheck-Bench: Fine-Grained Evaluation Benchmark for Automatic Fact-checkers. arxiv:https:\/\/arXiv.org\/abs\/2311.09000\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2311.09000","DOI":"10.18653\/v1\/2024.findings-emnlp.830"},{"key":"e_1_3_3_2_69_2","unstructured":"Shunyu Yao Noah Shinn Pedram Razavi and Karthik Narasimhan. 2024. \u03c4 -bench: A Benchmark for Tool-Agent-User Interaction in Real-World Domains. arxiv:https:\/\/arXiv.org\/abs\/2406.12045\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2406.12045"},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"publisher","unstructured":"Mary\u00a0Lynn Young and Alfred Hermida. 2024. People Power Platforms and the Business of Journalism. Digital Journalism 12 9 (2024) 1250\u20131260. arXiv:10.1080\/21670811.2023.227352310.1080\/21670811.2023.2273523","DOI":"10.1080\/21670811.2023.2273523"},{"key":"e_1_3_3_2_71_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1425"},{"key":"e_1_3_3_2_72_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1472"},{"key":"e_1_3_3_2_73_2","unstructured":"Wanjun Zhong Ruixiang Cui Yiduo Guo Yaobo Liang Shuai Lu Yanlin Wang Amin Saied Weizhu Chen and Nan Duan. 2023. AGIEval: A Human-Centric Benchmark for Evaluating Foundation Models. arxiv:https:\/\/arXiv.org\/abs\/2304.06364\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2304.06364"}],"event":{"name":"DIS '26: Designing Interactive Systems Conference","location":"Singapore , Singapore","acronym":"DIS '26","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2026 Designing Interactive Systems Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3800645.3812938","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T07:44:44Z","timestamp":1781336684000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3800645.3812938"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,12]]},"references-count":72,"alternative-id":["10.1145\/3800645.3812938","10.1145\/3800645"],"URL":"https:\/\/doi.org\/10.1145\/3800645.3812938","relation":{},"subject":[],"published":{"date-parts":[[2026,6,12]]},"assertion":[{"value":"2026-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}