{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T04:31:20Z","timestamp":1779337880198,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,19]],"date-time":"2023-04-19T00:00:00Z","timestamp":1681862400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-2040942"],"award-info":[{"award-number":["IIS-2040942"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,19]]},"DOI":"10.1145\/3544548.3581268","type":"proceedings-article","created":{"date-parts":[[2023,4,20]],"date-time":"2023-04-20T04:27:55Z","timestamp":1681964875000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":45,"title":["Zeno: An Interactive Framework for Behavioral Evaluation of Machine Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0348-3362","authenticated-orcid":false,"given":"\u00c1ngel Alexander","family":"Cabrera","sequence":"first","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9284-5750","authenticated-orcid":false,"given":"Erica","family":"Fu","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2726-4108","authenticated-orcid":false,"given":"Donald","family":"Bertucci","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering and Computer Science, Oregon State University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6730-922X","authenticated-orcid":false,"given":"Kenneth","family":"Holstein","sequence":"additional","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6650-1893","authenticated-orcid":false,"given":"Ameet","family":"Talwalkar","sequence":"additional","affiliation":[{"name":"Machine Learning Department, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9856-9654","authenticated-orcid":false,"given":"Jason I.","family":"Hong","sequence":"additional","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8369-3847","authenticated-orcid":false,"given":"Adam","family":"Perer","sequence":"additional","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,4,19]]},"reference":[{"key":"e_1_3_3_2_1_1","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2019.2942288"},{"key":"e_1_3_3_2_2_1","unstructured":"Josh Attenberg Panagiotis\u00a0G. Ipeirotis and Foster Provost. 2011. Beat the Machine: Challenging Workers to Find the Unknown Unknowns."},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.2477899"},{"key":"e_1_3_3_2_4_1","volume-title":"DendroMap: Visual Exploration of Large-Scale Image Datasets for Machine Learning with Treemaps","author":"Bertucci Donald","year":"2022","unstructured":"Donald Bertucci, Md\u00a0Montaser Hamid, Yashwanthi Anand, Anita Ruangrotsakun, Delyar Tabatabai, Melissa Perez, and Minsuk Kahng. 2022. DendroMap: Visual Exploration of Large-Scale Image Datasets for Machine Learning with Treemaps. IEEE Transactions on Visualization and Computer Graphics (TVCG) (2022). https:\/\/div-lab.github.io\/dendromap\/ Publisher: IEEE."},{"key":"e_1_3_3_2_5_1","unstructured":"Rishi Bommasani Drew\u00a0A. Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael\u00a0S. Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill Erik Brynjolfsson Shyamal Buch Dallas Card Rodrigo Castellon Niladri Chatterji Annie Chen Kathleen Creel Jared\u00a0Quincy Davis Dora Demszky Chris Donahue Moussa Doumbouya Esin Durmus Stefano Ermon John Etchemendy Kawin Ethayarajh Li Fei-Fei Chelsea Finn Trevor Gale Lauren Gillespie Karan Goel Noah Goodman Shelby Grossman Neel Guha Tatsunori Hashimoto Peter Henderson John Hewitt Daniel\u00a0E. Ho Jenny Hong Kyle Hsu Jing Huang Thomas Icard Saahil Jain Dan Jurafsky Pratyusha Kalluri Siddharth Karamcheti Geoff Keeling Fereshte Khani Omar Khattab Pang\u00a0Wei Koh Mark Krass Ranjay Krishna Rohith Kuditipudi Ananya Kumar Faisal Ladhak Mina Lee Tony Lee Jure Leskovec Isabelle Levent Xiang\u00a0Lisa Li Xuechen Li Tengyu Ma Ali Malik Christopher\u00a0D. Manning Suvir Mirchandani Eric Mitchell Zanele Munyikwa Suraj Nair Avanika Narayan Deepak Narayanan Ben Newman Allen Nie Juan\u00a0Carlos Niebles Hamed Nilforoshan Julian Nyarko Giray Ogut Laurel Orr Isabel Papadimitriou Joon\u00a0Sung Park Chris Piech Eva Portelance Christopher Potts Aditi Raghunathan Rob Reich Hongyu Ren Frieda Rong Yusuf Roohani Camilo Ruiz Jack Ryan Christopher R\u00e9 Dorsa Sadigh Shiori Sagawa Keshav Santhanam Andy Shih Krishnan Srinivasan Alex Tamkin Rohan Taori Armin\u00a0W. Thomas Florian Tram\u00e8r Rose\u00a0E. Wang William Wang Bohan Wu Jiajun Wu Yuhuai Wu Sang\u00a0Michael Xie Michihiro Yasunaga Jiaxuan You Matei Zaharia Michael Zhang Tianyi Zhang Xikun Zhang Yuhui Zhang Lucia Zheng Kaitlyn Zhou and Percy Liang. 2022. On the Opportunities and Risks of Foundation Models. http:\/\/arxiv.org\/abs\/2108.07258 arXiv:2108.07258 [cs]."},{"key":"e_1_3_3_2_6_1","volume-title":"Proceedings of the 1st Conference on Fairness, Accountability and Transparency(Proceedings of Machine Learning Research, Vol.\u00a081)","author":"Buolamwini Joy","year":"2018","unstructured":"Joy Buolamwini and Timnit Gebru. 2018. Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification. In Proceedings of the 1st Conference on Fairness, Accountability and Transparency(Proceedings of Machine Learning Research, Vol.\u00a081), Sorelle\u00a0A. Friedler and Christo Wilson (Eds.). PMLR, 77\u201391. Buolamwini2018."},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502102"},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3479569"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/VAST47406.2019.8986948"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542921"},{"key":"e_1_3_3_2_11_1","volume-title":"Did the Model Change? Efficiently Assessing Machine Learning API Shifts. arXiv:2107.14203 [cs, stat] (July","author":"Chen Lingjiao","year":"2021","unstructured":"Lingjiao Chen, Tracy Cai, Matei Zaharia, and James Zou. 2021. Did the Model Change? Efficiently Assessing Machine Learning API Shifts. arXiv:2107.14203 [cs, stat] (July 2021). http:\/\/arxiv.org\/abs\/2107.14203 arXiv:2107.14203."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3172944.3172950"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3143561"},{"key":"e_1_3_3_2_14_1","volume-title":"Slice-based Learning: A Programming Model for Residual Learning in Critical Data Slices. NeurIPS","author":"Chen S.","year":"2019","unstructured":"Vincent\u00a0S. Chen, Sen Wu, Zhenzhen Weng, Alexander Ratner, and Christopher R\u00e9. 2019. Slice-based Learning: A Programming Model for Residual Learning in Critical Data Slices. NeurIPS (2019). http:\/\/arxiv.org\/abs\/1909.06349"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2019.00139"},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533108"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533113"},{"key":"e_1_3_3_2_18_1","volume-title":"The Spotlight: A General Method for Discovering Systematic Errors in Deep Learning Models. arXiv:2107.00758 [cs, stat] (Oct.","author":"Eon Greg","year":"2021","unstructured":"Greg d\u2019Eon, Jason d\u2019Eon, James\u00a0R. Wright, and Kevin Leyton-Brown. 2021. The Spotlight: A General Method for Discovering Systematic Errors in Deep Learning Models. arXiv:2107.00758 [cs, stat] (Oct. 2021). http:\/\/arxiv.org\/abs\/2107.00758 arXiv:2107.00758."},{"key":"e_1_3_3_2_19_1","volume-title":"Domino: Discovering Systematic Errors with Cross-Modal Embeddings. arXiv:2203.14960 [cs] (April","author":"Eyuboglu Sabri","year":"2022","unstructured":"Sabri Eyuboglu, Maya Varma, Khaled Saab, Jean-Benoit Delbrouck, Christopher Lee-Messer, Jared Dunnmon, James Zou, and Christopher R\u00e9. 2022. Domino: Discovering Systematic Errors with Cross-Modal Embeddings. arXiv:2203.14960 [cs] (April 2022). http:\/\/arxiv.org\/abs\/2203.14960 arXiv:2203.14960."},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474734"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458723"},{"key":"e_1_3_3_2_22_1","volume-title":"Robustness Gym: Unifying the NLP Evaluation Landscape.","author":"Goel Karan","year":"2021","unstructured":"Karan Goel, Nazneen Rajani, Jesse Vig, Samson Tan, Jason Wu, Stephan Zheng, Caiming Xiong, Mohit Bansal, and Christopher R\u00e9. 2021. Robustness Gym: Unifying the NLP Evaluation Landscape. (2021), 1\u201334. http:\/\/arxiv.org\/abs\/2101.04840"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501823"},{"key":"e_1_3_3_2_24_1","volume-title":"Arquero: Query processing and transformation of array-backed data tables. https:\/\/uwdata.github.io\/arquero\/","author":"Heer Jeffrey","year":"2020","unstructured":"Jeffrey Heer. 2020. Arquero: Query processing and transformation of array-backed data tables. https:\/\/uwdata.github.io\/arquero\/"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2020.3012063"},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3461702.3462527"},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"publisher","unstructured":"Zohar Jackson C\u00e9sar Souza Jason Flaks Yuxin Pan Hereman Nicolas and Adhish Thite. 2018. Jakobovski\/Free-Spoken-Digit-Dataset: V1.0.8. https:\/\/doi.org\/10.5281\/ZENODO.1342401","DOI":"10.5281\/ZENODO.1342401"},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-37734-2_37"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939502.2939503"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1148\/radiol.2251011667"},{"key":"e_1_3_3_2_31_1","volume-title":"Learning multiple layers of features from tiny images. (2009)","author":"Krizhevsky Alex","unstructured":"Alex Krizhevsky, Geoffrey Hinton, and others. 2009. Learning multiple layers of features from tiny images. (2009). Publisher: Citeseer."},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_2_33_1","volume-title":"Just Train Twice: Improving Group Robustness without Training Group Information. arXiv:2107.09044 [cs, stat] (Sept","author":"Liu Evan\u00a0Zheran","year":"2021","unstructured":"Evan\u00a0Zheran Liu, Behzad Haghgoo, Annie\u00a0S. Chen, Aditi Raghunathan, Pang\u00a0Wei Koh, Shiori Sagawa, Percy Liang, and Chelsea Finn. 2021. Just Train Twice: Improving Group Robustness without Training Group Information. arXiv:2107.09044 [cs, stat] (Sept. 2021). http:\/\/arxiv.org\/abs\/2107.09044 arXiv:2107.09044."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302505.3310068"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-92bf1922-00a"},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287596"},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2011.06.019"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300924"},{"key":"e_1_3_3_2_39_1","volume-title":"Ray: A Distributed Framework for Emerging AI Applications","author":"Moritz Philipp","year":"2018","unstructured":"Philipp Moritz, Robert Nishihara, Stephanie Wang, Alexey Tumanov, Richard Liaw, Eric Liang, Melih Elibol, Zongheng Yang, William Paul, Michael\u00a0I. Jordan, and Ion Stoica. 2018. Ray: A Distributed Framework for Emerging AI Applications. http:\/\/arxiv.org\/abs\/1712.05889 arXiv:1712.05889 [cs, stat]."},{"key":"e_1_3_3_2_40_1","volume-title":"Documentation, Engineering, and Process. arXiv:2110.10234 [cs] (Dec.","author":"Nahar Nadia","year":"2021","unstructured":"Nadia Nahar, Shurui Zhou, Grace Lewis, and Christian K\u00e4stner. 2021. Collaboration Challenges in Building ML-Enabled Systems: Communication, Documentation, Engineering, and Process. arXiv:2110.10234 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2110.10234 arXiv:2110.10234."},{"key":"e_1_3_3_2_41_1","unstructured":"National Transportation Safety Board. 2019. Collision Between Vehicle Controlled by Developmental Automated Driving System and Pedestrian. https:\/\/www.ntsb.gov\/news\/events\/Pages\/2019-HWY18MH010-BMG.aspx"},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1609\/hcomp.v6i1.13337"},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368555.3384468"},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/REW56159.2022.00039"},{"key":"e_1_3_3_2_46_1","volume-title":"Facets: An Open Source Visualization Tool for Machine Learning Training Data. https:\/\/pair-code.github.io\/facets\/","author":"Pushkarna Mahima","year":"2017","unstructured":"Mahima Pushkarna, James Wexler, and Jimbo Wilson. 2017. Facets: An Open Source Visualization Tool for Machine Learning Training Data. https:\/\/pair-code.github.io\/facets\/"},{"key":"e_1_3_3_2_47_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1138-y"},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.230"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-Resolution Image Synthesis with Latent Diffusion Models. http:\/\/arxiv.org\/abs\/2112.10752 arXiv:2112.10752 [cs].","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_2_51_1","unstructured":"Shiori Sagawa Pang\u00a0Wei Koh Tatsunori\u00a0B. Hashimoto and Percy Liang. 2020. Distributionally Robust Neural Networks for Group Shifts: On the Importance of Regularization for Worst-Case Generalization. http:\/\/arxiv.org\/abs\/1911.08731 arXiv:1911.08731 [cs stat]."},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2016.2599030"},{"key":"e_1_3_3_2_53_1","first-page":"13","article-title":"Nutritional Labels for Data and Models","volume":"42","author":"Stoyanovich Julia","year":"2019","unstructured":"Julia Stoyanovich and Bill Howe. 2019. Nutritional Labels for Data and Models. IEEE Data Eng. Bull. 42(2019), 13\u201323.","journal-title":"IEEE Data Eng. Bull."},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3461778.3462012"},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"publisher","DOI":"10.1158\/0008-5472.CAN-17-0339"},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"crossref","unstructured":"Zijie\u00a0J. Wang Evan Montoya David Munechika Haoyang Yang Benjamin Hoover and Duen\u00a0Horng Chau. 2022. DiffusionDB: A Large-scale Prompt Gallery Dataset for Text-to-Image Generative Models. http:\/\/arxiv.org\/abs\/2210.14896 arXiv:2210.14896 [cs].","DOI":"10.18653\/v1\/2023.acl-long.51"},{"key":"e_1_3_3_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2019.2934619"},{"key":"e_1_3_3_2_58_1","unstructured":"Benjamin Wilson Judy Hoffman and Jamie Morgenstern. 2019. Predictive Inequity in Object Detection. http:\/\/arxiv.org\/abs\/1902.11097 arXiv:1902.11097 [cs stat]."},{"key":"e_1_3_3_2_59_1","first-page":"19","volume-title":"Proceedings of the 57th Conference of the Association for Computational Linguistics","author":"Wu Tongshuang","year":"2019","unstructured":"Tongshuang Wu, Marco\u00a0Tulio Ribeiro, Jeffrey Heer, and Daniel Weld. 2019. {E}rrudite: Scalable, Reproducible, and Testable Error Analysis. Proceedings of the 57th Conference of the Association for Computational Linguistics (2019), 747\u2013763. https:\/\/www.aclweb.org\/anthology\/P19-1073"},{"key":"e_1_3_3_2_60_1","unstructured":"Chenyang Yang Rachel Brower-Sinning Grace\u00a0A. Lewis Christian K\u00e4stner and Tongshuang Wu. 2022. Capabilities for Better ML Engineering. http:\/\/arxiv.org\/abs\/2211.06409 arXiv:2211.06409 [cs]."}],"event":{"name":"CHI '23: CHI Conference on Human Factors in Computing Systems","location":"Hamburg Germany","acronym":"CHI '23","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3581268","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3544548.3581268","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3544548.3581268","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:29Z","timestamp":1750178249000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3581268"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,19]]},"references-count":60,"alternative-id":["10.1145\/3544548.3581268","10.1145\/3544548"],"URL":"https:\/\/doi.org\/10.1145\/3544548.3581268","relation":{},"subject":[],"published":{"date-parts":[[2023,4,19]]},"assertion":[{"value":"2023-04-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}