{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T01:18:07Z","timestamp":1777339087442,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":78,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2021323098"],"award-info":[{"award-number":["2021323098"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3630106.3659047","type":"proceedings-article","created":{"date-parts":[[2024,6,5]],"date-time":"2024-06-05T09:14:21Z","timestamp":1717578861000},"page":"2432-2453","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Model ChangeLists: Characterizing Updates to ML Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8412-0266","authenticated-orcid":false,"given":"Sabri","family":"Eyuboglu","sequence":"first","affiliation":[{"name":"Computer Science, Stanford University, United States"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0687-5805","authenticated-orcid":false,"given":"Karan","family":"Goel","sequence":"additional","affiliation":[{"name":"Computer Science, Stanford University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0645-3257","authenticated-orcid":false,"given":"Arjun","family":"Desai","sequence":"additional","affiliation":[{"name":"Computer Science, Stanford University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4473-7544","authenticated-orcid":false,"given":"Lingjiao","family":"Chen","sequence":"additional","affiliation":[{"name":"Stanford University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6373-5520","authenticated-orcid":false,"given":"Mathew","family":"Monfort","sequence":"additional","affiliation":[{"name":"Amazon Web Services, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4109-0108","authenticated-orcid":false,"given":"Chris","family":"R\u00e9","sequence":"additional","affiliation":[{"name":"Computer Science, Stanford University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8880-4764","authenticated-orcid":false,"given":"James","family":"Zou","sequence":"additional","affiliation":[{"name":"Biomedical Data Science, Stanford University, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,5]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems 33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020), 1877\u20131901."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2699226"},{"key":"e_1_3_2_1_3_1","volume-title":"Conference on fairness, accountability and transparency. PMLR, 77\u201391","author":"Buolamwini Joy","year":"2018","unstructured":"Joy Buolamwini and Timnit Gebru. 2018. Gender shades: Intersectional accuracy disparities in commercial gender classification. In Conference on fairness, accountability and transparency. PMLR, 77\u201391."},{"key":"e_1_3_2_1_4_1","volume-title":"Did the Model Change? Efficiently Assessing Machine Learning API Shifts. arXiv preprint arXiv:2107.14203","author":"Chen Lingjiao","year":"2021","unstructured":"Lingjiao Chen, Tracy Cai, Matei Zaharia, and James Zou. 2021. Did the Model Change? Efficiently Assessing Machine Learning API Shifts. arXiv preprint arXiv:2107.14203 (2021)."},{"key":"e_1_3_2_1_5_1","unstructured":"Lingjiao Chen Zhihua Jin Sabri Eyuboglu Christopher Re Matei Zaharia and James\u00a0Y Zou. [n. d.]. HAPI: A Large-scale Longitudinal Dataset of Commercial ML API Predictions. Advances in Neural Information Processing Systems Datasets and Benchmarks ([n. d.])."},{"key":"e_1_3_2_1_6_1","first-page":"10685","article-title":"Frugalml: How to use ml prediction apis more accurately and cheaply","volume":"33","author":"Chen Lingjiao","year":"2020","unstructured":"Lingjiao Chen, Matei Zaharia, and James\u00a0Y Zou. 2020. Frugalml: How to use ml prediction apis more accurately and cheaply. Advances in Neural Information Processing Systems 33 (2020), 10685\u201310696.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_7_1","volume-title":"QuAC: Question answering in context. arXiv preprint arXiv:1808.07036","author":"Choi Eunsol","year":"2018","unstructured":"Eunsol Choi, He He, Mohit Iyyer, Mark Yatskar, Wen-tau Yih, Yejin Choi, Percy Liang, and Luke Zettlemoyer. 2018. QuAC: Question answering in context. arXiv preprint arXiv:1808.07036 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"Deep reinforcement learning from human preferences. Advances in neural information processing systems 30","author":"Christiano F","year":"2017","unstructured":"Paul\u00a0F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep reinforcement learning from human preferences. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_9_1","volume-title":"Scaling instruction-finetuned language models. arXiv preprint arXiv:2210.11416","author":"Chung Hyung\u00a0Won","year":"2022","unstructured":"Hyung\u00a0Won Chung, Le Hou, Shayne Longpre, Barret Zoph, Yi Tay, William Fedus, Eric Li, Xuezhi Wang, Mostafa Dehghani, Siddhartha Brahma, 2022. Scaling instruction-finetuned language models. arXiv preprint arXiv:2210.11416 (2022)."},{"key":"e_1_3_2_1_10_1","volume-title":"BoolQ: Exploring the surprising difficulty of natural yes\/no questions. arXiv preprint arXiv:1905.10044","author":"Clark Christopher","year":"2019","unstructured":"Christopher Clark, Kenton Lee, Ming-Wei Chang, Tom Kwiatkowski, Michael Collins, and Kristina Toutanova. 2019. BoolQ: Exploring the surprising difficulty of natural yes\/no questions. arXiv preprint arXiv:1905.10044 (2019)."},{"key":"e_1_3_2_1_11_1","unstructured":"Cohere. [n. d.]. Generation Model Card. https:\/\/docs.cohere.ai\/docs\/generation-card."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533108"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533108"},{"key":"e_1_3_2_1_14_1","unstructured":"James H\u00a0Martin Daniel\u00a0Jurafsky. 2021. Word Senses and WordNet. In Speech and Language Processing. 10."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops.","author":"de Vries Terrance","year":"2019","unstructured":"Terrance de Vries, Ishan Misra, Changhan Wang, and Laurens van\u00a0der Maaten. 2019. Does Object Recognition Work for Everyone?. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops."},{"key":"e_1_3_2_1_16_1","volume-title":"The Spotlight: A General Method for Discovering Systematic Errors in Deep Learning Models. AAAI (July","author":"Eon Greg","year":"2022","unstructured":"Greg d\u2019Eon, Jason d\u2019Eon, James\u00a0R Wright, and Kevin Leyton-Brown. 2022. The Spotlight: A General Method for Discovering Systematic Errors in Deep Learning Models. AAAI (July 2022)."},{"key":"e_1_3_2_1_17_1","volume-title":"An introduction to the bootstrap","author":"Efron Bradley","unstructured":"Bradley Efron and Robert\u00a0J Tibshirani. 1994. An introduction to the bootstrap. CRC press."},{"key":"e_1_3_2_1_18_1","unstructured":"EveryPixel. [n. d.]. Everypixel (EPixel) Image Tagging API. https:\/\/labs.everypixel.com\/api."},{"key":"e_1_3_2_1_19_1","volume-title":"Domino: Discovering Systematic Errors with Cross-Modal Embeddings. In International Conference on Learning Representations.","author":"Eyuboglu Sabri","year":"2022","unstructured":"Sabri Eyuboglu, Maya Varma, Khaled Saab, Jean-Benoit Delbrouck, Christopher Lee-Messer, Jared Dunnmon, James Zou, and Christopher R\u00e9. 2022. Domino: Discovering Systematic Errors with Cross-Modal Embeddings. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_20_1","volume-title":"WordNet: An Electronic Lexical Database","author":"Fellbaum Christiane","unstructured":"Christiane Fellbaum. 1998. WordNet: An Electronic Lexical Database. Bradford Books."},{"key":"e_1_3_2_1_21_1","volume-title":"Deep ensembles: A loss landscape perspective. arXiv preprint arXiv:1912.02757","author":"Fort Stanislav","year":"2019","unstructured":"Stanislav Fort, Huiyi Hu, and Balaji Lakshminarayanan. 2019. Deep ensembles: A loss landscape perspective. arXiv preprint arXiv:1912.02757 (2019)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458723"},{"key":"e_1_3_2_1_23_1","first-page":"13890","article-title":"Beyond accuracy: quantifying trial-by-trial behaviour of CNNs and humans by measuring error consistency","volume":"33","author":"Geirhos Robert","year":"2020","unstructured":"Robert Geirhos, Kristof Meding, and Felix\u00a0A Wichmann. 2020. Beyond accuracy: quantifying trial-by-trial behaviour of CNNs and humans by measuring error consistency. Advances in Neural Information Processing Systems 33 (2020), 13890\u201313902.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","volume-title":"Partial success in closing the gap between human and machine vision. Advances in Neural Information Processing Systems 34","author":"Geirhos Robert","year":"2021","unstructured":"Robert Geirhos, Kantharaju Narayanappa, Benjamin Mitzkus, Tizian Thieringer, Matthias Bethge, Felix\u00a0A Wichmann, and Wieland Brendel. 2021. Partial success in closing the gap between human and machine vision. Advances in Neural Information Processing Systems 34 (2021)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600211.3604698"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-industry.26"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-demos.6"},{"key":"e_1_3_2_1_28_1","volume-title":"International Conference on Learning Representations ([n. d.]).","author":"Gontijo-Lopes Raphael","unstructured":"Raphael Gontijo-Lopes, Yann Dauphin, and Ekin\u00a0D Cubuk. [n. d.]. No One Representation to Rule Them All: Overlapping Features of Training Methods. International Conference on Learning Representations ([n. d.])."},{"key":"e_1_3_2_1_29_1","unstructured":"Google. [n. d.]. Google Vision API. https:\/\/cloud.google.com\/vision."},{"key":"e_1_3_2_1_30_1","unstructured":"Sam Greenfield. 2018. Picture what the cloud can do: How the New York Times is using Google Cloud to find untold stories in millions of archived photos. https:\/\/cloud.google.com\/blog\/products\/ai-machine-learning\/how-the-new-york-times-is-using-google-cloud-to-find-untold-stories-in-millions-of-archived-photos."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00550"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2017.0-172"},{"key":"e_1_3_2_1_33_1","first-page":"1825","article-title":"Studying the Live Cross-Platform Circulation of Images With Computer Vision API: An Experiment Based on a Sports Media Event","volume":"13","author":"Hosseini Hossein","year":"2019","unstructured":"Hossein Hosseini, Baicen Xiao, and Radha Poovendran. 2019. Studying the Live Cross-Platform Circulation of Images With Computer Vision API: An Experiment Based on a Sports Media Event. International Journal of Communication 13 (2019), 1825\u20131845.","journal-title":"International Journal of Communication"},{"key":"e_1_3_2_1_34_1","volume-title":"Distilling model failures as directions in latent space. arXiv preprint arXiv:2206.14754","author":"Jain Saachi","year":"2022","unstructured":"Saachi Jain, Hannah Lawrence, Ankur Moitra, and Aleksander Madry. 2022. Distilling model failures as directions in latent space. arXiv preprint arXiv:2206.14754 (2022)."},{"key":"e_1_3_2_1_35_1","volume-title":"International Conference on Learning Representations.","author":"Jia Hengrui","year":"2021","unstructured":"Hengrui Jia, Hongyu Chen, Jonas Guan, Ali\u00a0Shahin Shamsabadi, and Nicolas Papernot. 2021. A Zest of LIME: Towards Architecture-Independent Model Distances. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/hcomp.v11i1.27548"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3306618.3314287"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00023"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1915768117"},{"key":"e_1_3_2_1_40_1","volume-title":"International Conference on Machine Learning. PMLR, 5753\u20135763","author":"Kossen Jannik","year":"2021","unstructured":"Jannik Kossen, Sebastian Farquhar, Yarin Gal, and Tom Rainforth. 2021. Active testing: Sample-efficient model evaluation. In International Conference on Machine Learning. PMLR, 5753\u20135763."},{"key":"e_1_3_2_1_41_1","volume-title":"Box: Bringing image recognition and OCR to cloud content management.","author":"Kus Ben","year":"2017","unstructured":"Ben Kus. 2017. Box: Bringing image recognition and OCR to cloud content management."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460319.3464816"},{"key":"e_1_3_2_1_44_1","volume-title":"Holistic evaluation of language models. arXiv preprint arXiv:2211.09110","author":"Liang Percy","year":"2022","unstructured":"Percy Liang, Rishi Bommasani, Tony Lee, Dimitris Tsipras, Dilara Soylu, Michihiro Yasunaga, Yian Zhang, Deepak Narayanan, Yuhuai Wu, Ananya Kumar, 2022. Holistic evaluation of language models. arXiv preprint arXiv:2211.09110 (2022)."},{"key":"e_1_3_2_1_45_1","volume-title":"Jurassic-1: Technical details and evaluation. White Paper. AI21 Labs 1","author":"Lieber Opher","year":"2021","unstructured":"Opher Lieber, Or Sharir, Barak Lenz, and Yoav Shoham. 2021. Jurassic-1: Technical details and evaluation. White Paper. AI21 Labs 1 (2021)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_47_1","volume-title":"Model similarity mitigates test set overuse. Advances in Neural Information Processing Systems 32","author":"Mania Horia","year":"2019","unstructured":"Horia Mania, John Miller, Ludwig Schmidt, Moritz Hardt, and Benjamin Recht. 2019. Model similarity mitigates test set overuse. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467435"},{"key":"e_1_3_2_1_49_1","volume-title":"A method for unbiased selective sampling, using ranked sets. Australian journal of agricultural research 3, 4","author":"McIntyre GA","year":"1952","unstructured":"GA McIntyre. 1952. A method for unbiased selective sampling, using ranked sets. Australian journal of agricultural research 3, 4 (1952), 385\u2013390."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3594109"},{"key":"e_1_3_2_1_51_1","unstructured":"Microsoft. [n. d.]. Microsoft computer vision API. https:\/\/azure.microsoft.com\/en-us\/services\/cognitive-services\/computer-vision. [Accessed Oct-2020]."},{"key":"e_1_3_2_1_52_1","unstructured":"Microsoft. 2024. Microsoft Release Notes."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287596"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"crossref","unstructured":"Rahul Nair Massimiliano Mattetti Elizabeth Daly Dennis Wei Oznur Alkan and Yunfeng Zhang. 2021. What Changed? Interpretable Model Comparison.. In IJCAI. 2855\u20132861.","DOI":"10.24963\/ijcai.2021\/393"},{"key":"e_1_3_2_1_55_1","volume-title":"Text and code embeddings by contrastive pre-training. arXiv preprint arXiv:2201.10005","author":"Neelakantan Arvind","year":"2022","unstructured":"Arvind Neelakantan, Tao Xu, Raul Puri, Alec Radford, Jesse\u00a0Michael Han, Jerry Tworek, Qiming Yuan, Nikolas Tezak, Jong\u00a0Wook Kim, Chris Hallacy, 2022. Text and code embeddings by contrastive pre-training. arXiv preprint arXiv:2201.10005 (2022)."},{"key":"e_1_3_2_1_56_1","unstructured":"OpenAI. [n. d.]. How do text-davinci-002 and text-davinci-003 differ?https:\/\/help.openai.com\/en\/articles\/6779149-how-do-text-davinci-002-and-text-davinci-003-differ."},{"key":"e_1_3_2_1_57_1","volume-title":"Training language models to follow instructions with human feedback. arXiv preprint arXiv:2203.02155","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeff Wu, Xu Jiang, Diogo Almeida, Carroll\u00a0L Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, 2022. Training language models to follow instructions with human feedback. arXiv preprint arXiv:2203.02155 (2022)."},{"key":"e_1_3_2_1_58_1","unstructured":"Art\u00a0B. Owen. 2013. Monte Carlo theory methods and examples."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1002\/9781118445112.stat05999"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132747.3132785"},{"key":"e_1_3_2_1_61_1","volume-title":"Towards a More Rigorous Science of Blindspot Discovery in Image Classification Models. arXiv preprint arXiv:2207.04104","author":"Plumb Gregory","year":"2022","unstructured":"Gregory Plumb, Nari Johnson, \u00c1ngel\u00a0Alexander Cabrera, and Ameet Talwalkar. 2022. Towards a More Rigorous Science of Blindspot Discovery in Image Classification Models. arXiv preprint arXiv:2207.04104 (2022)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01053"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533231"},{"key":"e_1_3_2_1_64_1","volume-title":"Benchmarking commercial intent detection services with practice-driven evaluations. arXiv preprint arXiv:2012.03929","author":"Qi Haode","year":"2020","unstructured":"Haode Qi, Lin Pan, Atin Sood, Abhishek Shah, Ladislav Kunc, Mo Yu, and Saloni Potdar. 2020. Benchmarking commercial intent detection services with practice-driven evaluations. arXiv preprint arXiv:2012.03929 (2020)."},{"key":"e_1_3_2_1_65_1","volume-title":"International Conference on Machine Learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_1_66_1","volume-title":"100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250","author":"Rajpurkar Pranav","year":"2016","unstructured":"Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, and Percy Liang. 2016. Squad: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250 (2016)."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-77712-2_17"},{"key":"e_1_3_2_1_68_1","volume-title":"Beyond accuracy: Behavioral testing of NLP models with CheckList. arXiv preprint arXiv:2005.04118","author":"Ribeiro Marco\u00a0Tulio","year":"2020","unstructured":"Marco\u00a0Tulio Ribeiro, Tongshuang Wu, Carlos Guestrin, and Sameer Singh. 2020. Beyond accuracy: Behavioral testing of NLP models with CheckList. arXiv preprint arXiv:2005.04118 (2020)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533239"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533110"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01266"},{"key":"e_1_3_2_1_72_1","volume-title":"Advances in Neural Information Processing Systems, H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.\u00a0F. Balcan, and H.\u00a0Lin (Eds.). Vol.\u00a033. Curran Associates","author":"Sohoni Nimit","year":"1933","unstructured":"Nimit Sohoni, Jared Dunnmon, Geoffrey Angus, Albert Gu, and Christopher R\u00e9. 2020. No Subclass Left Behind: Fine-Grained Robustness in Coarse-Grained Classification Problems. In Advances in Neural Information Processing Systems, H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.\u00a0F. Balcan, and H.\u00a0Lin (Eds.). Vol.\u00a033. Curran Associates, Inc., 19339\u201319352. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/e0688d13958a19e087e123148555e4b4-Paper.pdf"},{"key":"e_1_3_2_1_73_1","volume-title":"Abubakar Abid, Adam Fisch, Adam\u00a0R Brown, Adam Santoro, Aditya Gupta","author":"Srivastava Aarohi","year":"2022","unstructured":"Aarohi Srivastava, Abhinav Rastogi, Abhishek Rao, Abu Awal\u00a0Md Shoeb, Abubakar Abid, Adam Fisch, Adam\u00a0R Brown, Adam Santoro, Aditya Gupta, Adri\u00e0 Garriga-Alonso, 2022. Beyond the imitation game: Quantifying and extrapolating the capabilities of language models. arXiv preprint arXiv:2206.04615 (2022)."},{"key":"e_1_3_2_1_74_1","first-page":"9866","article-title":"On the Safety of Interpretable Machine Learning: A Maximum Deviation Approach","volume":"35","author":"Wei Dennis","year":"2022","unstructured":"Dennis Wei, Rahul Nair, Amit Dhurandhar, Kush\u00a0R Varshney, Elizabeth Daly, and Moninder Singh. 2022. On the Safety of Interpretable Machine Learning: A Maximum Deviation Approach. Advances in Neural Information Processing Systems 35 (2022), 9866\u20139880.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"crossref","unstructured":"Xiaofei Xie Lei Ma Haijun Wang Yuekang Li Yang Liu and Xiaohong Li. 2019. DiffChaser: Detecting Disagreements for Deep Neural Networks.. In IJCAI. 5772\u20135778.","DOI":"10.24963\/ijcai.2019\/800"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01407"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3131365.3131372"},{"key":"e_1_3_2_1_78_1","first-page":"20554","article-title":"On completeness-aware concept-based explanations in deep neural networks","volume":"33","author":"Yeh Chih-Kuan","year":"2020","unstructured":"Chih-Kuan Yeh, Been Kim, Sercan Arik, Chun-Liang Li, Tomas Pfister, and Pradeep Ravikumar. 2020. On completeness-aware concept-based explanations in deep neural networks. Advances in Neural Information Processing Systems 33 (2020), 20554\u201320565.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"FAccT '24: The 2024 ACM Conference on Fairness, Accountability, and Transparency","location":"Rio de Janeiro Brazil","acronym":"FAccT '24"},"container-title":["The 2024 ACM Conference on Fairness Accountability and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3659047","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3630106.3659047","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:28:23Z","timestamp":1755883703000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3630106.3659047"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":78,"alternative-id":["10.1145\/3630106.3659047","10.1145\/3630106"],"URL":"https:\/\/doi.org\/10.1145\/3630106.3659047","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}