{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T20:14:00Z","timestamp":1776111240884,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":167,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100017567","name":"Apple","doi-asserted-by":"publisher","award":["Scholars in AIML Fellowship"],"award-info":[{"award-number":["Scholars in AIML Fellowship"]}],"id":[{"id":"10.13039\/100017567","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSF","award":["1900991"],"award-info":[{"award-number":["1900991"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,26]]},"DOI":"10.1145\/3706598.3713406","type":"proceedings-article","created":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T03:17:03Z","timestamp":1745464623000},"page":"1-20","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Abstraction Alignment: Comparing Model-Learned and Human-Encoded Conceptual Relationships"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9671-5574","authenticated-orcid":false,"given":"Angie","family":"Boggust","sequence":"first","affiliation":[{"name":"CSAIL, Massachusetts Institute of Technology, Cambridge, Massachusetts, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7213-4241","authenticated-orcid":false,"given":"Hyemin","family":"Bang","sequence":"additional","affiliation":[{"name":"CSAIL, Massachusetts Institute of Technology, Cambridge, Massachusetts, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8995-1683","authenticated-orcid":false,"given":"Hendrik","family":"Strobelt","sequence":"additional","affiliation":[{"name":"IBM Research AI, Cambridge, Massachusetts, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5564-635X","authenticated-orcid":false,"given":"Arvind","family":"Satyanarayan","sequence":"additional","affiliation":[{"name":"CSAIL, Massachusetts Institute of Technology, Cambridge, Massachusetts, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"e_1_3_3_3_2_2","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg\u00a0S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems."},{"key":"e_1_3_3_3_3_2","doi-asserted-by":"publisher","unstructured":"Moloud Abdar Farhad Pourpanah Sadiq Hussain Dana Rezazadegan Li Liu Mohammad Ghavamzadeh Paul\u00a0W. Fieguth Xiaochun Cao Abbas Khosravi U.\u00a0Rajendra Acharya Vladimir Makarenkov and Saeid Nahavandi. 2021. A review of uncertainty quantification in deep learning: Techniques applications and challenges. Inf. Fusion 76 (2021) 243\u2013297. 10.1016\/J.INFFUS.2021.05.008","DOI":"10.1016\/J.INFFUS.2021.05.008"},{"key":"e_1_3_3_3_4_2","volume-title":"A Pattern Language: Towns, Buildings, Construction","author":"Alexander Christopher","year":"1977","unstructured":"Christopher Alexander, Sara Ishikawa, and Murray Silverstein. 1977. A Pattern Language: Towns, Buildings, Construction. Oxford University Press, New York."},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"crossref","unstructured":"Sherri Alexander Therese Conner and Teresa Slaughter. 2003. Overview of inpatient coding. American journal of health-system pharmacy : AJHP : official journal of the American Society of Health-System Pharmacists 60 21 Suppl 6 (2003) S11\u20134.","DOI":"10.1093\/ajhp\/60.suppl_6.S11"},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"publisher","DOI":"10.4324\/9781315799438"},{"key":"e_1_3_3_3_7_2","volume-title":"European Conference on Computer Vision (ECCV) Explainable Computer Vision (eXCV) Workshop","author":"Bang Hyemin","year":"2024","unstructured":"Hyemin Bang, Angie Boggust, and Arvind Satyanarayan. 2024. Explanation Alignment: Quantifying the Correctness of Model Reasoning At Scale. In European Conference on Computer Vision (ECCV) Explainable Computer Vision (eXCV) Workshop."},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.354"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502102"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643834.3660722"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","unstructured":"Aditya Bhattacharya Simone Stumpf Lucija Gosak Gregor Stiglic and Katrien Verbert. 2023. Lessons Learned from EXMOS User Studies: A Technical Report Summarizing Key Takeaways from User Studies Conducted to Evaluate The EXMOS Platform. CoRR abs\/2310.02063 (2023). 10.48550\/ARXIV.2310.02063 arXiv:https:\/\/arXiv.org\/abs\/2310.02063","DOI":"10.48550\/ARXIV.2310.02063"},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642106"},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3551624.3555290"},{"key":"e_1_3_3_3_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00158"},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511122"},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501965"},{"key":"e_1_3_3_3_17_2","doi-asserted-by":"publisher","unstructured":"Angie Boggust Venkatesh Sivaraman Yannick Assogba Donghao Ren Dominik Moritz and Fred Hohman. 2024. Compress and Compare: Interactively Evaluating Efficiency and Behavior Across ML Model Compression Experiments. IEEE Transactions on Visualization and Computer Graphics 31 1 (2024) 809\u2013819. 10.1109\/TVCG.2024.3456371","DOI":"10.1109\/TVCG.2024.3456371"},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3593013.3593997"},{"key":"e_1_3_3_3_19_2","unstructured":"Tolga Bolukbasi Kai-Wei Chang James\u00a0Y Zou Venkatesh Saligrama and Adam\u00a0T Kalai. 2016. Man is to computer programmer as woman is to homemaker? debiasing word embeddings. Annual Conference on Neural Information Processing Systems (NeurIPS) 29 (2016)."},{"key":"e_1_3_3_3_20_2","unstructured":"Trenton Bricken Adly Templeton Joshua Batson Brian Chen Adam Jermyn Tom Conerly Nick Turner Cem Anil Carson Denison Amanda Askell Robert Lasenby Yifan Wu Shauna Kravec Nicholas Schiefer Tim Maxwell Nicholas Joseph Zac Hatfield-Dodds Alex Tamkin Karina Nguyen Brayden McLean Josiah\u00a0E Burke Tristan Hume Shan Carter Tom Henighan and Christopher Olah. 2023. Towards Monosemanticity: Decomposing Language Models With Dictionary Learning. Transformer Circuits Thread (2023). https:\/\/transformer-circuits.pub\/2023\/monosemantic-features\/index.html."},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581268"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"publisher","unstructured":"Carrie\u00a0J. Cai Samantha Winter David Steiner Lauren Wilcox and Michael Terry. 2019. \"Hello AI\": Uncovering the Onboarding Needs of Medical Practitioners for Human-AI Collaborative Decision-Making. Proceedings of the ACM on Human-Computer Interaction 3 CSCW (2019) 104:1\u2013104:24. 10.1145\/3359206","DOI":"10.1145\/3359206"},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3514094.3534162"},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"publisher","unstructured":"Steve Campbell Melanie Greenwood Sarah Prior Toniele Shearer Kerrie Walkem Sarah Young Danielle Bywaters and Kim Walker. 2020. Purposive sampling: complex or simple? Research case examples. Journal of Research in Nursing 25 8 (2020) 652\u2013661. 10.1177\/1744987120927206","DOI":"10.1177\/1744987120927206"},{"key":"e_1_3_3_3_25_2","unstructured":"Brandon Carter Siddhartha Jain Jonas\u00a0W Mueller and David Gifford. 2021. Overinterpretation reveals image classification model pathologies. Annual Conference on Neural Information Processing Systems (NeurIPS) 34 (2021) 15395\u201315407."},{"key":"e_1_3_3_3_26_2","first-page":"567","volume-title":"International Conference on Artificial Intelligence and Statistics (AISTATS)","author":"Carter Brandon","year":"2019","unstructured":"Brandon Carter, Jonas Mueller, Siddhartha Jain, and David\u00a0K. Gifford. 2019. What made you do this? Understanding black-box decisions with sufficient input subsets. In International Conference on Artificial Intelligence and Statistics (AISTATS). PMLR, 567\u2013576."},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"publisher","unstructured":"Shan Carter Zan Armstrong Ludwig Schubert Ian Johnson and Chris Olah. 2019. Activation Atlas. Distill (2019). 10.23915\/distill.00015https:\/\/distill.pub\/2019\/activation-atlas.","DOI":"10.23915\/distill.00015"},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"publisher","unstructured":"Shan Carter Zan Armstrong Ludwig Schubert Ian Johnson and Chris Olah. 2019. Activation Atlas. Distill (2019). 10.23915\/distill.00015https:\/\/distill.pub\/2019\/activation-atlas.","DOI":"10.23915\/distill.00015"},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"publisher","unstructured":"Donna\u00a0J. Cartwright. 2013. ICD-9-CM to ICD-10-CM Codes: What? Why? How? Advances in Wound Care 2 10 (2013) 588\u2013592. 10.1089\/wound.2013.0478","DOI":"10.1089\/wound.2013.0478"},{"key":"e_1_3_3_3_30_2","doi-asserted-by":"publisher","unstructured":"Thomas\u00a0Y. Chen Biprateep Dey Aishik Ghosh Michael Kagan Brian Nord and Nesar Ramachandra. 2022. Interpretable Uncertainty Quantification in AI for HEP. CoRR abs\/2208.03284 (2022). 10.48550\/ARXIV.2208.03284 arXiv:https:\/\/arXiv.org\/abs\/2208.03284","DOI":"10.48550\/ARXIV.2208.03284"},{"key":"e_1_3_3_3_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642775"},{"key":"e_1_3_3_3_32_2","volume-title":"The Foundations of Social Research: Meaning and Perspective in the Research Process","author":"Crotty Michael","year":"1998","unstructured":"Michael Crotty. 1998. The Foundations of Social Research: Meaning and Perspective in the Research Process. SAGE Publications."},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581026"},{"key":"e_1_3_3_3_35_2","first-page":"4171","volume-title":"Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT). ACL, 4171\u20134186."},{"key":"e_1_3_3_3_36_2","volume-title":"Dewey Decimal Classification and Relative Index (23 ed.)","author":"Dewey Melvil","year":"2011","unstructured":"Melvil Dewey. 2011. Dewey Decimal Classification and Relative Index (23 ed.). OCLC Online Computer Library Center, Inc."},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"publisher","unstructured":"Hang Dong Mat\u00fa\u0161 Falis William Whiteley Beatrice Alex Joshua Matterson Shaoxiong Ji Jiaoyan Chen and Honghan Wu. 2022. Automated clinical coding: what why and where we are? NPJ digital medicine 5 1 (2022) 159. 10.1038\/S41746-022-00705-7","DOI":"10.1038\/S41746-022-00705-7"},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"publisher","unstructured":"Finale Doshi-Velez and Been Kim. 2017. Towards A Rigorous Science of Interpretable Machine Learning. 10.48550\/arXiv.1702.08608 arxiv:https:\/\/arXiv.org\/abs\/1702.08608\u00a0[stat.ML]","DOI":"10.48550\/arXiv.1702.08608"},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591918"},{"key":"e_1_3_3_3_40_2","unstructured":"Nelson Elhage Tristan Hume Catherine Olsson Nicholas Schiefer Tom Henighan Shauna Kravec Zac Hatfield-Dodds Robert Lasenby Dawn Drain Carol Chen Roger Grosse Sam McCandlish Jared Kaplan Dario Amodei Martin Wattenberg and Christopher Olah. 2022. Toy Models of Superposition. Transformer Circuits Thread (2022). https:\/\/transformer-circuits.pub\/2022\/toy_model\/index.html."},{"key":"e_1_3_3_3_41_2","unstructured":"Nelson Elhage Neel Nanda Catherine Olsson Tom Henighan Nicholas Joseph Ben Mann Amanda Askell Yuntao Bai Anna Chen Tom Conerly Nova DasSarma Dawn Drain Deep Ganguli Zac Hatfield-Dodds Danny Hernandez Andy Jones Jackson Kernion Liane Lovitt Kamal Ndousse Dario Amodei Tom Brown Jack Clark Jared Kaplan Sam McCandlish and Chris Olah. 2021. A Mathematical Framework for Transformer Circuits. Transformer Circuits Thread (2021). https:\/\/transformer-circuits.pub\/2021\/framework\/index.html."},{"key":"e_1_3_3_3_42_2","unstructured":"Dumitru Erhan Yoshua Bengio Aaron Courville and Pascal Vincent. 2009. Visualizing higher-layer features of a deep network. University of Montreal 1341 3 (2009) 1."},{"key":"e_1_3_3_3_43_2","doi-asserted-by":"publisher","unstructured":"Kevin\u00a0W Eva. 2005. What every teacher needs to know about clinical reasoning. Medical Education 39 1 (2005) 98\u2013106. 10.1111\/j.1365-2929.2004.01972.x","DOI":"10.1111\/j.1365-2929.2004.01972.x"},{"key":"e_1_3_3_3_44_2","unstructured":"Andrew Feenberg. 2005. Critical Theory of Technology: An Overview. Tailoring Biotechnologies 1 (01 2005) 47\u201364."},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7287.001.0001"},{"key":"e_1_3_3_3_46_2","volume-title":"Power\/Knowledge: Selected Interviews and Other Writings","author":"Foucault Michel","year":"1972","unstructured":"Michel Foucault. 1972\u20131977. Power\/Knowledge: Selected Interviews and Other Writings. Pantheon Books, New York, NY, USA."},{"key":"e_1_3_3_3_47_2","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.3695311"},{"key":"e_1_3_3_3_48_2","series-title":"JMLR Workshop and Conference Proceedings","first-page":"1050","volume-title":"International Conference on Machine Learning (ICML)","volume":"48","author":"Gal Yarin","year":"2016","unstructured":"Yarin Gal and Zoubin Ghahramani. 2016. Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning. In International Conference on Machine Learning (ICML)(JMLR Workshop and Conference Proceedings, Vol.\u00a048). JMLR.org, 1050\u20131059."},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"publisher","unstructured":"Timnit Gebru Jamie Morgenstern Briana Vecchione Jennifer\u00a0Wortman Vaughan Hanna\u00a0M. Wallach Hal\u00a0Daum\u00e9 III and Kate Crawford. 2021. Datasheets for datasets. Commun. ACM 64 12 (2021) 86\u201392. 10.1145\/3458723","DOI":"10.1145\/3458723"},{"key":"e_1_3_3_3_50_2","volume-title":"Annual Conference on Neural Information Processing Systems (NeurIPS)","author":"Geirhos Robert","year":"2020","unstructured":"Robert Geirhos, Kristof Meding, and Felix\u00a0A. Wichmann. 2020. Beyond accuracy: quantifying trial-by-trial behaviour of CNNs and humans by measuring error consistency. In Annual Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"publisher","unstructured":"Amirata Ghorbani James Wexler and Been Kim. 2019. Automating Interpretability: Discovering and Testing Visual Concepts Learned by Neural Networks. CoRR abs\/1902.03129 (2019). 10.48550\/arXiv.1902.03129 arXiv:https:\/\/arXiv.org\/abs\/1902.03129","DOI":"10.48550\/arXiv.1902.03129"},{"key":"e_1_3_3_3_52_2","series-title":"Proceedings of Machine Learning Research","first-page":"1321","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","volume":"70","author":"Guo Chuan","year":"2017","unstructured":"Chuan Guo, Geoff Pleiss, Yu Sun, and Kilian\u00a0Q. Weinberger. 2017. On Calibration of Modern Neural Networks. In Proceedings of the International Conference on Machine Learning (ICML)(Proceedings of Machine Learning Research, Vol.\u00a070). PMLR, 1321\u20131330."},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.452"},{"key":"e_1_3_3_3_54_2","volume-title":"Annual Conference on Neural Information Processing Systems (NeurIPS)","author":"Hanna Michael","year":"2023","unstructured":"Michael Hanna, Ollie Liu, and Alexandre Variengien. 2023. How does GPT-2 compute greater-than?: Interpreting mathematical abilities in a pre-trained language model. In Annual Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_3_56_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"e_1_3_3_3_57_2","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2304.00740"},{"key":"e_1_3_3_3_58_2","volume-title":"International Conference on Learning Representations (ICLR)","author":"Hernandez Evan","year":"2022","unstructured":"Evan Hernandez, Sarah Schwettmann, David Bau, Teona Bagashvili, Antonio Torralba, and Jacob Andreas. 2022. Natural Language Descriptions of Deep Visual Features. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/N19-1419"},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"publisher","unstructured":"Cody\u00a0E. Hinchliff Stephen\u00a0A. Smith James\u00a0F. Allman J.\u00a0Gordon Burleigh Ruchi Chaudhary Lyndon\u00a0M. Coghill Keith\u00a0A. Crandall Jiabin Deng Bryan\u00a0T. Drew Romina Gazis Karl Gude David\u00a0S. Hibbett Laura\u00a0A. Katz H.\u00a0Dail Laughinghouse Emily\u00a0Jane McTavish Peter\u00a0E. Midford Christopher\u00a0L. Owen Richard\u00a0H. Ree Jonathan\u00a0A. Rees Douglas\u00a0E. Soltis Tiffani Williams and Karen\u00a0A. Cranston. 2015. Synthesis of phylogeny and taxonomy into a comprehensive tree of life. Proceedings of the National Academy of Sciences 112 41 (2015) 12764\u201312769. 10.1073\/pnas.1423041112","DOI":"10.1073\/pnas.1423041112"},{"key":"e_1_3_3_3_61_2","doi-asserted-by":"publisher","unstructured":"Geoffrey\u00a0E. Hinton Oriol Vinyals and Jeffrey Dean. 2015. Distilling the Knowledge in a Neural Network. CoRR abs\/1503.02531 (2015). 10.48550\/arXiv.1503.02531 arXiv:https:\/\/arXiv.org\/abs\/1503.02531","DOI":"10.48550\/arXiv.1503.02531"},{"key":"e_1_3_3_3_62_2","doi-asserted-by":"publisher","DOI":"10.2200\/S01125ED1V01Y202109DSK022"},{"key":"e_1_3_3_3_63_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642628"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.45"},{"key":"e_1_3_3_3_65_2","doi-asserted-by":"publisher","unstructured":"Eyke H\u00fcllermeier and Willem Waegeman. 2021. Aleatoric and epistemic uncertainty in machine learning: an introduction to concepts and methods. Machine Learning 110 3 (2021) 457\u2013506. 10.1007\/S10994-021-05946-3","DOI":"10.1007\/S10994-021-05946-3"},{"key":"e_1_3_3_3_66_2","first-page":"71","volume-title":"Joint Conference on Lexical and Computational Semantics","author":"Hulpus Ioana","year":"2020","unstructured":"Ioana Hulpus, Jonathan Kobbe, Heiner Stuckenschmidt, and Graeme Hirst. 2020. Knowledge Graphs meet Moral Values. In Joint Conference on Lexical and Computational Semantics. ACL, 71\u201380."},{"key":"e_1_3_3_3_67_2","doi-asserted-by":"publisher","unstructured":"Shaoxiong Ji Shirui Pan E. Cambria Pekka Marttinen and Philip\u00a0S. Yu. 2022. A Survey on Knowledge Graphs: Representation Acquisition and Applications. IEEE Transactions on Neural Networks and Learning Systems 33 (2022) 494\u2013514. 10.1109\/TNNLS.2021.3070843","DOI":"10.1109\/TNNLS.2021.3070843"},{"key":"e_1_3_3_3_68_2","doi-asserted-by":"publisher","unstructured":"Alistair Johnson Tom Pollard and Roger Mark. 2016. MIMIC-III Clinical Database (version 1.4). 10.13026\/C2XW26","DOI":"10.13026\/C2XW26"},{"key":"e_1_3_3_3_69_2","doi-asserted-by":"publisher","unstructured":"Alistair\u00a0EW Johnson Tom\u00a0J Pollard Lu Shen Li-wei\u00a0H Lehman Mengling Feng Mohammad Ghassemi Benjamin Moody Peter Szolovits Leo Anthony\u00a0Celi and Roger\u00a0G Mark. 2016. MIMIC-III a freely accessible critical care database. Scientific data 3 1 (2016) 1\u20139. 10.1038\/sdata.2016.35","DOI":"10.1038\/sdata.2016.35"},{"key":"e_1_3_3_3_70_2","doi-asserted-by":"publisher","unstructured":"Charles Jones Daniel\u00a0C. Castro Fabio De\u00a0Sousa Ribeiro Ozan Oktay Melissa\u00a0D. McCradden and Ben Glocker. 2023. No Fair Lunch: A Causal Perspective on Dataset Bias in Machine Learning for Medical Imaging. CoRR abs\/2307.16526 (2023). 10.48550\/ARXIV.2307.16526 arXiv:https:\/\/arXiv.org\/abs\/2307.16526","DOI":"10.48550\/ARXIV.2307.16526"},{"key":"e_1_3_3_3_71_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2023.ACL-LONG.307"},{"key":"e_1_3_3_3_72_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00505"},{"key":"e_1_3_3_3_73_2","first-page":"2668","volume-title":"International Conference on Machine Learning (ICML)","author":"Kim Been","year":"2018","unstructured":"Been Kim, Martin Wattenberg, Justin Gilmer, Carrie Cai, James Wexler, Fernanda Viegas, and Rory Sayres. 2018. Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV). In International Conference on Machine Learning (ICML). PMLR, 2668\u20132677."},{"key":"e_1_3_3_3_74_2","series-title":"Proceedings of Machine Learning Research","first-page":"2673","volume-title":"International Conference on Machine Learning (ICML)","volume":"80","author":"Kim Been","year":"2018","unstructured":"Been Kim, Martin Wattenberg, Justin Gilmer, Carrie\u00a0J. Cai, James Wexler, Fernanda\u00a0B. Vi\u00e9gas, and Rory Sayres. 2018. Interpretability Beyond Feature Attribution: Quantitative Testing with Concept Activation Vectors (TCAV). In International Conference on Machine Learning (ICML)(Proceedings of Machine Learning Research, Vol.\u00a080). PMLR, 2673\u20132682."},{"key":"e_1_3_3_3_75_2","doi-asserted-by":"publisher","unstructured":"Armen\u00a0Der Kiureghian and Ove Ditlevsen. 2009. Aleatory or epistemic? Does it matter? Structural Safety 31 2 (2009) 105\u2013112. 10.1016\/j.strusafe.2008.06.020Risk Acceptance and Risk Communication.","DOI":"10.1016\/j.strusafe.2008.06.020"},{"key":"e_1_3_3_3_76_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658543"},{"key":"e_1_3_3_3_77_2","series-title":"Proceedings of Machine Learning Research","first-page":"1885","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","volume":"70","author":"Koh Pang\u00a0Wei","year":"2017","unstructured":"Pang\u00a0Wei Koh and Percy Liang. 2017. Understanding Black-box Predictions via Influence Functions. In Proceedings of the International Conference on Machine Learning (ICML)(Proceedings of Machine Learning Research, Vol.\u00a070). PMLR, 1885\u20131894."},{"key":"e_1_3_3_3_78_2","unstructured":"Alex Krizhevsky Geoffrey Hinton et\u00a0al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_3_3_79_2","first-page":"3787","volume-title":"Annual Conference on Neural Information Processing Systems (NeurIPS)","author":"Kumar Ananya","year":"2019","unstructured":"Ananya Kumar, Percy Liang, and Tengyu Ma. 2019. Verified Uncertainty Calibration. In Annual Conference on Neural Information Processing Systems (NeurIPS). 3787\u20133798."},{"key":"e_1_3_3_3_80_2","first-page":"6402","volume-title":"Annual Conference on Neural Information Processing Systems (NeurIPS)","author":"Lakshminarayanan Balaji","year":"2017","unstructured":"Balaji Lakshminarayanan, Alexander Pritzel, and Charles Blundell. 2017. Simple and Scalable Predictive Uncertainty Estimation using Deep Ensembles. In Annual Conference on Neural Information Processing Systems (NeurIPS). 6402\u20136413."},{"key":"e_1_3_3_3_81_2","doi-asserted-by":"publisher","unstructured":"Michelle\u00a0S Lam Mitchell\u00a0L Gordon Dana\u00eb Metaxa Jeffrey\u00a0T Hancock James\u00a0A Landay and Michael\u00a0S Bernstein. 2022. End-user audits: A system empowering communities to lead large-scale investigations of harmful algorithmic behavior. Proceedings of the ACM on Human-Computer Interaction 6 CSCW2 (2022) 1\u201334. 10.1145\/3555625","DOI":"10.1145\/3555625"},{"key":"e_1_3_3_3_82_2","unstructured":"Fred Lambert. [n. d.]. Understanding the fatal Tesla accident on Autopilot and the NHTSA probe. electrek ([n. d.]). https:\/\/electrek.co\/2016\/07\/01\/understanding-fatal-tesla-accident-autopilot-nhtsa-probe\/"},{"key":"e_1_3_3_3_83_2","first-page":"27094","volume-title":"Annual Conference on Neural Information Processing Systems (NeurIPS)","author":"Langlois Thomas\u00a0A.","year":"2021","unstructured":"Thomas\u00a0A. Langlois, H.\u00a0Charles Zhao, Erin Grant, Ishita Dasgupta, Thomas\u00a0L. Griffiths, and Nori Jacoby. 2021. Passive attention in artificial neural networks predicts human visual selectivity. In Annual Conference on Neural Information Processing Systems (NeurIPS). 27094\u201327106."},{"key":"e_1_3_3_3_84_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2021.ACL-LONG.143"},{"key":"e_1_3_3_3_85_2","volume-title":"International Conference on Learning Representations (ICLR)","author":"Li Kenneth","year":"2023","unstructured":"Kenneth Li, Aspen\u00a0K. Hopkins, David Bau, Fernanda\u00a0B. Vi\u00e9gas, Hanspeter Pfister, and Martin Wattenberg. 2023. Emergent World Representations: Exploring a Sequence Model Trained on a Synthetic Task. In International Conference on Learning Representations (ICLR). OpenReview.net."},{"key":"e_1_3_3_3_86_2","unstructured":"Johnny Lin. 2023. Neuronpedia: Interactive Reference and Tooling for Analyzing Neural Networks. https:\/\/www.neuronpedia.org"},{"key":"e_1_3_3_3_87_2","doi-asserted-by":"publisher","DOI":"10.5962\/bhl.title.156783"},{"key":"e_1_3_3_3_88_2","volume-title":"Abstraction and Specification in Program Development (2 ed.)","author":"Liskov Barbara","year":"1986","unstructured":"Barbara Liskov and John\u00a0V. Guttag. 1986. Abstraction and Specification in Program Development (2 ed.). Vol.\u00a020. MIT press Cambridge."},{"key":"e_1_3_3_3_89_2","doi-asserted-by":"publisher","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs\/1907.11692 (2019). 10.48550\/arXiv.1907.11692 arXiv:https:\/\/arXiv.org\/abs\/1907.11692","DOI":"10.48550\/arXiv.1907.11692"},{"key":"e_1_3_3_3_90_2","unstructured":"Jack Merullo Carsten Eickhoff and Ellie Pavlick. 2023. Circuit component reuse across tasks in transformer language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.08744 (2023)."},{"key":"e_1_3_3_3_91_2","doi-asserted-by":"publisher","unstructured":"George\u00a0A. Miller. 1995. WordNet: A Lexical Database for English. Commun. ACM 38 11 (1995) 39\u201341. 10.1145\/219717.219748","DOI":"10.1145\/219717.219748"},{"key":"e_1_3_3_3_92_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2020.EMNLP-MAIN.466"},{"key":"e_1_3_3_3_93_2","volume-title":"Interpretable Machine Learning (2 ed.)","author":"Molnar Christoph","year":"2022","unstructured":"Christoph Molnar. 2022. Interpretable Machine Learning (2 ed.). https:\/\/christophm.github.io\/interpretable-ml-book"},{"key":"e_1_3_3_3_94_2","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372850"},{"key":"e_1_3_3_3_95_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1100"},{"key":"e_1_3_3_3_96_2","doi-asserted-by":"crossref","unstructured":"Mark\u00a0A Musen. 1992. Dimensions of knowledge sharing and reuse. Computers and biomedical research 25 5 (1992) 435\u2013467.","DOI":"10.1016\/0010-4809(92)90003-S"},{"key":"e_1_3_3_3_97_2","volume-title":"International Conference on Learning Representations (ICLR)","author":"Muttenthaler Lukas","year":"2023","unstructured":"Lukas Muttenthaler, Jonas Dippel, Lorenz Linhardt, Robert\u00a0A. Vandermeulen, and Simon Kornblith. 2023. Human alignment of neural network representations. In International Conference on Learning Representations (ICLR). OpenReview.net."},{"key":"e_1_3_3_3_98_2","doi-asserted-by":"publisher","unstructured":"Lukas Muttenthaler Klaus Greff Frieda Born Bernhard Spitzer Simon Kornblith Michael\u00a0C. Mozer Klaus-Robert M\u00fcller Thomas Unterthiner and Andrew\u00a0K. Lampinen. 2024. Aligning Machine and Human Visual Representations across Abstraction Levels. CoRR abs\/2409.06509 (2024). 10.48550\/ARXIV.2409.06509 arXiv:https:\/\/arXiv.org\/abs\/2409.06509","DOI":"10.48550\/ARXIV.2409.06509"},{"key":"e_1_3_3_3_99_2","unstructured":"Neel Nanda. 2024. Actually Othello-GPT Has A Linear Emergent World Representation. Transformer Circuits Thread (2024)."},{"key":"e_1_3_3_3_100_2","doi-asserted-by":"publisher","unstructured":"Zabir\u00a0Al Nazi and Wei Peng. 2024. Large Language Models in Healthcare and Medical Domain: A Review. Informatics 11 3 (2024) 57. 10.3390\/INFORMATICS11030057","DOI":"10.3390\/INFORMATICS11030057"},{"key":"e_1_3_3_3_101_2","volume-title":"Neural Information Processing Systems (NeurIPS) Track on Datasets and Benchmarks","author":"Northcutt Curtis\u00a0G.","year":"2021","unstructured":"Curtis\u00a0G. Northcutt, Anish Athalye, and Jonas Mueller. 2021. Pervasive Label Errors in Test Sets Destabilize Machine Learning Benchmarks. In Neural Information Processing Systems (NeurIPS) Track on Datasets and Benchmarks."},{"key":"e_1_3_3_3_102_2","unstructured":"N. Noy and Deborah Mcguinness. 2001. Ontology Development 101: A Guide to Creating Your First Ontology. Knowledge Systems Laboratory 32 (01 2001)."},{"key":"e_1_3_3_3_103_2","volume-title":"International Conference on Learning Representations (ICLR)","author":"Oikarinen Tuomas\u00a0P.","year":"2023","unstructured":"Tuomas\u00a0P. Oikarinen and Tsui-Wei Weng. 2023. CLIP-Dissect: Automatic Description of Neuron Representations in Deep Vision Networks. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_3_104_2","doi-asserted-by":"publisher","unstructured":"Kerem Oktar Ilia Sucholutsky Tania Lombrozo and Thomas\u00a0L. Griffiths. 2023. Dimensions of Disagreement: Unpacking Divergence and Misalignment in Cognitive Science and Artificial Intelligence. CoRR abs\/2310.12994 (2023). 10.48550\/ARXIV.2310.12994 arXiv:https:\/\/arXiv.org\/abs\/2310.12994","DOI":"10.48550\/ARXIV.2310.12994"},{"key":"e_1_3_3_3_105_2","doi-asserted-by":"publisher","unstructured":"Chris Olah Nick Cammarata Ludwig Schubert Gabriel Goh Michael Petrov and Shan Carter. 2020. Zoom In: An Introduction to Circuits. Distill (2020). 10.23915\/distill.00024.001https:\/\/distill.pub\/2020\/circuits\/zoom-in.","DOI":"10.23915\/distill.00024.001"},{"key":"e_1_3_3_3_106_2","doi-asserted-by":"publisher","unstructured":"Chris Olah Alexander Mordvintsev and Ludwig Schubert. 2017. Feature Visualization. Distill (2017). 10.23915\/distill.00007https:\/\/distill.pub\/2017\/feature-visualization.","DOI":"10.23915\/distill.00007"},{"key":"e_1_3_3_3_107_2","doi-asserted-by":"publisher","unstructured":"Chris Olah Arvind Satyanarayan Ian Johnson Shan Carter Ludwig Schubert Katherine Ye and Alexander Mordvintsev. 2018. The Building Blocks of Interpretability. Distill (2018). 10.23915\/distill.00010https:\/\/distill.pub\/2018\/building-blocks.","DOI":"10.23915\/distill.00010"},{"key":"e_1_3_3_3_108_2","doi-asserted-by":"crossref","unstructured":"Kimberly O\u2019Malley Karon\u00a0F. Cook Matt\u00a0D. Price Kimberly\u00a0Raiford Wildes John\u00a0F. Hurdle and Carol\u00a0M. Ashton. 2005. Measuring diagnoses: ICD code accuracy. Health services research 40 5 Pt 2 (2005) 1620\u201339.","DOI":"10.1111\/j.1475-6773.2005.00444.x"},{"key":"e_1_3_3_3_109_2","volume-title":"Annual Conference on Neural Information Processing Systems (NeurIPS)","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll\u00a0L. Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, John Schulman, Jacob Hilton, Fraser Kelton, Luke Miller, Maddie Simens, Amanda Askell, Peter Welinder, Paul\u00a0F. Christiano, Jan Leike, and Ryan Lowe. 2022. Training language models to follow instructions with human feedback. In Annual Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_3_3_110_2","doi-asserted-by":"publisher","unstructured":"Xiny Pan Daniel Hern\u00e1ndez Philipp Seifer Ralf L\u00e4mmel and Steffen Staab. 2024. eSPARQL: Representing and Reconciling Agnostic and Atheistic Beliefs in RDF-star Knowledge Graphs. CoRR abs\/2407.21483 (2024). 10.48550\/ARXIV.2407.21483","DOI":"10.48550\/ARXIV.2407.21483"},{"key":"e_1_3_3_3_111_2","first-page":"8024","volume-title":"Annual Conference on Neural Information Processing Systems (NeurIPS)","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward\u00a0Z. Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Annual Conference on Neural Information Processing Systems (NeurIPS). 8024\u20138035."},{"key":"e_1_3_3_3_112_2","doi-asserted-by":"publisher","unstructured":"Amandalynne Paullada Inioluwa\u00a0Deborah Raji Emily\u00a0M. Bender Emily Denton and Alex Hanna. 2021. Data and its (dis)contents: A survey of dataset development and use in machine learning research. Patterns 2 11 (2021) 100336. 10.1016\/J.PATTER.2021.100336","DOI":"10.1016\/J.PATTER.2021.100336"},{"key":"e_1_3_3_3_113_2","doi-asserted-by":"publisher","DOI":"10.24432\/C5201W"},{"key":"e_1_3_3_3_114_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/D19-1250"},{"key":"e_1_3_3_3_115_2","first-page":"151","volume-title":"British Machine Vision Conference (BMVC)","author":"Petsiuk Vitali","year":"2018","unstructured":"Vitali Petsiuk, Abir Das, and Kate Saenko. 2018. RISE: Randomized Input Sampling for Explanation of Black-box Models. In British Machine Vision Conference (BMVC). BMVA Press, 151."},{"key":"e_1_3_3_3_116_2","series-title":"Proceedings of Machine Learning Research","first-page":"5142","volume-title":"International Conference on Machine Learning (ICML)","volume":"97","author":"Phuong Mary","year":"2019","unstructured":"Mary Phuong and Christoph Lampert. 2019. Towards Understanding Knowledge Distillation. In International Conference on Machine Learning (ICML)(Proceedings of Machine Learning Research, Vol.\u00a097). PMLR, 5142\u20135151."},{"key":"e_1_3_3_3_117_2","volume-title":"The Tacit Dimension","author":"Polanyi Michael","year":"1966","unstructured":"Michael Polanyi. 1966. The Tacit Dimension. Routledge & Kegan Paul, London."},{"key":"e_1_3_3_3_118_2","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. OpenAI (2019)."},{"key":"e_1_3_3_3_119_2","doi-asserted-by":"crossref","unstructured":"Arun Rai. 2020. Explainable AI: From black box to glass box. Journal of the Academy of Marketing Science 48 1 (2020) 137\u2013141.","DOI":"10.1007\/s11747-019-00710-5"},{"key":"e_1_3_3_3_120_2","doi-asserted-by":"publisher","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. CoRR abs\/2204.06125 (2022). 10.48550\/ARXIV.2204.06125 arXiv:https:\/\/arXiv.org\/abs\/2204.06125","DOI":"10.48550\/ARXIV.2204.06125"},{"key":"e_1_3_3_3_121_2","doi-asserted-by":"publisher","unstructured":"Sunayana Rane Polyphony\u00a0J. Bruna Ilia Sucholutsky Christopher\u00a0T. Kello and Thomas\u00a0L. Griffiths. 2024. Concept Alignment. CoRR abs\/2401.08672 (2024). 10.48550\/ARXIV.2401.08672 arXiv:https:\/\/arXiv.org\/abs\/2401.08672","DOI":"10.48550\/ARXIV.2401.08672"},{"key":"e_1_3_3_3_122_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.690"},{"key":"e_1_3_3_3_123_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580790"},{"key":"e_1_3_3_3_124_2","doi-asserted-by":"crossref","unstructured":"Nat\u00e1lia Villa\u00a0Nova Rodrigues Luis\u00a0Raul Abramo and Nina Sumiko\u00a0Tomita Hirata. 2021. The information of attribute uncertainties: what convolutional neural networks can learn about errors in input data. Machine Learning: Science and Technology 4 (2021).","DOI":"10.1088\/2632-2153\/ad0285"},{"key":"e_1_3_3_3_125_2","volume-title":"Principles of categorization","author":"Rosch Eleanor","year":"2002","unstructured":"Eleanor Rosch. 2002. Principles of categorization. MIT Press, Cambridge, MA, USA. 251\u2013270 pages."},{"key":"e_1_3_3_3_126_2","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/8842.003.0006"},{"key":"e_1_3_3_3_127_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533127"},{"key":"e_1_3_3_3_128_2","doi-asserted-by":"publisher","unstructured":"Lisa Schut Nenad Tomasev Tom McGrath Demis Hassabis Ulrich Paquet and Been Kim. 2023. Bridging the Human-AI Knowledge Gap: Concept Discovery and Transfer in AlphaZero. CoRR abs\/2310.16410 (2023). 10.48550\/ARXIV.2310.16410","DOI":"10.48550\/ARXIV.2310.16410"},{"key":"e_1_3_3_3_129_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.74"},{"key":"e_1_3_3_3_130_2","doi-asserted-by":"publisher","unstructured":"Claude\u00a0Elwood Shannon. 1948. A mathematical theory of communication. The Bell System Technical Journal 27 3 (1948) 379\u2013423. 10.1002\/J.1538-7305.1948.TB01338.X","DOI":"10.1002\/J.1538-7305.1948.TB01338.X"},{"key":"e_1_3_3_3_131_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2021.EMNLP-MAIN.757"},{"key":"e_1_3_3_3_132_2","doi-asserted-by":"publisher","unstructured":"Zoe\u00a0De Simone Angie\u00a0W. Boggust Arvind Satyanarayan and Ashia Wilson. 2023. What is a Fair Diffusion Model? Designing Generative Text-To-Image Models to Incorporate Various Worldviews. CoRR abs\/2309.09944 (2023). 10.48550\/ARXIV.2309.09944 arXiv:https:\/\/arXiv.org\/abs\/2309.09944","DOI":"10.48550\/ARXIV.2309.09944"},{"key":"e_1_3_3_3_133_2","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511137"},{"key":"e_1_3_3_3_134_2","doi-asserted-by":"publisher","unstructured":"Daniel Smilkov Nikhil Thorat Been Kim Fernanda\u00a0B. Vi\u00e9gas and Martin Wattenberg. 2017. SmoothGrad: removing noise by adding noise. 10.48550\/arXiv.1706.03825 arXiv:https:\/\/arXiv.org\/abs\/1706.03825","DOI":"10.48550\/arXiv.1706.03825"},{"key":"e_1_3_3_3_135_2","doi-asserted-by":"publisher","unstructured":"Jan-Tobias Sohns Christoph Garth and Heike Leitte. 2023. Decision Boundary Visualization for Counterfactual Reasoning. Computer Graphics Forum 42 1 (2023) 7\u201320. 10.1111\/CGF.14650","DOI":"10.1111\/CGF.14650"},{"key":"e_1_3_3_3_136_2","doi-asserted-by":"publisher","unstructured":"Hendrik Strobelt Sebastian Gehrmann Michael Behrisch Adam Perer Hanspeter Pfister and Alexander\u00a0M Rush. 2018. Seq2seq-vis: A visual debugging tool for sequence-to-sequence models. IEEE Transactions on Visualization and Computer Graphics 25 1 (2018) 353\u2013363. 10.1109\/TVCG.2018.2865044","DOI":"10.1109\/TVCG.2018.2865044"},{"key":"e_1_3_3_3_137_2","doi-asserted-by":"publisher","unstructured":"Ilia Sucholutsky Lukas Muttenthaler Adrian Weller Andi Peng Andreea Bobu Been Kim Bradley\u00a0C. Love Erin Grant Jascha Achterberg Joshua\u00a0B. Tenenbaum Katherine\u00a0M. Collins Katherine\u00a0L. Hermann Kerem Oktar Klaus Greff Martin\u00a0N. Hebart Nori Jacoby Qiuyi Zhang Raja Marjieh Robert Geirhos Sherol Chen Simon Kornblith Sunayana Rane Talia Konkle Thomas\u00a0P. O\u2019Connell Thomas Unterthiner Andrew\u00a0K. Lampinen Klaus-Robert M\u00fcller Mariya Toneva and Thomas\u00a0L. Griffiths. 2023. Getting aligned on representational alignment. CoRR abs\/2310.13018 (2023). 10.48550\/ARXIV.2310.13018 arXiv:https:\/\/arXiv.org\/abs\/2310.13018","DOI":"10.48550\/ARXIV.2310.13018"},{"key":"e_1_3_3_3_138_2","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-48005-6_18"},{"key":"e_1_3_3_3_139_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445088"},{"key":"e_1_3_3_3_140_2","doi-asserted-by":"publisher","DOI":"10.1145\/3465416.3483305"},{"key":"e_1_3_3_3_141_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581482"},{"key":"e_1_3_3_3_142_2","series-title":"JMLR Workshop and Conference Proceedings","first-page":"1139","volume-title":"International Conference on Machine Learning (ICML)","volume":"28","author":"Sutskever Ilya","year":"2013","unstructured":"Ilya Sutskever, James Martens, George\u00a0E. Dahl, and Geoffrey\u00a0E. Hinton. 2013. On the importance of initialization and momentum in deep learning. In International Conference on Machine Learning (ICML)(JMLR Workshop and Conference Proceedings, Vol.\u00a028). JMLR.org, 1139\u20131147."},{"key":"e_1_3_3_3_143_2","unstructured":"Adly Templeton Tom Conerly Jonathan Marcus Jack Lindsey Trenton Bricken Brian Chen Adam Pearce Craig Citro Emmanuel Ameisen Andy Jones Hoagy Cunningham Nicholas\u00a0L Turner Callum McDougall Monte MacDiarmid C.\u00a0Daniel Freeman Theodore\u00a0R. Sumers Edward Rees Joshua Batson Adam Jermyn Shan Carter Chris Olah and Tom Henighan. 2024. Scaling Monosemanticity: Extracting Interpretable Features from Claude 3 Sonnet. Transformer Circuits Thread (2024). https:\/\/transformer-circuits.pub\/2024\/scaling-monosemanticity\/index.html"},{"key":"e_1_3_3_3_144_2","doi-asserted-by":"publisher","unstructured":"Michael Terry Chinmay Kulkarni Martin Wattenberg Lucas Dixon and Meredith\u00a0Ringel Morris. 2023. AI Alignment in the Design of Interactive AI: Specification Alignment Process Alignment and Evaluation Support. CoRR abs\/2311.00710 (2023). 10.48550\/ARXIV.2311.00710 arXiv:https:\/\/arXiv.org\/abs\/2311.00710","DOI":"10.48550\/ARXIV.2311.00710"},{"key":"e_1_3_3_3_145_2","unstructured":"Tesla. [n. d.]. A Tragic Loss. https:\/\/www.tesla.com\/es_mx\/blog\/tragic-loss. Accessed: 2024-06-16."},{"key":"e_1_3_3_3_146_2","doi-asserted-by":"publisher","unstructured":"Yonglong Tian Dilip Krishnan and Phillip Isola. 2019. Contrastive Representation Distillation. CoRR abs\/1910.10699 (2019). 10.48550\/arXiv.1910.10699 arXiv:https:\/\/arXiv.org\/abs\/1910.10699","DOI":"10.48550\/arXiv.1910.10699"},{"key":"e_1_3_3_3_147_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995347"},{"key":"e_1_3_3_3_148_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-88564-1_2"},{"key":"e_1_3_3_3_149_2","unstructured":"Edward Tufte. 2006. Beautiful Evidence."},{"key":"e_1_3_3_3_150_2","unstructured":"Julie Vaughn Avital Baral Mayukha Vadari and William Boag. 2020. Dataset Bias in Diagnostic AI systems: Guidelines for Dataset Collection and Usage. ACM Conference in Health Inference and Learning Workshop (2020)."},{"key":"e_1_3_3_3_151_2","unstructured":"Bret Victor. 2011. Up and Down the Ladder of Abstraction; A Systematic Approach to Interactive Visualization. https:\/\/worrydream.com\/LadderOfAbstraction\/."},{"key":"e_1_3_3_3_152_2","doi-asserted-by":"publisher","unstructured":"Hanjing Wang and Qiang Ji. 2024. Epistemic Uncertainty Quantification for Pretrained Neural Networks. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2024) 11052\u201311061. 10.1109\/CVPR52733.2024.01051","DOI":"10.1109\/CVPR52733.2024.01051"},{"key":"e_1_3_3_3_153_2","unstructured":"Kevin Wang Alexandre Variengien Arthur Conmy Buck Shlegeris and Jacob Steinhardt. 2022. Interpretability in the wild: a circuit for indirect object identification in gpt-2 small. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.00593 (2022)."},{"key":"e_1_3_3_3_154_2","doi-asserted-by":"publisher","unstructured":"Martin Wattenberg and Fernanda\u00a0B. Vi\u00e9gas. 2024. Relational Composition in Neural Networks: A Survey and Call to Action. CoRR abs\/2407.14662 (2024). 10.48550\/ARXIV.2407.14662 arXiv:https:\/\/arXiv.org\/abs\/2407.14662","DOI":"10.48550\/ARXIV.2407.14662"},{"key":"e_1_3_3_3_155_2","doi-asserted-by":"publisher","unstructured":"Martin Wattenberg Fernanda Vi\u00e9gas and Ian Johnson. 2016. How to Use t-SNE Effectively. Distill (2016). 10.23915\/distill.00002","DOI":"10.23915\/distill.00002"},{"key":"e_1_3_3_3_156_2","doi-asserted-by":"publisher","unstructured":"James Wexler Mahima Pushkarna Tolga Bolukbasi Martin Wattenberg Fernanda\u00a0B. Vi\u00e9gas and Jimbo Wilson. 2020. The What-If Tool: Interactive Probing of Machine Learning Models. IEEE Transactions on Visualization and Computer Graphics 26 1 (2020) 56\u201365. 10.1109\/TVCG.2019.2934619","DOI":"10.1109\/TVCG.2019.2934619"},{"key":"e_1_3_3_3_157_2","doi-asserted-by":"crossref","unstructured":"Langdon Winner. 1980. Do Artifacts Have Politics? Daedalus 109 1 (1980) 121\u2013136.","DOI":"10.1515\/9783110808681.121"},{"key":"e_1_3_3_3_158_2","volume-title":"International Classification of Diseases, Ninth Revision (ICD-9)","author":"Organization World Health","year":"1978","unstructured":"World Health Organization. 1978. International Classification of Diseases, Ninth Revision (ICD-9). World Health Organization, Geneva, Switzerland."},{"key":"e_1_3_3_3_159_2","volume-title":"International Classification of Diseases, 10th Revision","author":"Organization World Health","year":"2022","unstructured":"World Health Organization. 2022. International Classification of Diseases, 10th Revision. World Health Organization, Geneva, Switzerland."},{"key":"e_1_3_3_3_160_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/P19-1073"},{"key":"e_1_3_3_3_161_2","doi-asserted-by":"publisher","unstructured":"Eiling Yee. 2019. Abstraction and concepts: when how where what and why? Language Cognition and Neuroscience 34 10 (2019) 1257\u20131265. 10.1080\/23273798.2019.1660797","DOI":"10.1080\/23273798.2019.1660797"},{"key":"e_1_3_3_3_162_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2024.ACL-LONG.365"},{"key":"e_1_3_3_3_163_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2023.FINDINGS-EACL.150"},{"key":"e_1_3_3_3_164_2","first-page":"609","volume-title":"International Conference on Machine Learning (ICML)","author":"Zadrozny Bianca","year":"2001","unstructured":"Bianca Zadrozny and Charles Elkan. 2001. Obtaining calibrated probability estimates from decision trees and naive Bayesian classifiers. In International Conference on Machine Learning (ICML). Morgan Kaufmann, 609\u2013616."},{"key":"e_1_3_3_3_165_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-84060-0_20"},{"key":"e_1_3_3_3_166_2","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2022.EMNLP-MAIN.591"},{"key":"e_1_3_3_3_167_2","volume-title":"International Conference on Machine Learning (ICML)","author":"Zhang Muru","year":"2024","unstructured":"Muru Zhang, Ofir Press, William Merrill, Alisa Liu, and Noah\u00a0A. Smith. 2024. How Language Model Hallucinations Can Snowball. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_3_3_168_2","doi-asserted-by":"publisher","unstructured":"Shen Zheng Jie Huang and Kevin Chen-Chuan Chang. 2023. Why Does ChatGPT Fall Short in Providing Truthful Answers? CoRR abs\/2304.10513 (2023). 10.48550\/ARXIV.2304.10513 arXiv:https:\/\/arXiv.org\/abs\/2304.10513","DOI":"10.48550\/ARXIV.2304.10513"}],"event":{"name":"CHI 2025: CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713406","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706598.3713406","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T04:56:11Z","timestamp":1751604971000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713406"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":167,"alternative-id":["10.1145\/3706598.3713406","10.1145\/3706598"],"URL":"https:\/\/doi.org\/10.1145\/3706598.3713406","relation":{},"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"2025-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}