{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:42:37Z","timestamp":1775068957592,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T00:00:00Z","timestamp":1731542400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,14]]},"DOI":"10.1145\/3677052.3698612","type":"proceedings-article","created":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T06:38:06Z","timestamp":1731566286000},"page":"660-668","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Mechanistic interpretability of large language models with applications to the financial services industry"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0251-4203","authenticated-orcid":false,"given":"Ashkan","family":"Golgoon","sequence":"first","affiliation":[{"name":"Discover Financial Services, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6881-4460","authenticated-orcid":false,"given":"Khashayar","family":"Filom","sequence":"additional","affiliation":[{"name":"Discover Financial Services, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4498-1800","authenticated-orcid":false,"given":"Arjun","family":"Ravi Kannan","sequence":"additional","affiliation":[{"name":"Discover Financial Services, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,11,14]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"U.S. lawmakers take an interest","author":"Bartz Diane","year":"2023","unstructured":"Diane Bartz. 2023. As ChatGPT\u2019s popularity explodes, U.S. lawmakers take an interest.Reuters, https:\/\/www.reuters.com\/technology\/chatgpts-popularity-explodes-us-lawmakers-take-an-interest-2023-02-13."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Samuel\u00a0R. Bowman. 2023. Eight Things to Know about Large Language Models. (2023). arXiv:2304.00612","DOI":"10.1215\/2834703X-11556011"},{"key":"e_1_3_2_1_3_1","unstructured":"Consumer Financial Protection\u00a0Bureau (CFPB). 2012. Unfair Deceptive Or Abusive Acts Or Practices.https:\/\/files.consumerfinance.gov\/f\/documents\/cfpb_unfair-deceptive-abusive-acts-practices-udaaps_procedures_2023-09.pdf."},{"key":"e_1_3_2_1_4_1","unstructured":"Bilal Chughtai Lawrence Chan and Neel Nanda. 2023. A Toy Model of Universality: Reverse Engineering How Networks Learn Group Operations. (2023). arXiv:2302.03025"},{"key":"e_1_3_2_1_5_1","unstructured":"California\u00a0Civil Code. 2018. California Consumer Privacy Act.https:\/\/cppa.ca.gov\/regulations\/pdf\/cppa_act.pdf."},{"key":"e_1_3_2_1_6_1","first-page":"16318","article-title":"Towards automated circuit discovery for mechanistic interpretability","volume":"36","author":"Conmy Arthur","year":"2023","unstructured":"Arthur Conmy, Augustine Mavor-Parker, Aengus Lynch, Stefan Heimersheim, 2023. Towards automated circuit discovery for mechanistic interpretability. Advances in Neural Information Processing Systems 36 (2023), 16318\u201316352.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_7_1","unstructured":"Michael\u00a0R. Douglas. 2023. Large Language Models. (2023). arXiv:2307.05782"},{"key":"e_1_3_2_1_8_1","volume-title":"A Mathematical Framework for Transformer Circuits. Transformer Circuits Thread","author":"Elhage Nelson","year":"2021","unstructured":"Nelson Elhage, Neel Nanda, Catherine Olsson, Tom Henighan, 2021. A Mathematical Framework for Transformer Circuits. Transformer Circuits Thread (2021). https:\/\/transformer-circuits.pub\/2021\/framework\/index.html."},{"key":"e_1_3_2_1_9_1","unstructured":"Federal Communications\u00a0Commission (FCC). 1991. Telephone Consumer Protection Act.https:\/\/www.fcc.gov\/sites\/default\/files\/tcpa-rules.pdf."},{"key":"e_1_3_2_1_10_1","unstructured":"Federal Deposit Insurance\u00a0Corporation (FDIC). 2006. Military Lending Act.https:\/\/www.fdic.gov\/resources\/supervision-and-examinations\/consumer-compliance-examination-manual\/documents\/5\/v-13-1.pdf."},{"key":"e_1_3_2_1_11_1","volume-title":"Tara Rezaei Kheirkhah","author":"Gurnee Wes","year":"2024","unstructured":"Wes Gurnee, Theo Horsley, Zifan\u00a0Carl Guo, Tara Rezaei Kheirkhah, 2024. Universal Neurons in GPT2 Language Models. (2024). arXiv:2401.12181"},{"key":"e_1_3_2_1_12_1","unstructured":"Yufei Huang Shengding Hu Xu Han Zhiyuan Liu 2024. Unified View of Grokking Double Descent and Emergent Abilities: A Perspective from Circuits Competition. (2024). arXiv:2402.15175"},{"key":"e_1_3_2_1_13_1","unstructured":"Ezra Klein. 2023. This Changes Everything.New York Times https:\/\/www.nytimes.com\/2023\/03\/12\/opinion\/chatbots-artificial-intelligence-future-weirdness.html."},{"key":"e_1_3_2_1_14_1","volume-title":"A survey on fairness in large language models. arXiv preprint arXiv:2308.10149","author":"Li Yingji","year":"2023","unstructured":"Yingji Li, Mengnan Du, Rui Song, Xin Wang, 2023. A survey on fairness in large language models. arXiv preprint arXiv:2308.10149 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604237.3626869"},{"key":"e_1_3_2_1_16_1","unstructured":"Ted Lieu. 2023. I\u2019m a Congressman Who Codes. A.I. Freaks Me Out.New York Times https:\/\/www.nytimes.com\/2023\/01\/23\/opinion\/ted-lieu-ai-chatgpt-congress.html."},{"key":"e_1_3_2_1_17_1","volume-title":"Data-centric FinGPT: Democratizing Internet-scale Data for Financial Large Language Models. NeurIPS Workshop on Instruction Tuning and Instruction Following","author":"Liu Xiao-Yang","year":"2023","unstructured":"Xiao-Yang Liu, Guoxuan Wang, Hongyang Yang, and Daochen Zha. 2023. Data-centric FinGPT: Democratizing Internet-scale Data for Financial Large Language Models. NeurIPS Workshop on Instruction Tuning and Instruction Following (2023)."},{"key":"e_1_3_2_1_18_1","unstructured":"Callum McDougall. 2024. ARENA (Alignment Research Engineer Accelerator) 3.0 [accessed June 2024]. https:\/\/arena3-chapter1-transformer-interp.streamlit.app\/; https:\/\/arena-ch1-transformers.streamlit.app\/; https:\/\/github.com\/callummcdougall\/ARENA_2.0."},{"key":"e_1_3_2_1_19_1","first-page":"17359","article-title":"Locating and editing factual associations in GPT","volume":"35","author":"Meng Kevin","year":"2022","unstructured":"Kevin Meng, David Bau, Alex Andonian, and Yonatan Belinkov. 2022. Locating and editing factual associations in GPT. Advances in Neural Information Processing Systems 35 (2022), 17359\u201317372.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_20_1","unstructured":"Shervin Minaee Tomas Mikolov Narjes Nikzad Meysam Chenaghlu 2024. Large Language Models: A Survey. (2024). arXiv:2402.06196"},{"key":"e_1_3_2_1_21_1","volume-title":"A Comprehensive Mechanistic Interpretability Explainer & Glossary [accessed","author":"Nanda Neel","year":"2024","unstructured":"Neel Nanda. 2022. A Comprehensive Mechanistic Interpretability Explainer & Glossary [accessed June 2024]. https:\/\/neelnanda.io\/glossary"},{"key":"e_1_3_2_1_22_1","volume-title":"Mechanistic Interpretability Quickstart Guide [accessed","author":"Nanda Neel","year":"2024","unstructured":"Neel Nanda. 2023. Mechanistic Interpretability Quickstart Guide [accessed June 2024]. https:\/\/www.lesswrong.com\/posts\/jLAvJt8wuSFySN975\/mechanistic-interpretability-quickstart-guide."},{"key":"e_1_3_2_1_23_1","unstructured":"Neel Nanda and Joseph Bloom. 2022. TransformerLens. https:\/\/github.com\/neelnanda-io\/TransformerLens."},{"key":"e_1_3_2_1_24_1","unstructured":"Neel Nanda Lawrence Chan Tom Lieberum Jess Smith 2023. Progress measures for grokking via mechanistic interpretability. (2023). arXiv:2301.05217"},{"key":"e_1_3_2_1_25_1","unstructured":"U.S.\u00a0Department of Justice\u00a0(DoJ). 1940. Servicemembers Civil Relief Act.https:\/\/www.justice.gov\/crt\/servicemembers-civil-relief-act-summary."},{"key":"e_1_3_2_1_26_1","unstructured":"U.S.\u00a0Department of Justice\u00a0(DoJ). 1968. Fair Housing Act.https:\/\/www.justice.gov\/crt\/fair-housing-act-1."},{"key":"e_1_3_2_1_27_1","unstructured":"U.S.\u00a0Department of Justice\u00a0(DoJ). 2011. Equal Credit Opportunity Act.https:\/\/www.govinfo.gov\/content\/pkg\/USCODE-2011-title15\/html\/USCODE-2011-title15-chap41-subchapIV.htm."},{"key":"e_1_3_2_1_28_1","unstructured":"Chris Olah. 2022. Mechanistic Interpretability Variables and the Importance of Interpretable Bases. https:\/\/www.transformer-circuits.pub\/2022\/mech-interp-essay."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.23915\/distill.00024.001"},{"key":"e_1_3_2_1_30_1","volume-title":"In-context Learning and Induction Heads. Transformer Circuits Thread","author":"Olsson Catherine","year":"2022","unstructured":"Catherine Olsson, Nelson Elhage, Neel Nanda, Nicholas Joseph, 2022. In-context Learning and Induction Heads. Transformer Circuits Thread (2022). https:\/\/transformer-circuits.pub\/2022\/in-context-learning-and-induction-heads\/index.html."},{"key":"e_1_3_2_1_31_1","volume-title":"Real-world examples of \u2018Domain-Specific LLMs\u2019: Bring tailored AI to your business [accessed","author":"Park Sungmin","year":"2024","unstructured":"Sungmin Park. 2024. Real-world examples of \u2018Domain-Specific LLMs\u2019: Bring tailored AI to your business [accessed June 2024]. https:\/\/www.upstage.ai\/feed\/insight\/examples-of-domain-specific-llms."},{"key":"e_1_3_2_1_32_1","unstructured":"Carolin Penke. 2022. A mathematician\u2019s introduction to transformers and large language models. Technical Report. J\u00fclich Supercomputing Center."},{"key":"e_1_3_2_1_33_1","unstructured":"Mary Phuong and Marcus Hutter. 2022. Formal Algorithms for Transformers. Latest 2022 version at http:\/\/www.hutter1.net\/publ\/transalg.pdf. (2022). arXiv:2207.09238"},{"key":"e_1_3_2_1_34_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog 1, 8","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, 2019. Language models are unsupervised multitask learners. OpenAI blog 1, 8 (2019), 9."},{"key":"e_1_3_2_1_35_1","volume-title":"Large language model alignment: A survey. arXiv preprint arXiv:2309.15025","author":"Shen Tianhao","year":"2023","unstructured":"Tianhao Shen, Renren Jin, Yufei Huang, Chuang Liu, 2023. Large language model alignment: A survey. arXiv preprint arXiv:2309.15025 (2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"Rethinking interpretability in the era of large language models. arXiv preprint arXiv:2402.01761","author":"Singh Chandan","year":"2024","unstructured":"Chandan Singh, Jeevana\u00a0Priya Inala, Michel Galley, Rich Caruana, 2024. Rethinking interpretability in the era of large language models. arXiv preprint arXiv:2402.01761 (2024)."},{"key":"e_1_3_2_1_37_1","volume-title":"Scaling Monosemanticity: Extracting Interpretable Features from Claude 3 Sonnet. Transformer Circuits Thread","author":"Templeton Adly","year":"2024","unstructured":"Adly Templeton, Tom Conerly, Jonathan Marcus, Jack Lindsey, 2024. Scaling Monosemanticity: Extracting Interpretable Features from Claude 3 Sonnet. Transformer Circuits Thread (2024). https:\/\/transformer-circuits.pub\/2024\/scaling-monosemanticity\/index.html"},{"key":"e_1_3_2_1_38_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_39_1","volume-title":"NeurIPS ML Safety Workshop.","author":"Wang Kevin\u00a0Ro","year":"2022","unstructured":"Kevin\u00a0Ro Wang, Alexandre Variengien, Arthur Conmy, Buck Shlegeris, 2022. Interpretability in the Wild: a Circuit for Indirect Object Identification in GPT-2 small. In NeurIPS ML Safety Workshop."},{"key":"e_1_3_2_1_40_1","volume-title":"Emergent abilities of large language models. arXiv preprint arXiv:2206.07682","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Yi Tay, Rishi Bommasani, Colin Raffel, 2022. Emergent abilities of large language models. arXiv preprint arXiv:2206.07682 (2022)."},{"key":"e_1_3_2_1_41_1","unstructured":"Shijie Wu Ozan Irsoy Steven Lu Vadim Dabravolski 2023. BloombergGPT: A Large Language Model for Finance. (2023). arXiv:2303.17564"},{"key":"e_1_3_2_1_42_1","volume-title":"Cognitive mirage: A review of hallucinations in large language models. arXiv preprint arXiv:2309.06794","author":"Ye Hongbin","year":"2023","unstructured":"Hongbin Ye, Tong Liu, Aijia Zhang, Wei Hua, 2023. Cognitive mirage: A review of hallucinations in large language models. arXiv preprint arXiv:2309.06794 (2023)."},{"key":"e_1_3_2_1_43_1","unstructured":"Haiyan Zhao Fan Yang Bo Shen Himabindu Lakkaraju 2024. Towards Uncovering How Large Language Model Works: An Explainability Perspective. (2024). arXiv:2402.10688v2"},{"key":"e_1_3_2_1_44_1","volume-title":"Representation engineering: A top-down approach to ai transparency. arXiv preprint arXiv:2310.01405","author":"Zou Andy","year":"2023","unstructured":"Andy Zou, Long Phan, Sarah Chen, James Campbell, 2023. Representation engineering: A top-down approach to ai transparency. arXiv preprint arXiv:2310.01405 (2023)."}],"event":{"name":"ICAIF '24: 5th ACM International Conference on AI in Finance","location":"Brooklyn NY USA","acronym":"ICAIF '24"},"container-title":["Proceedings of the 5th ACM International Conference on AI in Finance"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698612","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3677052.3698612","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:11:18Z","timestamp":1755882678000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698612"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,14]]},"references-count":44,"alternative-id":["10.1145\/3677052.3698612","10.1145\/3677052"],"URL":"https:\/\/doi.org\/10.1145\/3677052.3698612","relation":{},"subject":[],"published":{"date-parts":[[2024,11,14]]},"assertion":[{"value":"2024-11-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}