{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T18:46:35Z","timestamp":1761417995434,"version":"3.37.3"},"reference-count":98,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["CCF-2311469","CNS-2132281","CCF-2007246","CCF-1955853","CCF-2311468","CCF-2132285"],"award-info":[{"award-number":["CCF-2311469","CNS-2132281","CCF-2007246","CCF-1955853","CCF-2311468","CCF-2132285"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IIEEE Trans. Software Eng."],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1109\/tse.2024.3379943","type":"journal-article","created":{"date-parts":[[2024,3,21]],"date-time":"2024-03-21T18:22:15Z","timestamp":1711045335000},"page":"1215-1243","source":"Crossref","is-referenced-by-count":9,"title":["Toward a Theory of Causation for Interpreting Neural Code Models"],"prefix":"10.1109","volume":"50","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6166-7595","authenticated-orcid":false,"given":"David","family":"Nader Palacio","sequence":"first","affiliation":[{"name":"Department of Computer Science, William &#x0026; Mary, Williamsburg, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4829-1017","authenticated-orcid":false,"given":"Alejandro","family":"Velasco","sequence":"additional","affiliation":[{"name":"Department of Computer Science, William &#x0026; Mary, Williamsburg, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2498-705X","authenticated-orcid":false,"given":"Nathan","family":"Cooper","sequence":"additional","affiliation":[{"name":"Department of Computer Science, William &#x0026; Mary, Williamsburg, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0333-8880","authenticated-orcid":false,"given":"Alvaro","family":"Rodriguez","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Universidad Nacional de Colombia, Bogota, Colombia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9683-5616","authenticated-orcid":false,"given":"Kevin","family":"Moran","sequence":"additional","affiliation":[{"name":"Department of Computer Science, George Mason University, Fairfax, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5626-7586","authenticated-orcid":false,"given":"Denys","family":"Poshyvanyk","sequence":"additional","affiliation":[{"name":"Department of Computer Science, William &#x0026; Mary, Williamsburg, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3377811.3380429"},{"key":"ref2","first-page":"334","article-title":"Toward deep learning software repositories","volume-title":"Proc. 12th IEEE Work. Conf. Mining Softw. Repositories (MSR)","author":"White","year":"2015"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2021.3128234"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE43902.2021.00041"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"542","DOI":"10.1145\/3196398.3196431","article-title":"Deep learning similarities from different representations of source code","volume-title":"Proc. IEEE\/ACM 15th Int. Conf. Mining Softw. Repositories (MSR)","author":"Tufano","year":"2018"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/SANER.2019.8668043"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1109\/ICSE43902.2021.00027","article-title":"Towards automating code review activities","volume-title":"Proc. 43rd Int. Conf. Softw. Eng. (ICSE)","author":"Tufano","year":"2021"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MSR52588.2021.00024"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2019.2940179"},{"year":"2022","key":"ref10","article-title":"Visual studio intellicode \u2014 Visual studio"},{"key":"ref11","article-title":"What is Tabnine?"},{"article-title":"Openai codex","year":"2023","author":"Zaremba","key":"ref12"},{"year":"2023","key":"ref13","article-title":"GitHub copilot $\\cdot$\u22c5"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ms.2006.105"},{"article-title":"Evaluating large language models trained on code","year":"2021","author":"Chen","key":"ref15"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"747","DOI":"10.18653\/v1\/P19-1073","article-title":"Errudite: Scalable, reproducible, and testable error analysis","volume-title":"Proc. 57th Annu. Meeting Assoc. Comput. Linguistics","author":"Wu","year":"2019"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"4902","DOI":"10.18653\/v1\/2020.acl-main.442","article-title":"Beyond accuracy: Behavioral testing of NLP models with CheckList","volume-title":"Proc. 58th Annu. Meeting Assoc. Comput. Linguistics","author":"Ribeiro","year":"2020"},{"key":"ref18","first-page":"2685","article-title":"COMET: A neural framework for MT evaluation","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process. (EMNLP)","author":"Rei","year":"2020"},{"author":"Kocmi","key":"ref19","article-title":"To ship or not to ship: An extensive evaluation of automatic metrics for machine translation"},{"author":"Dehghani","key":"ref20","article-title":"The benchmark lottery"},{"article-title":"Holistic evaluation of language models","year":"2022","author":"Liang","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"article-title":"Towards a rigorous science of interpretable machine learning","year":"2017","author":"Doshi-Velez","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-98131-4_1"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-65965-3_28"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3236386.3241340"},{"author":"Doshi-Velez","key":"ref27","article-title":"Towards a rigorous science of interpretable machine learning"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.4171\/icm2022\/173"},{"volume-title":"Causal Inference in Statistics: A Primer","year":"2016","author":"Pearl","key":"ref29"},{"volume-title":"Interpretable Machine Learning","year":"2019","author":"Molnar","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-FoSE59343.2023.00010"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"article-title":"CodeSearchNet challenge: Evaluating the state of semantic code search","year":"2019","author":"Husain","key":"ref34"},{"year":"2024","key":"ref35","article-title":"WM-SEMERU\/CausalSE"},{"article-title":"Which explanation should I choose? A function approximation perspective to characterizing post hoc explanations","year":"2022","author":"Han","key":"ref36"},{"journal-title":"The Book of Why: The New Science of Cause and Effect","year":"2018","author":"Pearl","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2012.6227135"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/2491411.2491458"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2594291.2594321"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/2635868.2635875"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3106237.3106290"},{"article-title":"Maybe deep neural networks are the best choice for modeling source code","year":"2019","author":"Karampatsis","key":"ref43"},{"key":"ref44","first-page":"294","article-title":"Open-vocabulary models for source code (Extended Abstract)","volume-title":"Proc. ACM\/IEEE 42nd Int. Conf. Softw. Eng., Companion","author":"Karampatsis","year":"2020"},{"key":"ref45","first-page":"20601","article-title":"Unsupervised translation of programming languages","volume-title":"Proc. Adv.Neural Inf. Process. Syst.","volume":"33","author":"Roziere","year":"2020"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3196321.3196334"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3238147.3240732"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3340544"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00021"},{"key":"ref50","first-page":"87","article-title":"Deep learning code fragments for code clone detection","volume-title":"Proc. 31st IEEE\/ACM Int. Conf. Automated Softw. Eng. (ASE)","author":"White","year":"2016"},{"key":"ref51","article-title":"Learning to represent programs with graphs","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Allamanis","year":"2018"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2015.336"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1142\/S0218194020500230"},{"key":"ref54","first-page":"1536","article-title":"CodeBERT: A pre-trained model for programming and natural languages","volume-title":"Proc. Findings Assoc. Comput. Linguistics (EMNLP), Association for Computational Linguistics","author":"Feng","year":"2020"},{"key":"ref55","article-title":"GraphCodeBERT: Pre-training code representations with data flow","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Guo","year":"2021"},{"key":"ref56","doi-asserted-by":"crossref","first-page":"4593","DOI":"10.18653\/v1\/P19-1452","article-title":"BERT rediscovers the classical NLP pipeline","volume-title":"Proc. 57th Annu. Meeting Assoc. Comput. Linguistics","author":"Tenney","year":"2019"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.581"},{"key":"ref58","article-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2019"},{"key":"ref59","doi-asserted-by":"crossref","first-page":"784","DOI":"10.18653\/v1\/P18-2124","article-title":"Know what you don\u2019t know: Unanswerable questions for SQuAD","volume-title":"Proc. 56th Annu. Meeting Assoc. Comput. Linguistics (Volume 2: Short Papers)","author":"Rajpurkar","year":"2018"},{"article-title":"CodeXGLUE: A machine learning benchmark dataset for code understanding and generation","year":"2021","author":"Lu","key":"ref60"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/3641540"},{"article-title":"Explainable AI for pre-trained code models: What do they learn? When they do not work?","year":"2022","author":"Mohammadkhani","key":"ref62"},{"key":"ref63","doi-asserted-by":"crossref","first-page":"284","DOI":"10.18653\/v1\/P18-1027","article-title":"Sharp nearby, fuzzy far away: How neural language models use context","volume-title":"Proc. 56th Annu. Meeting Assoc. Comput. Linguistics (Volume 1: Long Papers)","author":"Khandelwal","year":"2018"},{"key":"ref64","first-page":"5740","article-title":"Perturbation sensitivity analysis to detect unintended model biases","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process., 9th Int. Joint Conf. Natural Lang. Process. (EMNLP-IJCNLP)","author":"Prabhakaran","year":"2019"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2021.106552"},{"article-title":"Visualizing and understanding recurrent networks","year":"2015","author":"Karpathy","key":"ref66"},{"key":"ref67","first-page":"6707","article-title":"Polyjuice: Generating counterfactuals for explaining, evaluating, and improving models","volume-title":"Proc. 59th Annu. Meeting Assoc. Comput. Linguistics, 11th Int. Joint Conf. Natural Lang. Process. (Volume 1: Long Papers)","author":"Wu","year":"2021"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/3510457.3513081"},{"key":"ref69","first-page":"9020","article-title":"Neuron dependency graphs: A causal abstraction of neural networks","volume-title":"Proc. 39th Int. Conf. Mach. Learn.","volume":"162","author":"Hu","year":"2022"},{"article-title":"On the relationship between explanation and prediction: A causal view","year":"2022","author":"Karimi","key":"ref70"},{"article-title":"DoWhy: A Python package for causal inference","year":"2024","author":"Sharma","key":"ref71"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME.2019.00046"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/ICSM.2015.7332459"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME58846.2023.00040"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1145\/3241036"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511803161"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2007.256941"},{"article-title":"Learning from data","year":"2023","author":"Abu-Mastafa.","key":"ref78"},{"key":"ref79","first-page":"1137","article-title":"A neural probabilistic language model","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"3","author":"Bengio","year":"2003"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1145\/3359591.3359735"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/3485275"},{"key":"ref83","article-title":"Github"},{"author":"Abadi","key":"ref84","article-title":"TensorFlow: Large-scale machine learning on heterogeneous distributed systems"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3455008"},{"key":"ref86","first-page":"38","article-title":"Transformers: State-of-the-art natural language processing","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process., Syst. Demonstrations","author":"Wolf","year":"2020"},{"key":"ref87","first-page":"1073","article-title":"Big code != big vocabulary: Open-vocabulary models for source code","volume-title":"Proc. Int. Conf. Softw. Eng.","author":"Karampatsis","year":"2020"},{"article-title":"Adam: A method for stochastic optimization","year":"2015","author":"Kingma","key":"ref88"},{"article-title":"CodeXGLUE: A machine learning benchmark dataset for code understanding and generation","year":"2021","author":"Lu","key":"ref89"},{"year":"2024","key":"ref90","article-title":"Tree-sitter introduction"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p16-1162"},{"key":"ref92","first-page":"4186","article-title":"Interpretability beyond feature attribution: Quantitative Testing with Concept Activation Vectors (TCAV)","volume-title":"Proc. 35th Int. Conf. Mach. Learn. (ICML)","volume":"6","author":"Kim"},{"year":"2023","key":"ref93","article-title":"Java language keywords (The Java${}^{\\textrm{TM}}$TM"},{"year":"2023","key":"ref94","article-title":"tree-sitter\/tree-sitter-python"},{"article-title":"DoWhy: Addressing challenges in expressing and validating causal assumptions","year":"2021","author":"Sharma","key":"ref95"},{"article-title":"A survey on in-context learning","year":"2023","author":"Dong","key":"ref96"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1145\/2884781.2884848"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/ICDMW.2011.169"}],"container-title":["IEEE Transactions on Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/32\/10531114\/10477672-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/32\/10531114\/10477672.pdf?arnumber=10477672","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T21:00:34Z","timestamp":1731618034000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10477672\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":98,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tse.2024.3379943","relation":{},"ISSN":["0098-5589","1939-3520","2326-3881"],"issn-type":[{"type":"print","value":"0098-5589"},{"type":"electronic","value":"1939-3520"},{"type":"electronic","value":"2326-3881"}],"subject":[],"published":{"date-parts":[[2024,5]]}}}