{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T15:55:31Z","timestamp":1781020531934,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T00:00:00Z","timestamp":1665360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,10]]},"DOI":"10.1145\/3551349.3556900","type":"proceedings-article","created":{"date-parts":[[2023,1,5]],"date-time":"2023-01-05T20:43:54Z","timestamp":1672951434000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["AST-Probe: Recovering abstract syntax trees from hidden representations of pre-trained language models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2439-2136","authenticated-orcid":false,"given":"Jos\u00e9 Antonio","family":"Hern\u00e1ndez L\u00f3pez","sequence":"first","affiliation":[{"name":"Department of Computer Science and Systems, University of Murcia, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9755-5616","authenticated-orcid":false,"given":"Martin","family":"Weyssow","sequence":"additional","affiliation":[{"name":"DIRO, Universit\u00e9 de Montr\u00e9al, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jes\u00fas S\u00e1nchez","family":"Cuadrado","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Systems, University of Murcia, Spain"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6304-9926","authenticated-orcid":false,"given":"Houari","family":"Sahraoui","sequence":"additional","affiliation":[{"name":"DIRO, Universit\u00e9 de Montr\u00e9al, Canada"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,1,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.3115\/112405.112467"},{"key":"e_1_3_2_1_2_1","unstructured":"Yossi Adi Einat Kermany Yonatan Belinkov Ofer Lavi and Yoav Goldberg. 2016. Fine-grained analysis of sentence embeddings using auxiliary prediction tasks. arXiv preprint arXiv:1608.04207(2016)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Wasi\u00a0Uddin Ahmad Saikat Chakraborty Baishakhi Ray and Kai-Wei Chang. 2021. Unified pre-training for program understanding and generation. arXiv preprint arXiv:2103.06333(2021).","DOI":"10.18653\/v1\/2021.naacl-main.211"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3212695"},{"key":"e_1_3_2_1_5_1","unstructured":"Yonatan Belinkov. 2016. Probing classifiers: Promises shortcomings and advances. Computational Linguistics(2016) 1\u201312."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Yonatan Belinkov Nadir Durrani Fahim Dalvi Hassan Sajjad and James Glass. 2017. What do neural machine translation models learn about morphology?arXiv preprint arXiv:1704.03471(2017).","DOI":"10.18653\/v1\/P17-1080"},{"key":"e_1_3_2_1_7_1","unstructured":"Tom\u00a0B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel\u00a0M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. arxiv:2005.14165\u00a0[cs.CL]"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462840"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338906.3340458"},{"key":"e_1_3_2_1_10_1","volume-title":"Jared Kaplan, Harri Edwards, Yuri Burda","author":"Chen Mark","year":"2021","unstructured":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de\u00a0Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, 2021. Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374(2021)."},{"key":"e_1_3_2_1_11_1","unstructured":"Ethan\u00a0A Chi John Hewitt and Christopher\u00a0D Manning. 2020. Finding universal grammatical relations in multilingual BERT. arXiv preprint arXiv:2005.04511(2020)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Alexis Conneau German Kruszewski Guillaume Lample Lo\u00efc Barrault and Marco Baroni. 2018. What you can cram into a single vector: Probing sentence embeddings for linguistic properties. arXiv preprint arXiv:1805.01070(2018).","DOI":"10.18653\/v1\/P18-1198"},{"key":"e_1_3_2_1_13_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805(2018).","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805(2018)."},{"key":"e_1_3_2_1_14_1","volume-title":"Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155(2020).","author":"Feng Zhangyin","year":"2020","unstructured":"Zhangyin Feng, Daya Guo, Duyu Tang, Nan Duan, Xiaocheng Feng, Ming Gong, Linjun Shou, Bing Qin, Ting Liu, Daxin Jiang, 2020. Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155(2020)."},{"key":"e_1_3_2_1_15_1","volume-title":"Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366(2020).","author":"Guo Daya","year":"2020","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, Shujie Liu, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, 2020. Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366(2020)."},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Hewitt John","year":"2019","unstructured":"John Hewitt and Christopher\u00a0D Manning. 2019. A structural probe for finding syntax in word representations. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). 4129\u20134138."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2902362"},{"key":"e_1_3_2_1_18_1","unstructured":"Hamel Husain Ho-Hsiang Wu Tiferet Gazit Miltiadis Allamanis and Marc Brockschmidt. 2019. Codesearchnet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436(2019)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE51524.2021.9678927"},{"key":"e_1_3_2_1_20_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980(2014).","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980(2014)."},{"key":"e_1_3_2_1_21_1","unstructured":"Tomasz Limisiewicz and David Mare\u010dek. 2020. Introducing orthogonal constraint in structural probes. arXiv preprint arXiv:2012.15228(2020)."},{"key":"e_1_3_2_1_22_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692(2019).","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692(2019)."},{"key":"e_1_3_2_1_23_1","unstructured":"Shuai Lu Nan Duan Hojae Han Daya Guo Seung-won Hwang and Alexey Svyatkovskiy. 2022. ReACC: A Retrieval-Augmented Code Completion Framework. arXiv preprint arXiv:2203.07722(2022)."},{"key":"e_1_3_2_1_24_1","volume-title":"CodeXGLUE: A Machine Learning Benchmark Dataset for Code Understanding and Generation. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1).","author":"Lu Shuai","year":"2021","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, Duyu Tang, 2021. CodeXGLUE: A Machine Learning Benchmark Dataset for Code Understanding and Generation. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Rowan\u00a0Hall Maudslay Josef Valvoda Tiago Pimentel Adina Williams and Ryan Cotterell. 2020. A tale of a probe and a parser. arXiv preprint arXiv:2005.01641(2020).","DOI":"10.18653\/v1\/2020.acl-main.659"},{"key":"e_1_3_2_1_26_1","volume-title":"Thien\u00a0Huu Nguyen, Oscar Sainz, Eneko Agirre, Ilana Heinz, and Dan Roth.","author":"Min Bonan","year":"2021","unstructured":"Bonan Min, Hayley Ross, Elior Sulem, Amir Pouran\u00a0Ben Veyseh, Thien\u00a0Huu Nguyen, Oscar Sainz, Eneko Agirre, Ilana Heinz, and Dan Roth. 2021. Recent Advances in Natural Language Processing via Large Pre-Trained Language Models: A Survey. arXiv preprint arXiv:2111.01243(2021)."},{"key":"e_1_3_2_1_27_1","volume-title":"Comparing the Attention of Humans with Neural Models of Code. In 2021 36th IEEE\/ACM International Conference on Automated Software Engineering (ASE). IEEE, 867\u2013879","author":"Paltenghi Matteo","year":"2021","unstructured":"Matteo Paltenghi and Michael Pradel. 2021. Thinking Like a Developer? Comparing the Attention of Humans with Neural Models of Code. In 2021 36th IEEE\/ACM International Conference on Automated Software Engineering (ASE). IEEE, 867\u2013879."},{"key":"e_1_3_2_1_28_1","unstructured":"A. Radford. 2018. Improving Language Understanding by Generative Pre-Training."},{"key":"e_1_3_2_1_29_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019)."},{"key":"e_1_3_2_1_30_1","unstructured":"Colin Raffel Noam Shazeer Adam Roberts Katherine Lee Sharan Narang Michael Matena Yanqi Zhou Wei Li and Peter\u00a0J Liu. 2019. Exploring the limits of transfer learning with a unified text-to-text transformer. arXiv preprint arXiv:1910.10683(2019)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00349"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Rico Sennrich Barry Haddow and Alexandra Birch. 2015. Neural machine translation of rare words with subword units. arXiv preprint arXiv:1508.07909(2015).","DOI":"10.18653\/v1\/P16-1162"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Rishab Sharma Fuxiang Chen Fatemeh Fard and David Lo. 2022. An Exploratory Study on Code Attention in BERT. arXiv preprint arXiv:2204.10200(2022).","DOI":"10.1145\/3524610.3527921"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Yikang Shen Zhouhan Lin Athul\u00a0Paul Jacob Alessandro Sordoni Aaron Courville and Yoshua Bengio. 2018. Straight to the tree: Constituency parsing with neural syntactic distance. arXiv preprint arXiv:1806.04168(2018).","DOI":"10.18653\/v1\/P18-1108"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR52588.2021.00045"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Sergey Troshin and Nadezhda Chirkova. 2022. Probing Pretrained Models of Source Code. arXiv preprint arXiv:2202.08975(2022).","DOI":"10.18653\/v1\/2022.blackboxnlp-1.31"},{"key":"e_1_3_2_1_37_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Yao Wan Wei Zhao Hongyu Zhang Yulei Sui Guandong Xu and Hai Jin. 2022. What Do They Capture?\u2013A Structural Analysis of Pre-Trained Language Models for Source Code. arXiv preprint arXiv:2202.06840(2022).","DOI":"10.1145\/3510003.3510050"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Yue Wang Weishi Wang Shafiq Joty and Steven\u00a0CH Hoi. 2021. Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859(2021).","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Jennifer\u00a0C White Tiago Pimentel Naomi Saphra and Ryan Cotterell. 2021. A Non-Linear Structural Probe. arXiv preprint arXiv:2105.10185(2021).","DOI":"10.18653\/v1\/2021.naacl-main.12"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz 2019. Huggingface\u2019s transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771(2019).","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.11"}],"event":{"name":"ASE '22: 37th IEEE\/ACM International Conference on Automated Software Engineering","location":"Rochester MI USA","acronym":"ASE '22"},"container-title":["Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3551349.3556900","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3551349.3556900","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T08:25:52Z","timestamp":1755851152000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3551349.3556900"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,10]]},"references-count":42,"alternative-id":["10.1145\/3551349.3556900","10.1145\/3551349"],"URL":"https:\/\/doi.org\/10.1145\/3551349.3556900","relation":{},"subject":[],"published":{"date-parts":[[2022,10,10]]},"assertion":[{"value":"2023-01-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}