{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T04:07:36Z","timestamp":1746245256962,"version":"3.40.4"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,1,23]],"date-time":"2025-01-23T00:00:00Z","timestamp":1737590400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,23]],"date-time":"2025-01-23T00:00:00Z","timestamp":1737590400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072007,62192733,61832009,62192730"],"award-info":[{"award-number":["62072007,62192733,61832009,62192730"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s10664-025-10612-6","type":"journal-article","created":{"date-parts":[[2025,1,23]],"date-time":"2025-01-23T08:38:24Z","timestamp":1737621504000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Transformer-based code model with compressed hierarchy representation"],"prefix":"10.1007","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3290-0244","authenticated-orcid":false,"given":"Kechi","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jia","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhuo","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhi","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ge","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,1,23]]},"reference":[{"key":"10612_CR1","doi-asserted-by":"crossref","unstructured":"Ahmad WU, Chakraborty S, Ray B, Chang K (2020) A transformer-based approach for source code summarization. In: Proceedings of the annual meeting of the association for computational linguistics","DOI":"10.18653\/v1\/2020.acl-main.449"},{"key":"10612_CR2","doi-asserted-by":"crossref","unstructured":"Allamanis M, Barr ET, Devanbu PT, Sutton C (2018) A survey of machine learning for big code and naturalness. ACM Comput Surv 51(4)","DOI":"10.1145\/3212695"},{"key":"10612_CR3","doi-asserted-by":"crossref","unstructured":"Allamanis M, Barr ET, Ducousso S, Gao Z (2020) Typilus: neural type hints. In: Proceedings of the ACM SIGPLAN Conference on programming language design and implementation","DOI":"10.1145\/3385412.3385997"},{"key":"10612_CR4","unstructured":"Allamanis M, Brockschmidt M, Khademi M (2018) Learning to represent programs with graphs. In: International conference on learning representations"},{"key":"10612_CR5","unstructured":"Allamanis M, Peng H, Sutton C (2016) A convolutional attention network for extreme summarization of source code. In: International conference on machine learning"},{"key":"10612_CR6","unstructured":"Alon U, Brody S, Levy O, Yahav E (2019) code2seq: Generating sequences from structured representations of code. In: International conference on learning representations"},{"key":"10612_CR7","unstructured":"Alon U, Sadaka R, Levy O, Yahav E (2020) Structural language models of code. In: Proceedings of the international conference on machine learning"},{"key":"10612_CR8","unstructured":"Alon U, Yahav E (2021) On the bottleneck of graph neural networks and its practical implications. In: The international conference on learning representations"},{"key":"10612_CR9","doi-asserted-by":"crossref","unstructured":"Alon U, Zilberstein M, Levy O, Yahav E (2019) code2vec: learning distributed representations of code. Proc ACM Program Lang 3(POPL)","DOI":"10.1145\/3290353"},{"key":"10612_CR10","unstructured":"Ben-Nun T, Jakobovits AS, Hoefler T (2018) Neural code comprehension: A learnable representation of code semantics. In: Advances in neural information processing systems"},{"key":"10612_CR11","doi-asserted-by":"publisher","unstructured":"Brunsfeld M, Hlynskyi A, Qureshi A, Thomson P, Vera J, Turnbull P, dundargoc Clem T, ObserverOfTime Creager D, Helwer A, Rix R, Kavolis D, van Antwerpen H, Davis M, Ika Nguyen TA, Yahyaabadi A, Brunk S, Massicotte M, Hasabnis, N, bfredl Dong M, Moelius S, Kalt S, Lillis W, Kolja Panteleev V, Arnett J (2024) tree-sitter\/tree-sitter: v0.22.6. https:\/\/doi.org\/10.5281\/zenodo.11117307","DOI":"10.5281\/zenodo.11117307"},{"key":"10612_CR12","doi-asserted-by":"crossref","unstructured":"Bui NDQ, Yu Y, Jiang L (2021) Treecaps: Tree-based capsule networks for source code processing. In: 35th AAAI conference on artificial intelligence, AAAI 2021","DOI":"10.1609\/aaai.v35i1.16074"},{"key":"10612_CR13","unstructured":"Buratti L, Pujar S, Bornea MA, McCarley JS, Zheng Y, Rossiello G, Morari A, Laredo J, Thost V, Zhuang Y, Domeniconi G (2020) Exploring software naturalness through neural language models. CoRR arXiv:2006.12641"},{"key":"10612_CR14","doi-asserted-by":"crossref","unstructured":"Cai R, Liang Z, Xu B, Li Z, Hao Y, Chen Y (2020) TAG : Type auxiliary guiding for code comment generation. In: Proceedings of the 58th annual meeting of the association for computational linguistics","DOI":"10.18653\/v1\/2020.acl-main.27"},{"key":"10612_CR15","doi-asserted-by":"crossref","unstructured":"Chirkova N, Troshin S (2021) Empirical study of transformers for source code. In: ESEC\/FSE \u201921: 29th ACM Joint European software engineering conference and symposium on the foundations of software engineering, 2021","DOI":"10.1145\/3468264.3468611"},{"key":"10612_CR16","doi-asserted-by":"publisher","unstructured":"Devlin J, Chang M, Lee K, Toutanova K (2019) BERT: pre-training of deep bidirectional transformers for language understanding. In: Burstein J, Doran C, Solorio T (eds) Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2-7, 2019, vol 1 (Long and Short Papers), pp 4171\u20134186. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/V1\/N19-1423","DOI":"10.18653\/V1\/N19-1423"},{"key":"10612_CR17","doi-asserted-by":"crossref","unstructured":"Feng Z, Guo D, Tang D, Duan N, Feng X, Gong M, Shou L, Qin B, Liu T, Jiang D, Zhou M (2020) Codebert: A pre-trained model for programming and natural languages. In: Findings of the association for computational linguistics: EMNLP 2020","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"10612_CR18","unstructured":"Fernandes P, Allamanis M, Brockschmidt M (2019) Structured neural summarization. In: International conference on learning representations"},{"key":"10612_CR19","unstructured":"Guo D, Ren S, Lu S, Feng Z, Tang D, Liu S, Zhou L, Duan N, Svyatkovskiy A, Fu S, Tufano M, Deng SK, Clement CB, Drain D, Sundaresan N, Yin J, Jiang D, Zhou M (2021) Graphcodebert: Pre-training code representations with data flow. In: 9th international conference on learning representations, ICLR 2021"},{"key":"10612_CR20","unstructured":"Hellendoorn VJ, Sutton C, Singh R, Maniatis P, Bieber D (2020) Global relational models of source code. In: International conference on learning representations"},{"key":"10612_CR21","doi-asserted-by":"crossref","unstructured":"Hindle A, Barr ET, Su Z, Gabel M, Devanbu PT (2012) On the naturalness of software. In: 34th international conference on software engineering, ICSE 2012","DOI":"10.1109\/ICSE.2012.6227135"},{"key":"10612_CR22","doi-asserted-by":"crossref","unstructured":"Hu X, Li G, Xia X, Lo D, Jin Z (2018) Deep code comment generation. In: 2018 IEEE\/ACM 26th international conference on program comprehension (ICPC). IEEE","DOI":"10.1145\/3196321.3196334"},{"key":"10612_CR23","unstructured":"Husain H, Wu HH, Gazit T, Allamanis M, Brockschmidt M (2019) CodeSearchNet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436"},{"key":"10612_CR24","doi-asserted-by":"crossref","unstructured":"Iyer S, Konstas I, Cheung A, Zettlemoyer L (2016) Summarizing source code using a neural attention model. In: Proceedings of the 54th annual meeting of the association for computational linguistics, ACL 2016","DOI":"10.18653\/v1\/P16-1195"},{"key":"10612_CR25","doi-asserted-by":"crossref","unstructured":"Kim S, Zhao J, Tian Y, Chandra S (2021) Code prediction by feeding trees to transformers. In: 43rd IEEE\/ACM international conference on software engineering, ICSE 2021","DOI":"10.1109\/ICSE43902.2021.00026"},{"issue":"8","key":"10612_CR26","first-page":"922","volume":"7","author":"R Kohavi","year":"2017","unstructured":"Kohavi R, Longbotham R (2017) Online controlled experiments and a\/b testing. Encycl Mach Learn Data Min 7(8):922\u2013929","journal-title":"Encycl Mach Learn Data Min"},{"key":"10612_CR27","doi-asserted-by":"crossref","unstructured":"LeClair A, Jiang S, McMillan C (2019) A neural model for generating natural language summaries of program subroutines. In: Proceedings of the 41st international conference on software engineering, ICSE 2019","DOI":"10.1109\/ICSE.2019.00087"},{"key":"10612_CR28","doi-asserted-by":"crossref","unstructured":"Li J, Li Y, Li G, Jin Z, Hao Y, Hu X (2023) Skcoder: A sketch-based approach for automatic code generation. arXiv preprint arXiv:2302.06144","DOI":"10.1109\/ICSE48619.2023.00179"},{"key":"10612_CR29","doi-asserted-by":"crossref","unstructured":"Li Z, Lu S, Guo D, Duan N, Jannu S, Jenks G, Majumder D, Green J, Svyatkovskiy A, Fu S, Sundaresan N (2022) Automating code review activities by large-scale pre-training. ESEC\/FSE 2022","DOI":"10.1145\/3540250.3549081"},{"key":"10612_CR30","unstructured":"Li Y, Tarlow D, Brockschmidt M, Zemel RS (2016) Gated graph sequence neural networks. In: International conference on learning representations"},{"key":"10612_CR31","doi-asserted-by":"crossref","unstructured":"Liu K, Kim D, Bissyand\u00e9 TF, Kim T, Kim K, Koyuncu A, Kim S, Traon YL (2019) Learning to spot and refactor inconsistent method names. In: ICSE 2019","DOI":"10.1109\/ICSE.2019.00019"},{"key":"10612_CR32","doi-asserted-by":"crossref","unstructured":"Liu F, Li G, Fu Z, Lu S, Hao Y, Jin Z (2022) Learning to recommend method names with global context. CoRR arXiv:2201.10705","DOI":"10.1145\/3510003.3510154"},{"key":"10612_CR33","doi-asserted-by":"publisher","unstructured":"Li J, Wang Y, Lyu MR, King I (2018) Code completion with neural attention and pointer networks. In: Lang J (ed) Proceedings of the 27th international joint conference on artificial intelligence, IJCAI 2018, July 13-19, 2018, Stockholm, Sweden, pp 4159\u20134165.https:\/\/doi.org\/10.24963\/IJCAI.2018\/578","DOI":"10.24963\/IJCAI.2018\/578"},{"key":"10612_CR34","doi-asserted-by":"crossref","unstructured":"Luan S, Yang D, Barnaby C, Sen K, Chandra S (2019) Aroma: code recommendation via structural code search (OOPSLA)","DOI":"10.1145\/3360578"},{"key":"10612_CR35","unstructured":"Lu S, Guo D, Ren S, Huang J, Svyatkovskiy A, Blanco A, Clement CB, Drain D, Jiang D, Tang D, Li G, Zhou L, Shou L, Zhou L, Tufano M, Gong M, Zhou M, Duan N, Sundaresan N, Deng SK, Fu S, Liu S (2021) Codexglue: A machine learning benchmark dataset for code understanding and generation. In: NeurIPS Datasets and Benchmarks 2021"},{"key":"10612_CR36","doi-asserted-by":"crossref","unstructured":"Mou L, Li G, Zhang L, Wang T, Jin Z (2016) Convolutional neural networks over tree structures for programming language processing. In: Proceedings of the 30th AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v30i1.10139"},{"key":"10612_CR37","doi-asserted-by":"crossref","unstructured":"Musgrave K, Belongie SJ, Lim S (2020) A metric learning reality check. In: Computer vision - ECCV 2020 - 16th European conference","DOI":"10.1007\/978-3-030-58595-2_41"},{"key":"10612_CR38","doi-asserted-by":"crossref","unstructured":"Nguyen S, Phan H, Le T, Nguyen TN (2020) Suggesting natural method names to check name consistencies. In: ICSE \u201920: 42nd international conference on software engineering","DOI":"10.1145\/3377811.3380926"},{"key":"10612_CR39","doi-asserted-by":"crossref","unstructured":"Niu C, Li C, Ng V, Ge J, Huang L, Luo B (2022) Spt-code: Sequence-to-sequence pre-training for learning source code representations. CoRR arXiv:2201.01549","DOI":"10.1145\/3510003.3510096"},{"key":"10612_CR40","unstructured":"Puri R, Kung DS, Janssen G, Zhang W, Domeniconi G, Zolotov V, Dolby J, Chen J, Choudhury M, Decker L, Thost V, Buratti L, Pujar S, Ramji S, Finkler U, Malaika S, Reiss F (2021) Codenet: A large-scale AI for code dataset for learning a diversity of coding tasks. In: Neural information processing systems datasets and benchmarks track"},{"key":"10612_CR41","doi-asserted-by":"crossref","unstructured":"Schlichtkrull MS, Kipf TN, Bloem P, van\u00a0den Berg R, Titov I, Welling M (2018) Modeling relational data with graph convolutional networks. In: ESWC","DOI":"10.1007\/978-3-319-93417-4_38"},{"key":"10612_CR42","doi-asserted-by":"publisher","unstructured":"Sennrich R, Haddow B, Birch A (2016) Neural machine translation of rare words with subword units. In: Proceedings of the 54th annual meeting of the association for computational linguistics, ACL 2016, August 7-12, 2016, Berlin, Germany, vol 1: Long Papers. The Association for Computer Linguistics (2016). https:\/\/doi.org\/10.18653\/v1\/p16-1162","DOI":"10.18653\/v1\/p16-1162"},{"key":"10612_CR43","doi-asserted-by":"publisher","unstructured":"Sun Z, Zhu Q, Xiong Y, Sun Y, Mou L, Zhang L (2020) Treegen: A tree-based transformer architecture for code generation. In: The 34th AAAI conference on artificial intelligence, AAAI 2020, The 32nd innovative applications of artificial intelligence conference, IAAI 2020, The 10th AAAI symposium on educational advances in artificial intelligence, EAAI 2020, New York, NY, USA, February 7-12, 2020, pp 8984\u20138991. AAAI Press. https:\/\/doi.org\/10.1609\/AAAI.V34I05.6430","DOI":"10.1609\/AAAI.V34I05.6430"},{"key":"10612_CR44","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems"},{"key":"10612_CR45","unstructured":"Vinyals O, Fortunato M, Jaitly N (2015) Pointer networks. In: Advances in neural information processing systems"},{"key":"10612_CR46","doi-asserted-by":"crossref","unstructured":"Wang W, Li G, Ma B, Xia X, Jin Z (2020) Detecting code clones with graph neural network and flow-augmented abstract syntax tree. In: 2020 IEEE 27th international conference on software analysis, evolution and reengineering (SANER). IEEE","DOI":"10.1109\/SANER48275.2020.9054857"},{"key":"10612_CR47","doi-asserted-by":"crossref","unstructured":"Wang Y, Wang K, Gao F, Wang L (2020) Learning semantic program embeddings with graph interval neural network. Proc ACM Program Lang 4(OOPSLA)","DOI":"10.1145\/3428205"},{"key":"10612_CR48","unstructured":"Wang S, Wen M, Lin B, Mao X (2024) Lightweight global and local contexts guided method name recommendation with prior knowledge. In: ESEC\/FSE \u201921: 29th ACM joint European software engineering conference and symposium on the foundations of software engineering"},{"key":"10612_CR49","doi-asserted-by":"crossref","unstructured":"Wang W, Zhang K, Li G, Liu S, Li A, Jin Z, Liu Y (2022) Learning program representations with a tree-structured transformer","DOI":"10.1109\/SANER56733.2023.00032"},{"issue":"4","key":"10612_CR50","first-page":"1","volume":"42","author":"M Yang","year":"2022","unstructured":"Yang M, Gu B, Duan Z, Jin Z, Zhan N, Dong Y (2022) Intelligent program synthesis framework and key scientific problems for embedded software. Chin Space Sci Technol 42(4):1","journal-title":"Chin Space Sci Technol"},{"key":"10612_CR51","unstructured":"Ye F, Zhou S, Venkat A, Marcus R, Tatbul N, Tithi JJ, Petersen P, Mattson TG, Kraska T, Dubey P, Sarkar V, Gottschlich J (2020) MISIM: an end-to-end neural code similarity system. CoRR arXiv:2006.05265"},{"key":"10612_CR52","unstructured":"Yin P, Neubig G, Allamanis M, Brockschmidt M, Gaunt AL (2019) Learning to represent edits. In: International conference on learning representations"},{"key":"10612_CR53","doi-asserted-by":"crossref","unstructured":"Yu H, Lam W, Chen L, Li G, Xie T, Wang Q (2019) Neural detection of semantic code clones via tree-based convolution. In: Proceedings of the 27th international conference on program comprehension, ICPC 2019","DOI":"10.1109\/ICPC.2019.00021"},{"key":"10612_CR54","doi-asserted-by":"publisher","unstructured":"Zhang K, Li Z, Jin Z, Li G (2023) Implant global and local hierarchy information to sequence based code representation models. In: 31st IEEE\/ACM international conference on program comprehension, ICPC 2023, Melbourne, Australia, May 15-16, 2023, pp 157\u2013168. IEEE. https:\/\/doi.org\/10.1109\/ICPC58990.2023.00030","DOI":"10.1109\/ICPC58990.2023.00030"},{"key":"10612_CR55","doi-asserted-by":"crossref","unstructured":"Zhang K, Wang W, Zhang H, Li G, Jin Z (2022) Learning to represent programs with heterogeneous graphs. In: Proceedings of the 30th IEEE\/ACM international conference on program comprehension, ICPC \u201922","DOI":"10.1145\/3524610.3527905"},{"key":"10612_CR56","doi-asserted-by":"crossref","unstructured":"Zhang J, Wang X, Zhang H, Sun H, Wang K, Liu X (2019) A novel neural source code representation based on abstract syntax tree. In: Proceedings of the 41st international conference on software engineering, ICSE 2019","DOI":"10.1109\/ICSE.2019.00086"},{"key":"10612_CR57","unstructured":"Z\u00fcgner D, Kirschstein T, Catasta M, Leskovec J, G\u00fcnnemann S (2021) Language-agnostic representation learning of source code from structure and context. In: 9th international conference on learning representations, ICLR 2021"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10612-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-025-10612-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10612-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T13:46:44Z","timestamp":1746193604000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-025-10612-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,23]]},"references-count":57,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["10612"],"URL":"https:\/\/doi.org\/10.1007\/s10664-025-10612-6","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"type":"print","value":"1382-3256"},{"type":"electronic","value":"1573-7616"}],"subject":[],"published":{"date-parts":[[2025,1,23]]},"assertion":[{"value":"10 January 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 January 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Beyond this, the authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}],"article-number":"60"}}