{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T15:08:12Z","timestamp":1774883292122,"version":"3.50.1"},"reference-count":94,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T00:00:00Z","timestamp":1765929600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T00:00:00Z","timestamp":1765929600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100010663","name":"H2020 European Research Council","doi-asserted-by":"publisher","award":["949014"],"award-info":[{"award-number":["949014"]}],"id":[{"id":"10.13039\/100010663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1007\/s10664-025-10763-6","type":"journal-article","created":{"date-parts":[[2025,12,17]],"date-time":"2025-12-17T10:44:13Z","timestamp":1765968253000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning to represent code changes"],"prefix":"10.1007","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6377-0884","authenticated-orcid":false,"given":"Xunzhu","family":"Tang","sequence":"first","affiliation":[]},{"given":"Haoye","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Weiguo","family":"Pian","sequence":"additional","affiliation":[]},{"given":"Saad","family":"Ezzini","sequence":"additional","affiliation":[]},{"given":"Abdoul Kader","family":"Kabor\u00e9","sequence":"additional","affiliation":[]},{"given":"Andrew","family":"Habib","sequence":"additional","affiliation":[]},{"given":"Kisub","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Jacques","family":"Klein","sequence":"additional","affiliation":[]},{"given":"Tegawend\u00e9 F.","family":"Bissyand\u00e9","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,17]]},"reference":[{"issue":"4","key":"10763_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3212695","volume":"51","author":"M Allamanis","year":"2018","unstructured":"Allamanis M, Barr ET, Devanbu P, Sutton C (2018) A survey of machine learning for big code and naturalness. ACM Comput Surv (CSUR) 51(4):1\u201337","journal-title":"ACM Comput Surv (CSUR)"},{"key":"10763_CR2","unstructured":"Alon U, Sadaka R, Levy O, Yahav E (2020) Structural language models of code. In: Proceedings of the 37th international conference on machine learning, ICML 2020, 13-18 July 2020, Virtual Event, Proceedings of Machine Learning Research, vol. 119, pp 245\u2013256. PMLR. http:\/\/proceedings.mlr.press\/v119\/alon20a.html"},{"key":"10763_CR3","doi-asserted-by":"publisher","unstructured":"Alon U, Zilberstein M, Levy O, Yahav E (2019) code2vec: learning distributed representations of code. PACMPL 3(POPL) 40:1\u201340:29. https:\/\/doi.org\/10.1145\/3290353","DOI":"10.1145\/3290353"},{"key":"10763_CR4","unstructured":"Banerjee S, Lavie A (2005) Meteor: An automatic metric for mt evaluation with improved correlation with human judgments. In: Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization, pp 65\u201372"},{"key":"10763_CR5","doi-asserted-by":"crossref","unstructured":"Barr ET, Brun Y, Devanbu P, Harman M, Sarro F (2014) The plastic surgery hypothesis. In: Proceedings of the 22nd ACM SIGSOFT international symposium on foundations of software engineering, pp 306\u2013317","DOI":"10.1145\/2635868.2635898"},{"key":"10763_CR6","doi-asserted-by":"crossref","unstructured":"Brody S, Alon U, Yahav E (2020) A structural model for contextual code changes. Proceed ACM Program Lang 4(OOPSLA), pp 1\u201328","DOI":"10.1145\/3428283"},{"key":"10763_CR7","doi-asserted-by":"crossref","unstructured":"Buse RP, Weimer WR (2010) Automatically documenting program changes. In: Proceedings of the IEEE\/ACM international conference on Automated software engineering, pp 33\u201342","DOI":"10.1145\/1858996.1859005"},{"issue":"3","key":"10763_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s42979-021-00566-z","volume":"2","author":"R Cabrera Lozoya","year":"2021","unstructured":"Cabrera Lozoya R, Baumann A, Sabetta A, Bezzi M (2021) Commit2vec: Learning distributed representations of code changes. SN Comput Sci 2(3):1\u201316","journal-title":"SN Comput Sci"},{"key":"10763_CR9","doi-asserted-by":"crossref","unstructured":"Ciniselli M, Cooper N, Pascarella L, Poshyvanyk D, Di\u00a0Penta M, Bavota G (2021) An empirical study on the usage of bert models for code completion. In: 2021 IEEE\/ACM 18th international conference on mining software repositories (MSR), IEEE, pp 108\u2013119","DOI":"10.1109\/MSR52588.2021.00024"},{"key":"10763_CR10","doi-asserted-by":"crossref","unstructured":"Cordella LP, Foggia P, Sansone C, Vento M (1999) Performance evaluation of the vf graph matching algorithm. In: Proceedings 10th international conference on image analysis and processing, IEEE, pp 1172\u20131177","DOI":"10.1109\/ICIAP.1999.797762"},{"key":"10763_CR11","doi-asserted-by":"crossref","unstructured":"Cort\u00e9s-Coy LF, Linares-V\u00e1squez M, Aponte J, Poshyvanyk D (2014) On automatically generating commit messages via summarization of source code changes. In: 2014 IEEE 14th International working conference on source code analysis and manipulation, IEEE, pp 275\u2013284","DOI":"10.1109\/SCAM.2014.14"},{"key":"10763_CR12","doi-asserted-by":"crossref","unstructured":"DeFreez D, Thakur AV, Rubio-Gonz\u00e1lez C (2018) Path-based function embedding and its application to error-handling specification mining. In: Proceedings of the 2018 26th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering, pp 423\u2013433","DOI":"10.1145\/3236024.3236059"},{"key":"10763_CR13","unstructured":"Devlin J, Chang MW, Lee K, Toutanova K (2018) Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv:1810.04805"},{"key":"10763_CR14","doi-asserted-by":"crossref","unstructured":"Dong J, Lou Y, Zhu Q, Sun Z, Li Z, Zhang W, Hao D (2022) Fira: Fine-grained graph-based code change representation for automated commit message generation","DOI":"10.1145\/3510003.3510069"},{"key":"10763_CR15","doi-asserted-by":"crossref","unstructured":"Dyer R, Nguyen HA, Rajan H, Nguyen TN (2013) Boa: A language and infrastructure for analyzing ultra-large-scale software repositories. In: 2013 35th International conference on software engineering (ICSE), IEEE, pp 422\u2013431","DOI":"10.1109\/ICSE.2013.6606588"},{"key":"10763_CR16","doi-asserted-by":"crossref","unstructured":"Elnaggar A, Ding W, Jones L, Gibbs T, Feher T, Angerer C, Severini S, Matthes F, Rost B (2021) Codetrans: Towards cracking the language of silicon\u2019s code through self-supervised deep learning and high performance computing. arXiv:2104.02443","DOI":"10.1101\/2020.07.12.199554"},{"key":"10763_CR17","doi-asserted-by":"publisher","unstructured":"Falleri JR, Morandat F, Blanc X, Martinez M, Monperrus M (2014) Fine-grained and accurate source code differencing. In: Proceedings of the 29th ACM\/IEEE international conference on Automated software engineering, pp 313\u2013324. https:\/\/doi.org\/10.1145\/2642937.2642982","DOI":"10.1145\/2642937.2642982"},{"key":"10763_CR18","doi-asserted-by":"crossref","unstructured":"Feng Z, Guo D, Tang D, Duan N, Feng X, Gong M, Shou L, Qin B, Liu T, Jiang D et\u00a0al (2020) Codebert: A pre-trained model for programming and natural languages. arXiv:2002.08155","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"issue":"2","key":"10763_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3418461","volume":"30","author":"X Gao","year":"2021","unstructured":"Gao X, Wang B, Duck GJ, Ji R, Xiong Y, Roychoudhury A (2021) Beyond tests: Program vulnerability repair via crash constraint extraction. ACM Trans Softw Eng Methodol (TOSEM) 30(2):1\u201327","journal-title":"ACM Trans Softw Eng Methodol (TOSEM)"},{"key":"10763_CR20","doi-asserted-by":"crossref","unstructured":"Ghanbari A, Marcus A (2022) Patch correctness assessment in automated program repair based on the impact of patches on production and test code","DOI":"10.1145\/3533767.3534368"},{"key":"10763_CR21","doi-asserted-by":"crossref","unstructured":"Gissurarson MP, Applis L, Panichella A, van Deursen A, Sands D (2022) Propr: property-based automatic program repair. In: Proceedings of the 44th international conference on software engineering, pp 1768\u20131780","DOI":"10.1145\/3510003.3510620"},{"key":"10763_CR22","unstructured":"Glorot X, Bengio Y (2010) Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the thirteenth international conference on artificial intelligence and statistics, pp 249\u2013256. JMLR Workshop and Conference Proceedings"},{"key":"10763_CR23","unstructured":"Glorot X, Bordes A, Bengio Y (2011) Deep sparse rectifier neural networks. In: Proceedings of the fourteenth international conference on artificial intelligence and statistics, pp 315\u2013323. JMLR Workshop and Conference Proceedings"},{"key":"10763_CR24","unstructured":"Guo D, Ren S, Lu S, Feng Z, Tang D, Liu S, Zhou L, Duan N, Svyatkovskiy A, Fu S, Tufano M, Deng SK, Clement CB, Drain D, Sundaresan N, Yin J, Jiang D, Zhou M (2021) Graphcodebert: Pre-training code representations with data flow. In: 9th International conference on learning representations, ICLR 2021, Virtual Event, Austria, May 3-7. OpenReview.net. https:\/\/openreview.net\/forum?id=jLoC4ez43PZ"},{"key":"10763_CR25","doi-asserted-by":"crossref","unstructured":"Henkel J, Lahiri SK, Liblit B, Reps T (2018) Code vectors: Understanding programs through embedded abstracted symbolic traces. In: Proceedings of the 2018 26th ACM joint meeting on european software engineering conference and symposium on the foundations of software engineering, pp 163\u2013174","DOI":"10.1145\/3236024.3236085"},{"key":"10763_CR26","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"10763_CR27","unstructured":"Hindle A, German DM, Holt R (2009) Software process recovery using recovered unified process views. In: 2009 IEEE International conference on software maintenance, IEEE, pp 285\u2013294"},{"key":"10763_CR28","doi-asserted-by":"crossref","unstructured":"Hoang T, Dam HK, Kamei Y, Lo D, Ubayashi N (2019) Deepjit: an end-to-end deep learning framework for just-in-time defect prediction. In: 2019 IEEE\/ACM 16th international conference on mining software repositories (MSR), IEEE, pp 34\u201345","DOI":"10.1109\/MSR.2019.00016"},{"key":"10763_CR29","doi-asserted-by":"crossref","unstructured":"Hoang T, Kang HJ, Lo D, Lawall J (2020) Cc2vec: Distributed representations of code changes. In: Proceedings of the ACM\/IEEE 42nd international conference on software engineering, pp 518\u2013529","DOI":"10.1145\/3377811.3380361"},{"issue":"2","key":"10763_CR30","first-page":"1","volume":"5","author":"M Hossin","year":"2015","unstructured":"Hossin M, Sulaiman MN (2015) A review on evaluation metrics for data classification evaluations. Int J Data Mining Knowl Manag Process 5(2):1","journal-title":"Int J Data Mining Knowl Manag Process"},{"issue":"6","key":"10763_CR31","doi-asserted-by":"publisher","first-page":"1258","DOI":"10.1007\/s11390-020-0496-0","volume":"35","author":"Y Huang","year":"2020","unstructured":"Huang Y, Jia N, Zhou HJ, Chen XP, Zheng ZB, Tang MD (2020) Learning human-written commit messages to document code changes. J Comput Sci Technol 35(6):1258\u20131277","journal-title":"J Comput Sci Technol"},{"issue":"2","key":"10763_CR32","doi-asserted-by":"publisher","first-page":"C399","DOI":"10.1137\/24M1636071","volume":"47","author":"C Jacobsen","year":"2025","unstructured":"Jacobsen C, Zhuang Y, Duraisamy K (2025) Cocogen: Physically consistent and conditioned score-based generative models for forward and inverse problems. SIAM J Sci Comput 47(2):C399\u2013C425","journal-title":"SIAM J Sci Comput"},{"key":"10763_CR33","doi-asserted-by":"crossref","unstructured":"Jiang S, Armaly A, McMillan C (2017) Automatically generating commit messages from diffs using neural machine translation. In: Proceedings of the 32nd IEEE\/ACM international conference on automated software engineering, IEEE, pp 135\u2013146","DOI":"10.1109\/ASE.2017.8115626"},{"key":"10763_CR34","doi-asserted-by":"crossref","unstructured":"Jiang N, Lutellier T, Tan L (2021) Cure: Code-aware neural machine translation for automatic program repair. In: 2021 IEEE\/ACM 43rd International conference on software engineering (ICSE), IEEE, pp 1161\u20131173","DOI":"10.1109\/ICSE43902.2021.00107"},{"issue":"6","key":"10763_CR35","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1109\/TSE.2012.70","volume":"39","author":"Y Kamei","year":"2012","unstructured":"Kamei Y, Shihab E, Adams B, Hassan AE, Mockus A, Sinha A, Ubayashi N (2012) A large-scale empirical study of just-in-time quality assurance. IEEE Trans Softw Eng 39(6):757\u2013773","journal-title":"IEEE Trans Softw Eng"},{"issue":"5","key":"10763_CR36","doi-asserted-by":"publisher","first-page":"2072","DOI":"10.1007\/s10664-015-9400-x","volume":"21","author":"Y Kamei","year":"2016","unstructured":"Kamei Y, Fukushima T, McIntosh S, Yamashita K, Ubayashi N, Hassan AE (2016) Studying just-in-time defect prediction using cross-project models. Empirical Softw Eng 21(5):2072\u20132106","journal-title":"Empirical Softw Eng"},{"key":"10763_CR37","unstructured":"Kingma DP, Ba J (2014) Adam: A method for stochastic optimization. arXiv:1412.6980"},{"key":"10763_CR38","unstructured":"Kipf TN, Welling M (2016) Semi-supervised classification with graph convolutional networks. arXiv:1609.02907"},{"key":"10763_CR39","doi-asserted-by":"crossref","unstructured":"Li M, Miao Z, Zhang XP, Xu W (2021) An attention-seq2seq model based on crnn encoding for automatic labanotation generation from motion capture data. In: ICASSP 2021-2021 IEEE international conference on acoustics, speech and signal processing (ICASSP), IEEE, pp 4185\u20134189","DOI":"10.1109\/ICASSP39728.2021.9414976"},{"issue":"3","key":"10763_CR40","first-page":"1","volume":"31","author":"B Lin","year":"2022","unstructured":"Lin B, Wang S, Wen M, Mao X (2022) Context-aware code change embedding for better patch correctness assessment. ACM Trans Softw Eng Methodol (TOSEM) 31(3):1\u201329","journal-title":"ACM Trans Softw Eng Methodol (TOSEM)"},{"key":"10763_CR41","doi-asserted-by":"crossref","unstructured":"Linares-V\u00e1squez M, Cort\u00e9s-Coy LF, Aponte J, Poshyvanyk D (2015) Changescribe: A tool for automatically generating commit messages. In: 2015 IEEE\/ACM 37th IEEE international conference on software engineering, IEEE, vol.\u00a02, pp 709\u2013712","DOI":"10.1109\/ICSE.2015.229"},{"key":"10763_CR42","doi-asserted-by":"crossref","unstructured":"Lin B, Wang S, Liu Z, Liu Y, Xia X, Mao X (2023) Cct5: A code-change-oriented pre-trained model. In: Proceedings of the 31st ACM joint European software engineering conference and symposium on the foundations of software engineering, pp 1509\u20131521","DOI":"10.1145\/3611643.3616339"},{"key":"10763_CR43","unstructured":"Liu S, Gao C, Chen S, Yiu NL, Liu Y (2020c) Atom: Commit message generation based on abstract syntax tree and hybrid ranking. IEEE Trans Softw Eng"},{"key":"10763_CR44","doi-asserted-by":"crossref","unstructured":"Liu Q, Liu Z, Zhu H, Fan H, Du B, Qian Y (2019) Generating commit messages from diffs using pointer-generator network. In: 2019 IEEE\/ACM 16th International conference on mining software repositories (MSR), IEEE, pp 299\u2013309","DOI":"10.1109\/MSR.2019.00056"},{"key":"10763_CR45","doi-asserted-by":"crossref","unstructured":"Liu F, Li G, Wei B, Xia X, Fu Z, Jin Z (2020a) A self-attentional neural architecture for code completion with multi-task learning. In: Proceedings of the 28th international conference on program comprehension, pp 37\u201347","DOI":"10.1145\/3387904.3389261"},{"key":"10763_CR46","doi-asserted-by":"crossref","unstructured":"Liu F, Li G, Zhao Y, Jin Z (2020b) Multi-task learning based pre-trained language model for code completion. In: Proceedings of the 35th IEEE\/ACM international conference on automated software engineering, pp 473\u2013485","DOI":"10.1145\/3324884.3416591"},{"key":"10763_CR47","doi-asserted-by":"publisher","unstructured":"Liu Z, Tang Z, Xia X, Yang X (2023) Ccrep: Learning code change representations via pre-trained code model and query back. In: 45th IEEE\/ACM International conference on software engineering, ICSE 2023, Melbourne, Australia, IEEE, May 14-20, 2023, pp 17\u201329. https:\/\/doi.org\/10.1109\/ICSE48619.2023.00014","DOI":"10.1109\/ICSE48619.2023.00014"},{"key":"10763_CR48","doi-asserted-by":"crossref","unstructured":"Liu Z, Xia X, Hassan AE, Lo D, Xing Z, Wang X (2018) Neural-machine-translation-based commit message generation: how far are we? In: Proceedings of the 33rd ACM\/IEEE international conference on automated software engineering, pp 373\u2013384","DOI":"10.1145\/3238147.3238190"},{"key":"10763_CR49","doi-asserted-by":"crossref","unstructured":"Luo C, Zhan J, Xue X, Wang L, Ren R, Yang Q (2018) Cosine normalization: Using cosine similarity instead of dot product in neural networks. In: International conference on artificial neural networks, Springer, pp 382\u2013391","DOI":"10.1007\/978-3-030-01418-6_38"},{"key":"10763_CR50","doi-asserted-by":"publisher","unstructured":"Networkx (2018) In:\u00a0Alhajj R, Rokne JG (eds.) Encyclopedia of social network analysis and mining, 2nd Edition. Springer. https:\/\/doi.org\/10.1007\/978-1-4939-7131-2_100771","DOI":"10.1007\/978-1-4939-7131-2_100771"},{"key":"10763_CR51","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1016\/j.neucom.2021.05.039","volume":"459","author":"LY Nie","year":"2021","unstructured":"Nie LY, Gao C, Zhong Z, Lam W, Liu Y, Xu Z (2021) Coregen: Contextualized code representation learning for commit message generation. Neurocomputing 459:97\u2013107","journal-title":"Neurocomputing"},{"key":"10763_CR52","doi-asserted-by":"crossref","unstructured":"Niemeyer M, Geiger A (2021) Giraffe: Representing scenes as compositional generative neural feature fields. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11453\u201311464","DOI":"10.1109\/CVPR46437.2021.01129"},{"key":"10763_CR53","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, Zhu WJ (2002) Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting of the Association for Computational Linguistics, pp 311\u2013318","DOI":"10.3115\/1073083.1073135"},{"key":"10763_CR54","doi-asserted-by":"publisher","first-page":"5239","DOI":"10.1609\/aaai.v37i4.25654","volume":"37","author":"W Pian","year":"2023","unstructured":"Pian W, Peng H, Tang X, Sun T, Tian H, Habib A, Klein J, Bissyand\u00e9 TF (2023) Metatptrans: A meta learning approach for multilingual code representation learning. Proceedings of the AAAI conference on artificial intelligence 37:5239\u20135247","journal-title":"Proceedings of the AAAI conference on artificial intelligence"},{"key":"10763_CR55","doi-asserted-by":"crossref","unstructured":"Pian W, Peng H, Tang X, Sun T, Tian H, Habib A, Klein J, Bissyand\u00e9 TF (2022) Metatptrans: A meta learning approach for multilingual code representation learning. arXiv:2206.06460","DOI":"10.1609\/aaai.v37i4.25654"},{"key":"10763_CR56","doi-asserted-by":"crossref","unstructured":"Qi Z, Long F, Achour S, Rinard M (2015) An analysis of patch plausibility and correctness for generate-and-validate patch generation systems. In: Proceedings of the 2015 international symposium on software testing and analysis, pp 24\u201336","DOI":"10.1145\/2771783.2771791"},{"key":"10763_CR57","doi-asserted-by":"crossref","unstructured":"Qin L, Liu T, Che W, Kang B, Zhao S, Liu T (2021) A co-interactive transformer for joint slot filling and intent detection. In: ICASSP 2021-2021 IEEE International conference on acoustics, speech and signal processing (ICASSP), IEEE, pp 8193\u20138197","DOI":"10.1109\/ICASSP39728.2021.9414110"},{"key":"10763_CR58","unstructured":"Rouge LC (2004) A package for automatic evaluation of summaries. In: Proceedings of workshop on text summarization of ACL, Spain"},{"key":"10763_CR59","doi-asserted-by":"crossref","unstructured":"See A, Liu PJ, Manning CD (2017) Get to the point: Summarization with pointer-generator networks. arXiv:1704.04368","DOI":"10.18653\/v1\/P17-1099"},{"key":"10763_CR60","doi-asserted-by":"crossref","unstructured":"Shariffdeen R, Noller Y, Grunske L, Roychoudhury A (2021) Concolic program repair. In: Proceedings of the 42nd ACM SIGPLAN international conference on programming language design and implementation, pp 390\u2013405","DOI":"10.1145\/3453483.3454051"},{"key":"10763_CR61","doi-asserted-by":"publisher","unstructured":"Shaw P, Uszkoreit J, Vaswani, A (2018) Self-attention with relative position representations. In: Walker MA,\u00a0Ji H,\u00a0Stent A (eds.) Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT, New Orleans, Louisiana, USA, June 1\u20136, 2018, Volume 2 (Short Papers), pp 464\u2013468. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/n18-2074","DOI":"10.18653\/v1\/n18-2074"},{"key":"10763_CR62","doi-asserted-by":"publisher","unstructured":"Shi E, Wang Y, Du L, Zhang H, Han S, Zhang D, Sun H (2021) CAST: enhancing code summarization with hierarchical splitting and reconstruction of abstract syntax trees. In:\u00a0Moens M,\u00a0Huang X,\u00a0Specia L, Yih SW (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP 2021, Virtual Event \/ Punta Cana, Dominican Republic, 7-11 November, pp 4053\u20134062. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.332","DOI":"10.18653\/v1\/2021.emnlp-main.332"},{"key":"10763_CR63","doi-asserted-by":"crossref","unstructured":"Svyatkovskiy A, Zhao Y, Fu S, Sundaresan N (2019) Pythia: Ai-assisted code completion system. In: Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery & data mining, pp 2727\u20132735","DOI":"10.1145\/3292500.3330699"},{"key":"10763_CR64","doi-asserted-by":"crossref","unstructured":"Tang X, Zhu R, Sun T, Wang S (2021) Moto: Enhancing embedding with multiple joint factors for chinese text classification. In: 2020 25th International conference on pattern recognition (ICPR), IEEE, pp 2882\u20132888","DOI":"10.1109\/ICPR48806.2021.9412501"},{"key":"10763_CR65","doi-asserted-by":"crossref","unstructured":"Tao Y, Kim S, Kim M et\u00a0al (2012) How do software engineers understand code changes? an exploratory study in industry. In: Proceedings of the 20th international symposium on the foundations of software engineering, pp 1\u201310","DOI":"10.1145\/2393596.2393656"},{"key":"10763_CR66","unstructured":"Thunes C (2013) javalang: pure python java parser and tools"},{"key":"10763_CR67","doi-asserted-by":"crossref","unstructured":"Tian H, Li Y, Pian W, Kabore AK, Liu K, Habib A, Klein J, Bissyand\u00e9 TF (2022a) Predicting patch correctness based on the similarity of failing test cases. ACM Trans Softw Eng Methodol","DOI":"10.1145\/3511096"},{"key":"10763_CR68","doi-asserted-by":"crossref","unstructured":"Tian H, Liu K, Kabor\u00e9 AK, Koyuncu A, Li L, Klein J, Bissyand\u00e9 TF (2020) Evaluating representation learning of code changes for predicting patch correctness in program repair. In: 2020 35th IEEE\/ACM International conference on automated software engineering (ASE), IEEE, pp 981\u2013992","DOI":"10.1145\/3324884.3416532"},{"key":"10763_CR69","doi-asserted-by":"crossref","unstructured":"Tian H, Liu K, Li Y, Kabor\u00e9 AK, Koyuncu A, Habib A, Li L, Wen J, Klein J, Bissyand\u00e9 TF (2022b) The best of both worlds: Combining learned embeddings with engineered features for accurate prediction of correct patches. arXiv:2203.08912","DOI":"10.1145\/3576039"},{"key":"10763_CR70","doi-asserted-by":"crossref","unstructured":"Tian H, Tang X, Habib A, Wang S, Liu K, Xia X, Klein J, Bissyand\u00e9 TF (2022c) Is this change the answer to that problem? correlating descriptions of bug and code changes for evaluating patch correctness. arXiv:2208.04125","DOI":"10.1145\/3551349.3556914"},{"key":"10763_CR71","unstructured":"Van\u00a0der Maaten L, Hinton G (2008) Visualizing data using t-sne. J Mach Learn Res 9(11)"},{"key":"10763_CR72","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst 30"},{"key":"10763_CR73","unstructured":"Vijayakumar AK, Cogswell M, Selvaraju RR, Sun Q, Lee S, Crandall D, Batra D (2016) Diverse beam search: Decoding diverse solutions from neural sequence models. arXiv:1610.02424"},{"key":"10763_CR74","unstructured":"Wang K, Singh R, Su Z (2017) Dynamic neural program embedding for program repair. arXiv:1711.07163"},{"key":"10763_CR75","doi-asserted-by":"crossref","unstructured":"Wang S, Tang D, Zhang L, Li H, Han D (2022a) Hienet: Bidirectional hierarchy framework for automated icd coding. In: International conference on database systems for advanced applications, Springer, pp 523\u2013539","DOI":"10.1007\/978-3-031-00126-0_38"},{"key":"10763_CR76","doi-asserted-by":"publisher","unstructured":"Wang S, Tang D, Zhang L, Li H, Han D (2022b) Hienet: Bidirectional hierarchy framework for automated ICD coding. In:\u00a0Bhattacharya A,\u00a0Lee J,\u00a0Li M,\u00a0Agrawal D, Reddy PK, Mohania MK,\u00a0Mondal A,\u00a0Goyal V, Kiran RU (eds.) Database Systems for Advanced Applications - 27th International Conference, DASFAA 2022, Virtual Event, April 11-14, 2022, Proceedings, Part II, Lecture Notes in Computer Science, Springer, vol. 13246, pp 523\u2013539. https:\/\/doi.org\/10.1007\/978-3-031-00126-0_38","DOI":"10.1007\/978-3-031-00126-0_38"},{"key":"10763_CR77","doi-asserted-by":"crossref","unstructured":"Wang X, Wang S, Feng P, Sun K, Jajodia S (2021b) Patchdb: A large-scale security patch dataset. In: 2021 51st Annual IEEE\/IFIP international conference on dependable systems and networks (DSN), IEEE, pp 149\u2013160","DOI":"10.1109\/DSN48987.2021.00030"},{"key":"10763_CR78","doi-asserted-by":"publisher","unstructured":"Wang Y, Wang W, Joty SR, Hoi SCH (2021d) Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. In:\u00a0Moens M,\u00a0Huang X,\u00a0Specia L, Yih SW (eds.) Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP 2021, Virtual Event \/ Punta Cana, Dominican Republic, 7-11 November, pp 8696\u20138708. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.685","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"10763_CR79","doi-asserted-by":"crossref","unstructured":"Wang Y, Wang W, Joty S, Hoi SC (2021c) Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv:2109.00859","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"10763_CR80","doi-asserted-by":"crossref","unstructured":"Wang S, Wang X, Sun K, Jajodia S, Wang H, Li Q (2023) Graphspd: Graph-based security patch detection with enriched code semantics. In: 2023 IEEE Symposium on security and privacy (SP), IEEE, pp 2409\u20132426","DOI":"10.1109\/SP46215.2023.10179479"},{"key":"10763_CR81","doi-asserted-by":"crossref","unstructured":"Wang H, Xia X, Lo D, He Q, Wang X, Grundy J (2021a) Context-aware retrieval-based deep commit message generation. ACM Trans Softw Eng Methodol (TOSEM) 30(4):1\u201330","DOI":"10.1145\/3464689"},{"key":"10763_CR82","unstructured":"Wang M, Zheng D, Ye Z, Gan Q, Li M, Song X, Zhou J, Ma C, Yu L, Gai Y et\u00a0al (2019) Deep graph library: A graph-centric, highly-performant package for graph neural networks. arXiv:1909.01315"},{"key":"10763_CR83","doi-asserted-by":"crossref","unstructured":"Wilcoxon F (1992) Individual comparisons by ranking methods. In: Breakthroughs in statistics: Methodology and distribution, Springer, pp 196\u2013202","DOI":"10.1007\/978-1-4612-4380-9_16"},{"key":"10763_CR84","doi-asserted-by":"crossref","unstructured":"Xu S, Yao Y, Xu F, Gu T, Tong H, Lu J (2019) Commit message generation for source code changes. In: IJCAI","DOI":"10.24963\/ijcai.2019\/552"},{"key":"10763_CR85","doi-asserted-by":"crossref","unstructured":"Yefet N, Alon U, Yahav E (2020) Adversarial examples for models of code. Proceed ACM Program Lang 4(OOPSLA):1\u201330","DOI":"10.1145\/3428230"},{"key":"10763_CR86","unstructured":"Yin P, Neubig G, Allamanis M, Brockschmidt M, Gaunt AL (2019) Learning to represent edits. In: International conference on learning representations. https:\/\/openreview.net\/forum?id=BJl6AjC5F7"},{"key":"10763_CR87","doi-asserted-by":"crossref","unstructured":"Zhang F, Chen B, Zhao Y, Peng X (2023) Slice-based code change representation learning. In: 2023 IEEE International conference on software analysis, evolution and reengineering (SANER), IEEE, pp 319\u2013330","DOI":"10.1109\/SANER56733.2023.00038"},{"key":"10763_CR88","doi-asserted-by":"crossref","unstructured":"Zhang Z, Han X, Liu Z, Jiang X, Sun M, Liu Q (2019c) Ernie: Enhanced language representation with informative entities. arXiv:1905.07129","DOI":"10.18653\/v1\/P19-1139"},{"key":"10763_CR89","doi-asserted-by":"publisher","unstructured":"Zhang Z, Han X, Liu Z, Jiang X, Sun M, Liu Q (2019d) ERNIE: Enhanced language representation with informative entities. In: Proceedings of the 57th annual meeting of the association for computational linguistics, pp 1441\u20131451. Association for Computational Linguistics, Florence, Italy. https:\/\/doi.org\/10.18653\/v1\/P19-1139","DOI":"10.18653\/v1\/P19-1139"},{"key":"10763_CR90","doi-asserted-by":"crossref","unstructured":"Zhang S, Tong H, Xu J, Maciejewski R (2019b) Graph convolutional networks: a comprehensive review. Computat Soc Netw 6(1):1\u201323","DOI":"10.1186\/s40649-019-0069-y"},{"key":"10763_CR91","unstructured":"Zhang Y, Wallace B (2015) A sensitivity analysis of (and practitioners\u2019 guide to) convolutional neural networks for sentence classification. arXiv:1510.03820"},{"key":"10763_CR92","doi-asserted-by":"crossref","unstructured":"Zhang J, Wang X, Zhang H, Sun H, Wang K, Liu X (2019a) A novel neural source code representation based on abstract syntax tree. In: 2019 IEEE\/ACM 41st international conference on software engineering (ICSE), IEEE, pp 783\u2013794","DOI":"10.1109\/ICSE.2019.00086"},{"issue":"1","key":"10763_CR93","first-page":"1","volume":"31","author":"Y Zhou","year":"2021","unstructured":"Zhou Y, Siow JK, Wang C, Liu S, Liu Y (2021) Spi: Automated identification of security patches via commits. ACM Trans Softw Eng Methodol (TOSEM) 31(1):1\u201327","journal-title":"ACM Trans Softw Eng Methodol (TOSEM)"},{"key":"10763_CR94","doi-asserted-by":"crossref","unstructured":"Zhou X, Xu B, Han D, Yang Z, He J, Lo D (2023) Ccbert: Self-supervised code change representation learning. In: 2023 IEEE International conference on software maintenance and evolution (ICSME), IEEE, pp 182\u2013193","DOI":"10.1109\/ICSME58846.2023.00028"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10763-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-025-10763-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10763-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T14:36:34Z","timestamp":1774881394000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-025-10763-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,17]]},"references-count":94,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,5]]}},"alternative-id":["10763"],"URL":"https:\/\/doi.org\/10.1007\/s10664-025-10763-6","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,17]]},"assertion":[{"value":"2 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest Statement"}},{"value":"This study does not involve human participants, animals, or other entities requiring ethical oversight. Consequently, no ethical approval was required.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"No human participants were involved in this study, and informed consent was therefore not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed Consent"}}],"article-number":"50"}}