{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T15:08:16Z","timestamp":1774883296356,"version":"3.50.1"},"reference-count":63,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T00:00:00Z","timestamp":1770681600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T00:00:00Z","timestamp":1770681600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001866","name":"Fonds National de la Recherche Luxembourg","doi-asserted-by":"publisher","award":["CORE project under Grant C22\/IS\/17426831\/MeMoRIA"],"award-info":[{"award-number":["CORE project under Grant C22\/IS\/17426831\/MeMoRIA"]}],"id":[{"id":"10.13039\/501100001866","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1007\/s10664-026-10809-3","type":"journal-article","created":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T11:10:43Z","timestamp":1770721843000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["GenCode: A generic data augmentation framework for boosting deep learning-based code understanding"],"prefix":"10.1007","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7742-0264","authenticated-orcid":false,"given":"Zeming","family":"Dong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8251-1669","authenticated-orcid":false,"given":"Qiang","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaofei","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Maxime","family":"Cordy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mike","family":"Papadakis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yves\u00a0Le","family":"Traon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianjun","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,10]]},"reference":[{"issue":"4","key":"10809_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3212695","volume":"51","author":"M Allamanis","year":"2018","unstructured":"Allamanis M, Barr ET, Devanbu P, Sutton C (2018) A survey of machine learning for big code and naturalness. ACM Comput Surv (CSUR) 51(4):1\u201337","journal-title":"ACM Comput Surv (CSUR)"},{"key":"10809_CR2","unstructured":"Allamanis M, Jackson-Flux HR, Brockschmidt M (2021) Self-supervised bug detection and repair. In: Beygelzimer A, Dauphin Y, Liang P, Vaughan JW (eds) Advances in neural information processing systems"},{"key":"10809_CR3","unstructured":"Alon U, Brody S, Levy O, Yahav E (2019a) code2seq: generating sequences from structured representations of code. In: International conference on learning representations"},{"key":"10809_CR4","doi-asserted-by":"crossref","unstructured":"Alon U, Zilberstein M, Levy O, Yahav E (2019b) code2vec: learning distributed representations of code. Proc ACM Progr Lang 3(POPL):1\u201329","DOI":"10.1145\/3290353"},{"key":"10809_CR5","unstructured":"Ash JT, Zhang C, Krishnamurthy A, Langford J, Agarwal A (2019) Deep batch active learning by diverse uncertain gradient lower bounds. arXiv preprint arXiv:1906.03671"},{"key":"10809_CR6","unstructured":"Bielik P. and Vechev M (2020) Adversarial robustness for code. In III HD, Singh A (eds) In: Proceedings of the 37th international conference on machine learning volume 119 of proceedings of machine learning research, PMLR, pp 896\u2013907"},{"key":"10809_CR7","doi-asserted-by":"crossref","unstructured":"Bui ND, Yu Y, Jiang L (2021) Self-supervised contrastive learning for code retrieval and summarization via semantic-preserving transformations. In: Proceedings of the 44th international ACM SIGIR conference on research and development in information retrieval SIGIR \u201921, New York NY USA. Association for Computing Machinery, pp 511\u2013521","DOI":"10.1145\/3404835.3462840"},{"key":"10809_CR8","doi-asserted-by":"crossref","unstructured":"Cubuk ED, Zoph B, Shlens J, Le QV (2020) Randaugment: Practical automated data augmentation with a reduced search space. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 702\u2013703","DOI":"10.1109\/CVPRW50498.2020.00359"},{"key":"10809_CR9","doi-asserted-by":"crossref","unstructured":"de\u00a0Paula\u00a0Rodrigues GE, Braga AM, Dahab R (2023) Detecting cryptography misuses with machine learning: Graph embeddings transfer learning and data augmentation in source code related tasks. IEEE Trans Reliab","DOI":"10.1109\/TR.2023.3237849"},{"key":"10809_CR10","unstructured":"Dinella E, Dai H, Li Z, Naik M, Song L, Wang K (2020) Hoppity: Learning graph transformations to detect and fix bugs in programs. In: international conference on learning representations"},{"key":"10809_CR11","doi-asserted-by":"publisher","first-page":"1679","DOI":"10.18653\/v1\/2024.findings-acl.97","volume":"2024","author":"B Ding","year":"2024","unstructured":"Ding B, Qin C, Zhao R, Luo T, Li X, Chen G, Xia W, Hu J, Tuan LA, Joty S (2024) Data augmentation using llms: Data perspectives learning paradigms and challenges. In findings of the association for computational linguistics ACL 2024:1679\u20131705","journal-title":"In findings of the association for computational linguistics ACL"},{"key":"10809_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111328","volume":"285","author":"Z Dong","year":"2024","unstructured":"Dong Z, Hu Q, Zhang Z, Zhao J (2024) On the effectiveness of graph data augmentation for source code learning. Knowl-Based Syst 285:111328","journal-title":"Knowl-Based Syst"},{"issue":"3","key":"10809_CR13","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1007\/s10664-025-10624-2","volume":"30","author":"Z Dong","year":"2025","unstructured":"Dong Z, Hu Q, Guo Y, Zhang Z, Cordy M, Papadakis M, Le Traon Y, Zhao J (2025) Boosting source code learning with text-oriented data augmentation: An empirical study. Empir Softw Eng 30(3):68","journal-title":"Empir Softw Eng"},{"key":"10809_CR14","doi-asserted-by":"crossref","unstructured":"Dong Z, Hu Q, Zhang Z, Guo Y, Cordy M, Papadakis M, Le\u00a0Traon Y, Zhao J (2024a) On the effectiveness of hybrid pooling in mixup-based graph learning for language processing. J Syst Softw 112139","DOI":"10.1016\/j.jss.2024.112139"},{"key":"10809_CR15","doi-asserted-by":"crossref","unstructured":"Dong Z, Hu Q, Zhang Z, Zhao J (2024b) On the effectiveness of graph data augmentation for source code learning. Knowl-Based Syst 285:111328","DOI":"10.1016\/j.knosys.2023.111328"},{"key":"10809_CR16","doi-asserted-by":"crossref","unstructured":"Feng SY, Gangal V, Wei J, Chandar S, Vosoughi S, Mitamura T, Hovy E (2021) A survey of data augmentation approaches for NLP. In: Findings of the association for computational linguistics: ACL-IJCNLP 2021, Online. Association for Computational Linguistics, pp 968\u2013988","DOI":"10.18653\/v1\/2021.findings-acl.84"},{"key":"10809_CR17","doi-asserted-by":"crossref","unstructured":"Feng Z, Guo D, Tang D, Duan N, Feng X, Gong M, Shou L, Qin B, Liu T, Jiang D, Zhou M (2020) Codebert: A pre-trained model for programming and natural languages, pp 1536\u20131547","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"10809_CR18","doi-asserted-by":"crossref","unstructured":"Gao X, Saha RK, Prasad MR, Roychoudhury A (2020) Fuzz testing based data augmentation to improve robustness of deep neural networks. In: Proceedings of the ACM\/IEEE 42nd international conference on software engineering ICSE \u201920, New York NY USA. Association for Computing Machinery, pp 1147\u20131158","DOI":"10.1145\/3377811.3380415"},{"key":"10809_CR19","unstructured":"Goodfellow IJ, Shlens J, Szegedy C (2015) Explaining and harnessing adversarial examples. In: Bengio Y, LeCun Y (eds) 3rd international conference on learning representations (ICLR)"},{"key":"10809_CR20","unstructured":"Guo D, Ren S, Lu S, Feng Z, Tang D, Liu S, Zhou L, Duan N, Svyatkovskiy A, Fu S et\u00a0al (2020) Graphcodebert: Pre-training code representations with data flow. arXiv preprint arXiv:2009.08366"},{"key":"10809_CR21","unstructured":"Guo H, Mao Y, Zhang R (2019) Augmenting data with mixup for sentence classification: An empirical study"},{"key":"10809_CR22","doi-asserted-by":"crossref","unstructured":"Hu Q, Guo Y, Xie X, Cordy M, Ma L, Papadakis M, Le\u00a0Traon Y (2024) Active code learning: Benchmarking sample-efficient training of code models. IEEE Trans Softw Eng","DOI":"10.1109\/TSE.2024.3376964"},{"key":"10809_CR23","doi-asserted-by":"crossref","unstructured":"Hu Y, Ahmed UZ, Mechtaev S, Leong B, Roychoudhury A (2019) Re-factoring based program repair applied to programming assignments. In 34th IEEE\/ACM international conference on automated software engineering (ASE), IEEE, pp 388\u2013398","DOI":"10.1109\/ASE.2019.00044"},{"key":"10809_CR24","unstructured":"Hui B, Yang J, Cui Z, Yang J, Liu D, Zhang L, Liu T, Zhang J, Yu B, Lu K et\u00a0al (2024) Qwen2. 5-coder technical report. arXiv preprint arXiv:2409.12186"},{"key":"10809_CR25","doi-asserted-by":"crossref","unstructured":"Khajezade M, Wu JJ, Fard FH, Rodr\u00edguez-P\u00e9rez G, Shehata MS (2024) Investigating the efficacy of large language models for code clone detection. In: Proceedings of the 32nd IEEE\/ACM international conference on program comprehension, pp 161\u2013165","DOI":"10.1145\/3643916.3645030"},{"key":"10809_CR26","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980"},{"key":"10809_CR27","first-page":"1885","volume-title":"Understanding black-box predictions via influence functions","author":"PW Koh","year":"2017","unstructured":"Koh PW, Liang P (2017) Understanding black-box predictions via influence functions. In: International conference on machine learning, PMLR, pp 1885\u20131894"},{"key":"10809_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2020.110610","volume":"167","author":"G Lacerda","year":"2020","unstructured":"Lacerda G, Petrillo F, Pimenta M, Gu\u00e9h\u00e9neuc YG (2020) Code smells and refactoring: A tertiary systematic review of challenges and observations. J Syst Softw 167:110610","journal-title":"J Syst Softw"},{"key":"10809_CR29","doi-asserted-by":"crossref","unstructured":"Li H, Miao C, Leung C, Huang Y, Huang Y, Zhang H, Wang Y (2022) Exploring representation-level augmentation for code search. In EMNLP, pp 4924\u20134936","DOI":"10.18653\/v1\/2022.emnlp-main.327"},{"key":"10809_CR30","unstructured":"Lozhkov A, Li R, Allal LB, Cassano F, Lamy-Poirier J, Tazi N, Tang A, Pykhtar D, Liu J, Wei Y et\u00a0al (2024) Starcoder 2 and the stack v2: The next generation. arXiv preprint arXiv:2402.19173"},{"key":"10809_CR31","unstructured":"Lu S, Guo D, Ren S, Huang J, Svyatkovskiy A, Blanco A, Clement C,Drain D, Jiang D, Tang D et\u00a0al (2021) Codexglue: A machine learning benchmark dataset for code understanding and generation. In: 35fth conference on neural information processing systems (NeurIPS)"},{"issue":"3","key":"10809_CR32","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1016\/0098-3004(93)90090-R","volume":"19","author":"A Ma\u0107kiewicz","year":"1993","unstructured":"Ma\u0107kiewicz A, Ratajczak W (1993) Principal components analysis (PCA). Comput Geosci 19(3):303\u2013342","journal-title":"Comput Geosci"},{"key":"10809_CR33","doi-asserted-by":"crossref","unstructured":"Ma\u0107kiewicz A, Ratajczak W (1993) Principal components analysis (PCA). Comput Geosci 19(3):303\u2013342","DOI":"10.1016\/0098-3004(93)90090-R"},{"key":"10809_CR34","doi-asserted-by":"crossref","unstructured":"Marivate V, Sefara T (2020) Improving short text classification through global augmentation methods. In: machine learning and knowledge extraction: 4th IFIP TC 5 TC 12 WG 8.4 WG 8.9 WG 12.9 International cross-domain conference CD-MAKE 2020 Dublin Ireland August 25\u201328 2020 Proceedings 4, Springer, pp 385\u2013399","DOI":"10.1007\/978-3-030-57321-8_21"},{"key":"10809_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2020.106378","volume":"129","author":"Q Mi","year":"2021","unstructured":"Mi Q, Xiao Y, Cai Z, Jia X (2021) The effectiveness of data augmentation in code readability classification. Inf Softw Technol 129:106378","journal-title":"Inf Softw Technol"},{"key":"10809_CR36","doi-asserted-by":"crossref","unstructured":"Park S, Kim Y, Han YS (2023) Contrastive learning with keyword-based data augmentation for code search and code question answering. In: Vlachos A, Augenstein I (eds) Proceedings of the 17th conference of the european chapter of the association for computational linguistics, Dubrovnik Croatia. Association for Computational Linguistics, pp 3609\u20133619","DOI":"10.18653\/v1\/2023.eacl-main.262"},{"key":"10809_CR37","doi-asserted-by":"crossref","unstructured":"Pour MV, Li Z, Ma L, Hemmati H (2021) A search-based testing framework for deep neural networks of source code embedding. In 14th IEEE conference on software testing verification and validation (ICST), Los Alamitos CA USA. IEEE Computer Society, pp 36\u201346","DOI":"10.1109\/ICST49551.2021.00016"},{"key":"10809_CR38","unstructured":"Puri R, Kung DS, Janssen G, Zhang W, Domeniconi G, Zolotov V, Dolby J, Chen J, Choudhury M, Decker L et\u00a0al (2021) Codenet: A large-scale ai for code dataset for learning a diversity of coding tasks"},{"key":"10809_CR39","first-page":"29935","volume":"34","author":"SA Rebuffi","year":"2021","unstructured":"Rebuffi SA, Gowal S, Calian DA, Stimberg F, Wiles O, Mann TA (2021) Data augmentation can improve robustness. Adv Neural Inf Process Syst 34:29935\u201329948","journal-title":"Adv Neural Inf Process Syst"},{"key":"10809_CR40","doi-asserted-by":"crossref","unstructured":"Ren S, Zhang J, Li L, Sun X, Zhou J (2021) Text AutoAugment: Learning compositional augmentation policy for text classification. In: Proceedings of the 2021 conference on empirical methods in natural language processing","DOI":"10.18653\/v1\/2021.emnlp-main.711"},{"key":"10809_CR41","doi-asserted-by":"crossref","unstructured":"Ribeiro MT, Singh S, Guestrin C (2016) \" why should i trust you?\" explaining the predictions of any classifier. In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, pp 1135\u20131144","DOI":"10.1145\/2939672.2939778"},{"issue":"5","key":"10809_CR42","doi-asserted-by":"publisher","first-page":"1763","DOI":"10.1213\/ANE.0000000000002864","volume":"126","author":"P Schober","year":"2018","unstructured":"Schober P, Boer C, Schwarte LA (2018) Correlation coefficients: appropriate use and interpretation. Anesth Analg 126(5):1763\u20131768","journal-title":"Anesth Analg"},{"key":"10809_CR43","doi-asserted-by":"crossref","unstructured":"Sun Z, Li L, Liu Y, Du X (2022) On the importance of building high-quality training datasets for neural code search. In: Proceedings of the 44th international conference on software engineering ICSE \u201922, New York NY USA. Association for Computing Machinery, pp 1609\u20131620","DOI":"10.1145\/3510003.3510160"},{"key":"10809_CR44","doi-asserted-by":"crossref","unstructured":"Svajlenko J, Islam JF, Keivanloo I, Roy CK, Mia MM (2014) Towards a big data curated benchmark of inter-project code clones. In: 2014 IEEE international conference on software maintenance and evolution, IEEE, pp 476\u2013480","DOI":"10.1109\/ICSME.2014.77"},{"key":"10809_CR45","first-page":"6438","volume-title":"Manifold mixup: better representations by interpolating hidden states","author":"V Verma","year":"2019","unstructured":"Verma V, Lamb A, Beckham C, Najafi A, Mitliagkas I, Lopez-Paz D, Bengio Y (2019) Manifold mixup: better representations by interpolating hidden states. In International conference on machine learning, PMLR, pp 6438\u20136447"},{"key":"10809_CR46","doi-asserted-by":"crossref","unstructured":"Wang D, Jia Z, Li S, Yu Y, Xiong Y, Dong W, Liao X (2022) Bridging pre-trained models and downstream tasks for source code understanding. In: Proceedings of the 44th international conference on software engineering, New York NY USA. Association for Computing Machinery, pp 287\u2013298","DOI":"10.1145\/3510003.3510062"},{"key":"10809_CR47","doi-asserted-by":"crossref","unstructured":"Wang W, Li G, Ma B, Xia X, Jin Z (2020) Detecting code clones with graph neural network and flow-augmented abstract syntax tree. In: 2020 IEEE 27th international conference on software analysis, evolution and reengineering (SANER), IEEE, pp 261\u2013271","DOI":"10.1109\/SANER48275.2020.9054857"},{"key":"10809_CR48","doi-asserted-by":"crossref","unstructured":"Wang Y, Wang W, Joty S, Hoi SC (2021) CodeT5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. In: Proceedings of the 2021 conference on empirical methods in natural language processing, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics, pp 8696\u20138708","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"10809_CR49","doi-asserted-by":"crossref","unstructured":"Wei J, Zou K (2019) Eda: easy data augmentation techniques for boosting performance on text classification tasks. In: Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (EMNLP-IJCNLP), Hong Kong, China. Association for Computational Linguistics, pp 6382\u20136388","DOI":"10.18653\/v1\/D19-1670"},{"key":"10809_CR50","unstructured":"Wu X, Jang U, Chen J, Chen L, Jha S (2018) Reinforcing adversarial robustness using model confidence induced by adversarial training. In: Dy J, Krause A (eds) In: Proceedings of the 35th international conference on machine learning volume\u00a080 of Proceedings of machine learning research, PMLR, pp 5334\u20135342"},{"key":"10809_CR51","unstructured":"Xie Q, Dai Z, Hovy E, Luong MT, Le QV (2020) Unsupervised data augmentation for consistency training. In: Proceedings of the 34th international conference on neural information processing systems NIPS\u201920 Red Hook NY USA. Curran Associates Inc"},{"key":"10809_CR52","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2022.111577","volume":"197","author":"G Yang","year":"2023","unstructured":"Yang G, Zhou Y, Chen X, Zhang X, Han T, Chen T (2023) Exploitgen: Template-augmented exploit code generation based on codebert. J Syst Softw 197:111577","journal-title":"J Syst Softw"},{"issue":"7","key":"10809_CR53","doi-asserted-by":"publisher","first-page":"2805","DOI":"10.1007\/s10115-023-01853-2","volume":"65","author":"Z Yang","year":"2023","unstructured":"Yang Z, Sinnott RO, Bailey J, Ke Q (2023) A survey of automated data augmentation algorithms for deep learning-based image classification tasks. Knowl Inf Syst 65(7):2805\u20132861","journal-title":"Knowl Inf Syst"},{"key":"10809_CR54","doi-asserted-by":"crossref","unstructured":"Yang Z, Sinnott RO, Bailey J, Ke Q (2023b) A survey of automated data augmentation algorithms for deep learning-based image classification tasks. Knowl Inf Syst 65(7):2805\u20132861","DOI":"10.1007\/s10115-023-01853-2"},{"key":"10809_CR55","unstructured":"Yasunaga M, Liang P (2020) Graph-based self-supervised program repair from diagnostic feedback. In: Proceedings of the 37th international conference on machine learning ICML\u201920. JMLR.org"},{"key":"10809_CR56","doi-asserted-by":"crossref","unstructured":"Yefet N, Alon U, Yahav E (2020) Adversarial examples for models of code. Proc ACM Progr Lang 4(OOPSLA):1\u201330","DOI":"10.1145\/3428230"},{"key":"10809_CR57","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2022.111304","volume":"190","author":"S Yu","year":"2022","unstructured":"Yu S, Wang T, Wang J (2022) Data augmentation by program transformation. J Syst Softw 190:111304","journal-title":"J Syst Softw"},{"key":"10809_CR58","doi-asserted-by":"crossref","unstructured":"Yun S, Han D, Oh SJ, Chun S, Choe J, Yoo Y (2019) Cutmix: regularization strategy to train strong classifiers with localizable features. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6023\u20136032","DOI":"10.1109\/ICCV.2019.00612"},{"key":"10809_CR59","doi-asserted-by":"publisher","first-page":"1169","DOI":"10.1609\/aaai.v34i01.5469","volume":"34","author":"H Zhang","year":"2020","unstructured":"Zhang H, Li Z, Li G, Ma L, Liu Y, Jin Z (2020) Generating adversarial examples for holding robustness of source code processing models. In Proceedings of the AAAI conference on artificial intelligence 34:1169\u20131176","journal-title":"In Proceedings of the AAAI conference on artificial intelligence"},{"key":"10809_CR60","doi-asserted-by":"crossref","unstructured":"Zhang H, Li Z, Li G, Ma L, Liu Y, Jin Z (2020) Generating adversarial examples for holding robustness of source code processing models. In: Proceedings of the AAAI conference on artificial intelligence, vol\u00a034, pp 1169\u20131176","DOI":"10.1609\/aaai.v34i01.5469"},{"key":"10809_CR61","doi-asserted-by":"crossref","unstructured":"Zhang X, Zhou Y, Han T, Chen T (2021) Training deep code comment generation models via data augmentation. In: 12th Asia-Pacific symposium on internetware internetware \u201920, New York NY USA. Association for Computing Machinery, pp 185\u2013188","DOI":"10.1145\/3457913.3457937"},{"key":"10809_CR62","unstructured":"Zhou Y, Liu S, Siow J, Du X, Liu Y (2019) Devign: effective vulnerability identification by learning comprehensive program semantics via graph neural networks. In: Proceedings of the 33rd international conference on neural information processing systems Red Hook NY USA. Association for Computing Machinery"},{"key":"10809_CR63","unstructured":"Zhuo TY, Yang Z, Sun Z, Wang Y, Li L, Du X, Xing Z, Lo D (2023) Data augmentation approaches for source code models: A survey. arXiv preprint arXiv:2305.19915"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-026-10809-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-026-10809-3","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-026-10809-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T14:36:37Z","timestamp":1774881397000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-026-10809-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,10]]},"references-count":63,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,5]]}},"alternative-id":["10809"],"URL":"https:\/\/doi.org\/10.1007\/s10664-026-10809-3","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,10]]},"assertion":[{"value":"24 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This work is supported by the Luxembourg National Research Fund (FNR) through the CORE project under Grant C22\/IS\/17426831\/MeMoRIA.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Funding"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed Consent"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"Not applicable.","order":6,"name":"Ethics","group":{"name":"EthicsHeading","label":"Clinical Trial Number"}}],"article-number":"72"}}