{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,13]],"date-time":"2025-09-13T15:47:14Z","timestamp":1757778434408,"version":"3.41.0"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,5,24]],"date-time":"2025-05-24T00:00:00Z","timestamp":1748044800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,24]],"date-time":"2025-05-24T00:00:00Z","timestamp":1748044800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["N2317005"],"award-info":[{"award-number":["N2317005"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62302086"],"award-info":[{"award-number":["62302086"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07348-x","type":"journal-article","created":{"date-parts":[[2025,5,24]],"date-time":"2025-05-24T18:36:05Z","timestamp":1748111765000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Learning code better through structural information of data flow"],"prefix":"10.1007","volume":"81","author":[{"given":"Zhe","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Yuming","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Tianzhe","family":"Jiao","sequence":"additional","affiliation":[]},{"given":"Lili","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Chaopeng","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Song","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,24]]},"reference":[{"key":"7348_CR1","unstructured":"Guo D, Ren S, Lu S, Feng Z, Tang D, Shujie L, Zhou L, Duan N, Svyatkovskiy A, Fu S, et al (2021) Graphcodebert: pre-training code representations with data flow. In: International Conference on Learning Representations"},{"key":"7348_CR2","doi-asserted-by":"crossref","unstructured":"Ding Y, Steenhoek B, Pei K, Kaiser G, Le W, Ray B (2024) Traced: execution-aware pre-training for source code. In: Proceedings of the 46th IEEE\/ACM International Conference on Software Engineering, pp 1\u201312","DOI":"10.1145\/3597503.3608140"},{"issue":"3","key":"7348_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3641850","volume":"23","author":"Z Bi","year":"2024","unstructured":"Bi Z, Chen J, Jiang Y, Xiong F, Guo W, Chen H, Zhang N (2024) Codekgc: code language model for generative knowledge graph construction. ACM Trans Asian Low-Resour Lang Inf Process 23(3):1\u201316","journal-title":"ACM Trans Asian Low-Resour Lang Inf Process"},{"key":"7348_CR4","doi-asserted-by":"publisher","first-page":"21048","DOI":"10.1007\/s11227-024-06265-9","volume":"80","author":"NAA Khleel","year":"2024","unstructured":"Khleel NAA, Neh\u00e9z K (2024) Improving accuracy of code smells detection using machine learning with data balancing techniques. J Supercomput 80:21048","journal-title":"J Supercomput"},{"issue":"1","key":"7348_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3522674","volume":"32","author":"S Gao","year":"2023","unstructured":"Gao S, Gao C, He Y, Zeng J, Nie L, Xia X, Lyu M (2023) Code structure-guided transformer for source code summarization. ACM Trans Softw Eng Methodol 32(1):1\u201332","journal-title":"ACM Trans Softw Eng Methodol"},{"key":"7348_CR6","doi-asserted-by":"crossref","unstructured":"Du Y, Yu Z (2023) Pre-training code representation with semantic flow graph for effective bug localization. In: Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering, pp 579\u2013591","DOI":"10.1145\/3611643.3616338"},{"key":"7348_CR7","doi-asserted-by":"publisher","first-page":"4550","DOI":"10.1109\/TSE.2023.3305244","volume":"49","author":"M Fu","year":"2023","unstructured":"Fu M, Nguyen V, Tantithamthavorn CK, Le T, Phung D (2023) Vulexplainer: a transformer-based hierarchical distillation for explaining vulnerability types. IEEE Trans Softw Eng 49:4550","journal-title":"IEEE Trans Softw Eng"},{"key":"7348_CR8","doi-asserted-by":"crossref","unstructured":"Liu S, Wu B, Xie X, Meng G, Liu Y (2023) Contrabert: enhancing code pre-trained models via contrastive learning. In: 2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE). IEEE, pp 2476\u20132487","DOI":"10.1109\/ICSE48619.2023.00207"},{"key":"7348_CR9","unstructured":"Shojaee P, Jain A, Tipirneni S, Reddy CK (2023) Execution-based code generation using deep reinforcement learning. Trans Mach Learn Res"},{"key":"7348_CR10","doi-asserted-by":"crossref","unstructured":"Wang W, Li G, Ma B, Xia X, Jin Z (2020) Detecting code clones with graph neural network and flow-augmented abstract syntax tree. In: 2020 IEEE 27th International Conference on Software Analysis, Evolution and Reengineering (SANER). IEEE, pp 261\u2013271","DOI":"10.1109\/SANER48275.2020.9054857"},{"key":"7348_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2022.111557","volume":"197","author":"A Zhang","year":"2023","unstructured":"Zhang A, Fang L, Ge C, Li P, Liu Z (2023) Efficient transformer with code token learner for code clone detection. J Syst Softw 197:111557","journal-title":"J Syst Softw"},{"key":"7348_CR12","doi-asserted-by":"crossref","unstructured":"Nguyen AT, Nguyen TT, Nguyen TN (2015) Divide-and-conquer approach for multi-phase statistical migration for source code (t). In: 2015 30th IEEE\/ACM International Conference on Automated Software Engineering (ASE). IEEE, pp 585\u2013596","DOI":"10.1109\/ASE.2015.74"},{"issue":"4","key":"7348_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3340544","volume":"28","author":"M Tufano","year":"2019","unstructured":"Tufano M, Watson C, Bavota G, Penta MD, White M, Poshyvanyk D (2019) An empirical study on learning bug-fixing patches in the wild via neural machine translation. ACM Trans Softw Eng Methodol (TOSEM) 28(4):1\u201329","journal-title":"ACM Trans Softw Eng Methodol (TOSEM)"},{"key":"7348_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063","volume":"568","author":"J Su","year":"2024","unstructured":"Su J, Ahmed M, Lu Y, Pan S, Bo W, Liu Y (2024) Roformer: enhanced transformer with rotary position embedding. Neurocomputing 568:127063","journal-title":"Neurocomputing"},{"key":"7348_CR15","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin W, Shi Y, et al (2021) A robustly optimized bert pre-training approach with post-training. In: China National Conference on Chinese Computational Linguistics, pp 471\u2013484","DOI":"10.1007\/978-3-030-84186-7_31"},{"key":"7348_CR16","doi-asserted-by":"crossref","unstructured":"Wang Y, Wang W, Joty S, et al (2021) Codet5: identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp 8696\u20138708","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"7348_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.127815","volume":"595","author":"B Peng","year":"2024","unstructured":"Peng B, Han K, Zhong L, Wu S, Zhang T (2024) A head-to-head attention with prompt text augmentation for text classification. Neurocomputing 595:127815","journal-title":"Neurocomputing"},{"issue":"3","key":"7348_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3636430","volume":"18","author":"S Tipirneni","year":"2024","unstructured":"Tipirneni S, Zhu M, Reddy CK (2024) Structcoder: structure-aware transformer for code generation. ACM Trans Knowl Discov Data 18(3):1\u201320","journal-title":"ACM Trans Knowl Discov Data"},{"key":"7348_CR19","doi-asserted-by":"crossref","unstructured":"Bairi R, Sonwane A, Kanade A, et al (2024) Codeplan: repository-level coding using llms and planning. In: Proceedings of the ACM on Software Engineering 1(FSE), pp 675\u2013698","DOI":"10.1145\/3643757"},{"key":"7348_CR20","doi-asserted-by":"crossref","unstructured":"Niu C, Li C, Ng V, et al (2022) Spt-code: Sequence-to-sequence pre-training for learning source code representations. In: Proceedings of the 44th International Conference on Software Engineering, pp 2006\u20132018","DOI":"10.1145\/3510003.3510096"},{"key":"7348_CR21","unstructured":"Kanade A, Maniatis P, Balakrishnan G, et al (2020) Learning and evaluating contextual embedding of source code. In: International Conference on Machine Learning, pp 5110\u20135121"},{"key":"7348_CR22","first-page":"1536","volume":"2020","author":"Z Feng","year":"2020","unstructured":"Feng Z, Guo D, Tang D et al (2020) Codebert: a pre-trained model for programming and natural languages. Find Assoc Comput Linguist EMNLP 2020:1536\u20131547","journal-title":"Find Assoc Comput Linguist EMNLP"},{"key":"7348_CR23","doi-asserted-by":"crossref","unstructured":"Svyatkovskiy A, Deng SK, Fu S, Sundaresan N (2020) Intellicode compose: code generation using transformer. In: Proceedings of the 28th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering, pp 1433\u20131443","DOI":"10.1145\/3368089.3417058"},{"key":"7348_CR24","doi-asserted-by":"crossref","unstructured":"Ahmad WU, Chakraborty S, Ray B, et al (2021) Unified pre-training for program understanding and generation. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp 2655\u20132668","DOI":"10.18653\/v1\/2021.naacl-main.211"},{"key":"7348_CR25","doi-asserted-by":"crossref","unstructured":"Zhang S, Ding Y, Hu E, Yu Y, Zhang Y (2024) Enhancing code representation learning for code search with abstract code semantics. In: 2024 International Joint Conference on Neural Networks (IJCNN). IEEE, pp 1\u20138","DOI":"10.1109\/IJCNN60899.2024.10650119"},{"key":"7348_CR26","doi-asserted-by":"crossref","unstructured":"Guo D, Lu S, Duan N, et al (2022) Unixcoder: Unified cross-modal pre-training for code representation. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics, vol 1. Long Papers, pp 7212\u20137225","DOI":"10.18653\/v1\/2022.acl-long.499"},{"key":"7348_CR27","doi-asserted-by":"crossref","unstructured":"Cho K, Van\u00a0Merri\u00ebnboer B, Gulcehre C, Bahdanau D, Bougares F, Schwenk H, Bengio Y (2014) Learning phrase representations using rnn encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078","DOI":"10.3115\/v1\/D14-1179"},{"key":"7348_CR28","first-page":"9343","volume":"34","author":"H Peng","year":"2021","unstructured":"Peng H, Li G, Wang W et al (2021) Integrating tree path in transformer for code representation. Adv Neural Inf Process Syst 34:9343\u20139354","journal-title":"Adv Neural Inf Process Syst"},{"key":"7348_CR29","doi-asserted-by":"crossref","unstructured":"Svajlenko J, Islam JF, Keivanloo I, Roy CK, Mia MM (2014) Towards a big data curated benchmark of inter-project code clones. In: 2014 IEEE International Conference on Software Maintenance and Evolution. IEEE, pp 476\u2013480","DOI":"10.1109\/ICSME.2014.77"},{"key":"7348_CR30","unstructured":"Chen X, Liu C, Song D (2018) Tree-to-tree neural networks for program translation. Adv Neural Inf Process Syst 31"},{"key":"7348_CR31","unstructured":"Husain H, Wu H-H, Gazit T, Allamanis M, Brockschmidt M (2019) Codesearchnet challenge: evaluating the state of semantic code search. arXiv e-prints, 1909"},{"key":"7348_CR32","first-page":"9343","volume":"34","author":"H Peng","year":"2021","unstructured":"Peng H, Li G, Wang W et al (2021) Integrating tree path in transformer for code representation. Adv Neural Inf Process Syst 34:9343\u20139354","journal-title":"Adv Neural Inf Process Syst"},{"key":"7348_CR33","doi-asserted-by":"crossref","unstructured":"Xia C-S, Wei Y, Zhang L (2023) Automated program repair in the era of large pre-trained language models. In: 2023 IEEE\/ACM 45th International Conference on Software Engineering (ICSE), pp 1482\u20131494","DOI":"10.1109\/ICSE48619.2023.00129"},{"key":"7348_CR34","doi-asserted-by":"crossref","unstructured":"Zhang J, Wang X, Zhang H, Sun H, Wang K, Liu X (2019) A novel neural source code representation based on abstract syntax tree. In: 2019 IEEE\/ACM 41st International Conference on Software Engineering (ICSE). IEEE, pp 783\u2013794","DOI":"10.1109\/ICSE.2019.00086"},{"key":"7348_CR35","doi-asserted-by":"crossref","unstructured":"Koehn P, Och FJ, Marcu D (2003) Statistical phrase-based translation. In: Proceedings of the 2003 Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics, pp 127\u2013133","DOI":"10.3115\/1073445.1073462"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07348-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07348-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07348-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,24]],"date-time":"2025-05-24T18:36:11Z","timestamp":1748111771000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07348-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,24]]},"references-count":35,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2025,6]]}},"alternative-id":["7348"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07348-x","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,24]]},"assertion":[{"value":"25 April 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 May 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors. Informed consent was obtained from all the individual participants included in the study. The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"882"}}