{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T05:22:42Z","timestamp":1768800162997,"version":"3.49.0"},"reference-count":97,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T00:00:00Z","timestamp":1696550400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T00:00:00Z","timestamp":1696550400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s10664-023-10378-9","type":"journal-article","created":{"date-parts":[[2023,10,6]],"date-time":"2023-10-06T02:03:56Z","timestamp":1696557836000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["CoCoAST: Representing Source Code via Hierarchical Splitting and Reconstruction of Abstract Syntax Trees"],"prefix":"10.1007","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5543-2025","authenticated-orcid":false,"given":"Ensheng","family":"Shi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanlin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lun","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongyu","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shi","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongmei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongbin","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,10,6]]},"reference":[{"key":"10378_CR1","doi-asserted-by":"crossref","unstructured":"Ahmad WU, Chakraborty S, Ray B, Chang K (2020) A transformer-based approach for source code summarization. In: ACL","DOI":"10.18653\/v1\/2020.acl-main.449"},{"key":"10378_CR2","doi-asserted-by":"crossref","unstructured":"Ahmad WU, Chakraborty S, Ray B, Chang K (2021) Unified pre-training for program understanding and generation. In: NAACL-HLT, pp. 2655\u20132668. Association for Computational Linguistics","DOI":"10.18653\/v1\/2021.naacl-main.211"},{"key":"10378_CR3","doi-asserted-by":"crossref","unstructured":"Allamanis M, Barr ET, Bird C, Sutton CA (2015) Suggesting accurate method and class names. In: FSE","DOI":"10.1145\/2786805.2786849"},{"key":"10378_CR4","unstructured":"Allamanis M, Brockschmidt M, Khademi M (2018) Learning to represent programs with graphs. In: ICLR. OpenReview.net"},{"key":"10378_CR5","unstructured":"Allamanis M, Peng H, Sutton C (2016) A convolutional attention network for extreme summarization of source code. In: ICML, JMLR Workshop and Conference Proceedings, JMLR.org vol. 48, pp 2091\u20132100"},{"key":"10378_CR6","unstructured":"Alon U, Brody S, Levy O, Yahav E (2019a) code2seq: Generating sequences from structured representations of code. In: ICLR (Poster). OpenReview.net"},{"key":"10378_CR7","unstructured":"Alon U, Yahav E (2021) On the bottleneck of graph neural networks and its practical implications. In: ICLR. OpenReview.net"},{"key":"10378_CR8","doi-asserted-by":"crossref","unstructured":"Alon U, Zilberstein M, Levy O, Yahav E (2019b) code2vec: Learning distributed representations of code. In: POPL","DOI":"10.1145\/3290353"},{"key":"10378_CR9","unstructured":"Banerjee S, Lavie A (2005) METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In: IEEvaluation@ACL"},{"key":"10378_CR10","doi-asserted-by":"crossref","unstructured":"Bansal A, Haque S, McMillan C (2021) Project-level encoding for neural source code summarization of subroutines. In: ICPC, IEEE pp 253\u2013264","DOI":"10.1109\/ICPC52881.2021.00032"},{"key":"10378_CR11","unstructured":"Bengio Y, Frasconi P, Simard PY (1993) The problem of learning long-term dependencies in recurrent networks. In: ICNN"},{"key":"10378_CR12","doi-asserted-by":"crossref","unstructured":"Cho K, van Merrienboer B, G\u00fcl\u00e7ehre \u00c7 , Bahdanau D, Bougares F, Schwenk H, Bengio Y (2014) Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: EMNLP, ACL pp 1724\u20131734","DOI":"10.3115\/v1\/D14-1179"},{"key":"10378_CR13","doi-asserted-by":"crossref","unstructured":"Du L, Shi X, Wang Y, Shi E, Han S, Zhang D (2021) Is a single model enough? mucos: A multi-model ensemble learning approach for semantic code search. In: CIKM, ACM pp 2994\u20132998","DOI":"10.1145\/3459637.3482127"},{"key":"10378_CR14","doi-asserted-by":"crossref","unstructured":"Eddy BP, Robinson JA, Kraft NA, Carver JC (2013) Evaluating source code summarization techniques: Replication and expansion. In: ICPC, IEEE Computer Society pp 13\u201322","DOI":"10.1109\/ICPC.2013.6613829"},{"key":"10378_CR15","doi-asserted-by":"crossref","unstructured":"Feng Z, Guo D, Tang D, Duan N, Feng X, Gong M, Shou L, Qin B, Liu T, Jiang D, Zhou M (2020) Codebert: A pre-trained model for programming and natural languages. In: EMNLP (Findings)","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"10378_CR16","unstructured":"Fernandes P, Allamanis M, Brockschmidt M (2019) Structured neural summarization. In: ICLR"},{"key":"10378_CR17","unstructured":"Fout A, Byrd J, Shariat B, Ben-Hur A (2017) Protein interface prediction using graph convolutional networks. In: NIPS, pp 6530\u20136539"},{"key":"10378_CR18","doi-asserted-by":"crossref","unstructured":"Franks C, Tu Z, Devanbu PT, Hellendoorn V (2015) CACHECA: A cache language model based code suggestion tool. In: ICSE, IEEE Computer Society (2), pp 705\u2013708","DOI":"10.1109\/ICSE.2015.228"},{"key":"10378_CR19","unstructured":"Gao S, Gao C, He Y, Zeng J, Nie LY, Xia X (2021) Code structure guided transformer for source code summarization. arXiv:2104.09340"},{"key":"10378_CR20","first-page":"3419","volume":"119","author":"VK Garg","year":"2020","unstructured":"Garg VK, Jegelka S, Jaakkola TS (2020) Generalization and representational limits of graph neural networks. ICML, Proceedings of Machine Learning Research, PMLR 119:3419\u20133430","journal-title":"ICML, Proceedings of Machine Learning Research, PMLR"},{"key":"10378_CR21","doi-asserted-by":"crossref","unstructured":"Gros D, Sezhiyan H, Devanbu P, Yu Z (2020) Code to comment \u201ctranslation\u201d: Data, metrics, baselining & evaluation. In: ASE","DOI":"10.1145\/3324884.3416546"},{"key":"10378_CR22","doi-asserted-by":"publisher","unstructured":"Gu J, Chen Z, Monperrus M (2021) Multimodal representation for neural code search. In: IEEE International Conference on Software Maintenance and Evolution, ICSME 2021, Luxembourg, 2021, pp 483\u2013494. IEEE. September 27 - October 1 https:\/\/doi.org\/10.1109\/ICSME52107.2021.00049","DOI":"10.1109\/ICSME52107.2021.00049"},{"key":"10378_CR23","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1016\/j.neunet.2021.04.019","volume":"141","author":"W Gu","year":"2021","unstructured":"Gu W, Li Z, Gao C, Wang C, Zhang H, Xu Z, Lyu MR (2021) Cradle: Deep code retrieval based on semantic dependency learning. Neural Networks 141:385\u2013394","journal-title":"Neural Networks"},{"key":"10378_CR24","doi-asserted-by":"crossref","unstructured":"Gu X, Zhang H, Kim S (2018) Deep code search. In: ICSE, ACM pp 933\u2013944","DOI":"10.1145\/3180155.3180167"},{"key":"10378_CR25","unstructured":"Guo D, Ren S, Lu S, Feng Z, Tang D, Liu S, Zhou L, Duan N, Svyatkovskiy A, Fu S, Tufano M, Deng SK, lement CB, Drain D, Sundaresan N, Yin J, Jiang D, Zhou M (2021) Graphcodebert: Pre-training code representations with data flow. In: 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, OpenReview.net. 3-7 May 2021. https:\/\/openreview.net\/forum?id=jLoC4ez43PZ"},{"key":"10378_CR26","doi-asserted-by":"crossref","unstructured":"Haiduc S, Aponte J, Moreno L, Marcus A (2010) On the use of automated text summarization techniques for summarizing source code. In:WCRE, IEEE Computer Society pp 35\u201344","DOI":"10.1109\/WCRE.2010.13"},{"key":"10378_CR27","unstructured":"Haije T (2016) Automatic comment generation using a neural translation model. Bachelor\u2019s thesis, University of Amsterdam"},{"key":"10378_CR28","doi-asserted-by":"publisher","unstructured":"Haldar R, Wu L, Xiong J, Hockenmaier J (2020) A multi-perspective architecture for semantic code search. In: Jurafsky D, Chai J, Schluter N, Tetreault JR (Eds.) Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, ACL 2020, Online, Association for Computational Linguistics pp 8563\u20138568 5-10 July 2020. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.758","DOI":"10.18653\/v1\/2020.acl-main.758"},{"key":"10378_CR29","doi-asserted-by":"crossref","unstructured":"Haque S, LeClair A, Wu L, McMillan C (2020) Improved automatic summarization of subroutines via attention to file context. In: MSR","DOI":"10.1145\/3379597.3387449"},{"key":"10378_CR30","doi-asserted-by":"publisher","unstructured":"He K, Fan H, Wu Y, Xie S, Girshick RB (2020) Momentum contrast for unsupervised visual representation learning. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, Computer Vision Foundation \/ IEEE. pp 9726\u20139735. 13-19 June 2020 https:\/\/doi.org\/10.1109\/CVPR42600.2020.00975","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"10378_CR31","unstructured":"Hellendoorn VJ, Sutton C, Singh R, Maniatis P, Bieber D (2020) Global relational models of source code. In: ICLR. OpenReview.net"},{"key":"10378_CR32","doi-asserted-by":"crossref","unstructured":"Hu X, Li G, Xia X, Lo D, Jin Z (2018) Deep code comment generation. In: ICPC","DOI":"10.1145\/3196321.3196334"},{"key":"10378_CR33","doi-asserted-by":"crossref","unstructured":"Hu X, Li G, Xia X, Lo D, Jin Z (2019) Deep code comment generation with hybrid lexical and syntactical information. Empirical Software Engineering","DOI":"10.1145\/3196321.3196334"},{"key":"10378_CR34","doi-asserted-by":"crossref","unstructured":"Hu X, Li G, Xia X, Lo D, Jin Z (2018) Summarizing source code with transferred api knowledge. In: IJCAI","DOI":"10.24963\/ijcai.2018\/314"},{"key":"10378_CR35","doi-asserted-by":"crossref","unstructured":"Huang J, Tang D, Shou L, Gong M, Xu K, Jiang D, Zhou M, Duan N (2021) Cosqa: 20, 000+ web queries for code search and question answering. In: ACL","DOI":"10.18653\/v1\/2021.acl-long.442"},{"key":"10378_CR36","unstructured":"Husain H, Wu H, Gazit T, Allamanis M, Brockschmidt M (2019) Codesearchnet challenge: Evaluating the state of semantic code search. CoRR abs\/1909.09436. arXiv:1909.09436"},{"key":"10378_CR37","doi-asserted-by":"crossref","unstructured":"Iyer S, Konstas I, Cheung A, Zettlemoyer L (2016) Summarizing source code using a neural attention model. In: ACL","DOI":"10.18653\/v1\/P16-1195"},{"key":"10378_CR38","doi-asserted-by":"crossref","unstructured":"Iyyer M, Manjunatha V, Boyd-Graber JL, III HD (2015) Deep unordered composition rivals syntactic methods for text classification. In: ACL (1), The Association for Computer Linguistics pp 1681\u20131691","DOI":"10.3115\/v1\/P15-1162"},{"key":"10378_CR39","doi-asserted-by":"crossref","unstructured":"Jain P, Jain A, Zhang T, Abbeel P, Gonzalez J, Stoica I (2021) Contrastive code representation learning. In: EMNLP, Association for Computational Linguistics (1), pp 5954\u20135971","DOI":"10.18653\/v1\/2021.emnlp-main.482"},{"key":"10378_CR40","first-page":"54","volume":"161","author":"X Jiang","year":"2021","unstructured":"Jiang X, Zheng Z, Lyu C, Li L, Lyu L (2021) Treebert: A tree-based pre-trained model for programming language. UAI, Proceedings of Machine Learning Research, AUAI Press 161:54\u201363","journal-title":"UAI, Proceedings of Machine Learning Research, AUAI Press"},{"key":"10378_CR41","unstructured":"Kanade A, Maniatis P, Balakrishnan G, Shi K (2020) Pre-trained contextual embedding of source code. arXiv:2001.00059"},{"key":"10378_CR42","doi-asserted-by":"crossref","unstructured":"Kim Y (2014) Convolutional neural networks for sentence classification. In: EMNLP, ACL pp 1746\u20131751","DOI":"10.3115\/v1\/D14-1181"},{"key":"10378_CR43","doi-asserted-by":"crossref","unstructured":"LeClair A, Bansal A, McMillan C (2021) Ensemble models for neural source code summarization of subroutines In: ICSME, IEEE pp 286\u2013297","DOI":"10.26226\/morressier.613b5418842293c031b5b62e"},{"key":"10378_CR44","doi-asserted-by":"crossref","unstructured":"LeClair A, Haque S, Wu L, McMillan C (2020) Improved code summarization via a graph neural network. In: ICPC, ACM pp 18\u2013195","DOI":"10.1145\/3387904.3389268"},{"key":"10378_CR45","doi-asserted-by":"crossref","unstructured":"LeClair A, Jiang S, McMillan C (2019) A neural model for generating natural language summaries of program subroutines. In: ICSE","DOI":"10.1109\/ICSE.2019.00087"},{"key":"10378_CR46","doi-asserted-by":"crossref","unstructured":"Li W, Qin H, Yan S, Shen B, Chen Y (2020) Learning code-query interaction for enhancing code searches. In: ICSME, IEEE pp 115\u2013126","DOI":"10.1109\/ICSME46990.2020.00021"},{"key":"10378_CR47","doi-asserted-by":"crossref","unstructured":"Libovick\u00fd J, Helcl J, Mare\u010dek D (2018) Input combination strategies for multi-source transformer decoder. In: WMT","DOI":"10.18653\/v1\/W18-6326"},{"key":"10378_CR48","unstructured":"Lin C (2004) ROUGE: A package for automatic evaluation of summaries. In: ACL"},{"key":"10378_CR49","doi-asserted-by":"crossref","unstructured":"Lin C, Och FJ (2004) Automatic evaluation of machine translation quality using longest common subsequence and skip-bigram statistics. In: ACL pp 605\u2013612","DOI":"10.3115\/1218955.1219032"},{"key":"10378_CR50","doi-asserted-by":"publisher","unstructured":"Ling C, Lin Z, Zou Y, Xie B (2020) Adaptive deep code search. In: ICPC \u201920: 28th International Conference on Program Comprehension, Seoul, Republic of Korea, ACM pp 48\u201359 13-15 July 2020. https:\/\/doi.org\/10.1145\/3387904.3389278","DOI":"10.1145\/3387904.3389278"},{"key":"10378_CR51","doi-asserted-by":"publisher","unstructured":"Ling X, Wu L, Wang S, Pan G, Ma T, Xu F, Liu AX, Wu C, Ji S (2021) Deep graph matching and searching for semantic code retrieval. ACM Trans Knowl Discov Data 15(5): 88:1\u201388:21. https:\/\/doi.org\/10.1145\/3447571","DOI":"10.1145\/3447571"},{"issue":"2","key":"10378_CR52","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1007\/s10618-008-0118-x","volume":"18","author":"E Linstead","year":"2009","unstructured":"Linstead E, Bajracharya SK, Ngo TC, Rigor P, Lopes CV, Baldi P (2009) Sourcerer: mining and searching internet-scale software repositories. Data Min Knowl Discov 18(2):300\u2013336","journal-title":"Data Min Knowl Discov"},{"key":"10378_CR53","doi-asserted-by":"crossref","unstructured":"Liu F, Li G, Zhao Y, Jin Z (2020) Multi-task learning based pre-trained language model for code completion. In: ASE, IEEE pp 473\u2013485","DOI":"10.1145\/3324884.3416591"},{"key":"10378_CR54","unstructured":"Liu Y, Ott M, Goyal N, Du J, Joshi M, Chen D, Levy O, Lewis M, Zettlemoyer L, Stoyanov V (2019) Roberta: A robustly optimized BERT pretraining approach. arXiv:1907.11692"},{"key":"10378_CR55","unstructured":"Loshchilov I, Hutter F (2019) Decoupled weight decay regularization. In: ICLR"},{"key":"10378_CR56","unstructured":"Lu M, Sun X, Wang S, Lo D, Duan Y (2015) Query expansion via wordnet for effective code search. In: SANER, IEEE Computer Society pp 545\u2013549"},{"key":"10378_CR57","doi-asserted-by":"crossref","unstructured":"Lv F, Zhang H, Lou J, Wang S, Zhang D, Zhao J (2015) Codehow: Effective code search based on API understanding and extended boolean model (E). In: ASE, IEEE Computer Society pp 260\u2013270","DOI":"10.1109\/ASE.2015.42"},{"key":"10378_CR58","doi-asserted-by":"crossref","unstructured":"McMillan C, Grechanik M, Poshyvanyk D, Xie Q, Fu C (2011) Portfolio: finding relevant functions and their usage. In: ICSE, ACM pp 111\u2013120","DOI":"10.1145\/1985793.1985809"},{"key":"10378_CR59","doi-asserted-by":"crossref","unstructured":"Mou L, Li G, Zhang L, Wang T, Jin Z (2016) Convolutional neural networks over tree structures for programming language processing. In: AAAI","DOI":"10.1609\/aaai.v30i1.10139"},{"key":"10378_CR60","unstructured":"Oord Avd, Li Y, Vinyals O (2018) Representation learning with contrastive predictive coding"},{"key":"10378_CR61","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, Zhu W (2002) Bleu: A method for automatic evaluation of machine translation. In: ACL, pp 311\u2013318","DOI":"10.3115\/1073083.1073135"},{"key":"10378_CR62","unstructured":"Parr T (2013) The definitive ANTLR 4 reference (2 ed.). Pragmatic Bookshelf"},{"key":"10378_CR63","doi-asserted-by":"crossref","unstructured":"Rodeghero P, McMillan C, McBurney PW, Bosch N, D\u2019Mello SK (2014) Improving automated source code summarization via an eye-tracking study of programmers. In: ICSE, ACM pp 390\u2013401","DOI":"10.1145\/2568225.2568247"},{"key":"10378_CR64","doi-asserted-by":"crossref","unstructured":"Sajnani H, Saini V, Svajlenko J, Roy CK, Lopes CV (2016) Sourcerercc: Scaling code clone detection to big-code. In: ICSE, ACM pp 1157\u20131168","DOI":"10.1145\/2884781.2884877"},{"key":"10378_CR65","doi-asserted-by":"crossref","unstructured":"See A, Liu PJ, Manning CD (2017) Get to the point: Summarization with pointergenerator networks. In: ACL","DOI":"10.18653\/v1\/P17-1099"},{"key":"10378_CR66","doi-asserted-by":"publisher","unstructured":"Shi E, Gu W, Wang Y, Du L, Zhang H, Han S, Zhang D, Sun H (2022) Enhancing semantic code search with multimodal contrastive learning and soft data augmentation. https:\/\/doi.org\/10.48550\/arXiv.2204.03293","DOI":"10.48550\/arXiv.2204.03293"},{"key":"10378_CR67","doi-asserted-by":"crossref","unstructured":"Shi E, Wang Y, Du L, Chen J, Han S, Zhang H, Zhang D, Sun H (2022) On the evaluation of neural code summarization","DOI":"10.1145\/3510003.3510060"},{"key":"10378_CR68","doi-asserted-by":"crossref","unstructured":"Shi E, Wang Y, Du L, Zhang H, Han S, Zhang D, Sun H (2021) CAST: Enhancing code summarization with hierarchical splitting and reconstruction of abstract syntax trees. In: EMNLP (1), Association for Computational Linguistics pp 4053\u20134062","DOI":"10.18653\/v1\/2021.emnlp-main.332"},{"key":"10378_CR69","doi-asserted-by":"crossref","unstructured":"Shi L, Mu F, Chen X, Wang S, Wang J, Yang Y, Li G, Xia X, Wang Q (2022) Are we building on the rock? on the importance of data preprocessing for code summarization. In: ESEC\/SIGSOFT FSE, ACM pp 107\u2013119","DOI":"10.1145\/3540250.3549145"},{"key":"10378_CR70","unstructured":"Shin ECR, Allamanis M, Brockschmidt M, Polozov A (2019) Program synthesis and semantic parsing with learned code idioms. In: NeurIPS, pp 10824\u201310834"},{"key":"10378_CR71","doi-asserted-by":"publisher","unstructured":"Shuai J, Xu L, Liu C, Yan M, Xia X, Lei Y (2020) Improving code search with coattentive representation learning. In: ICPC \u201920: 28th International Conference on Program Comprehension, Seoul, Republic of Korea, ACM pp 196\u2013207 July 13-15, 2020. https:\/\/doi.org\/10.1145\/3387904.3389269","DOI":"10.1145\/3387904.3389269"},{"key":"10378_CR72","doi-asserted-by":"crossref","unstructured":"Sridhara G, Hill E, Muppaneni D, Pollock LL, Vijay-Shanker K (2010) Towards automatically generating summary comments for java methods. In: ASE, pp 43\u201352","DOI":"10.1145\/1858996.1859006"},{"key":"10378_CR73","doi-asserted-by":"crossref","unstructured":"Sun Z, Li L, Liu Y, Du X, Li L (2022) On the importance of building high-quality training datasets for neural code search. In: ICSE, ACM pp 1609\u20131620","DOI":"10.1145\/3510003.3510160"},{"key":"10378_CR74","doi-asserted-by":"crossref","unstructured":"Svyatkovskiy A, Deng SK, Fu S, Sundaresan N (2020) Intellicode compose: code generation using transformer. In: ESEC\/SIGSOFT FSE, ACM pp 1433\u20131443","DOI":"10.1145\/3368089.3417058"},{"key":"10378_CR75","doi-asserted-by":"crossref","unstructured":"Tai KS, Socher R, Manning CD (2015) Improved semantic representations from treestructured long short-term memory networks. In: ACL (1), The Association for Computer Linguistics pp 1556\u20131566","DOI":"10.3115\/v1\/P15-1150"},{"key":"10378_CR76","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: NIPS, pp 5998\u20136008"},{"key":"10378_CR77","doi-asserted-by":"crossref","unstructured":"Vedantam R, Zitnick CL, Parikh D (2015) Cider: Consensus-based image description evaluation. In: CVPR","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"10378_CR78","doi-asserted-by":"crossref","unstructured":"Wan Y, Shu J, SuiY, Xu G, Zhao Z, Wu J, Yu PS (2019) Multi-modal attention network learning for semantic source code retrieval. In: ASE, IEEE pp 13\u201325","DOI":"10.1109\/ASE.2019.00012"},{"key":"10378_CR79","doi-asserted-by":"crossref","unstructured":"Wan Y, Zhao Z, Yang M, Xu G, Ying H, Wu J, Yu PS (2018) Improving automatic source code summarization via deep reinforcement learning. In: ASE","DOI":"10.1145\/3238147.3238206"},{"key":"10378_CR80","unstructured":"Wang X, Wang Y, Mi F, Zhou P, Wan Y, Liu X, Li L, Wu H, Liu J, Jiang X (2021) Syncobert: Syntax-guided multi-modal contrastive pre-training for code representation. arXiv:2108.04556"},{"key":"10378_CR81","unstructured":"Wang Y, Du L, Shi E, Hu Y, Han S, Zhang D (2020) Cocogum: Contextual code summarization with multi-relational gnn on umls. Tech rep, Microsoft, MSR-TR-2020-16"},{"key":"10378_CR82","doi-asserted-by":"crossref","unstructured":"Wang Y, Li H (2021) Code completion by modeling flattened abstract syntax trees as graphs. In: AAAI","DOI":"10.1609\/aaai.v35i16.17650"},{"key":"10378_CR83","doi-asserted-by":"crossref","unstructured":"Wang Y, Wang W, Joty SR, Hoi SCH (2021) Codet5: Identifier-aware unified pretrained encoder-decoder models for code understanding and generation. In: EMNLP (1), Association for Computational Linguistics pp 8696\u20138708","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"10378_CR84","unstructured":"Wei B, Li G, Xia X, Fu Z, Jin Z (2019) Code generation as a dual task of code summarization. In: NeurIPS, pp 6559\u20136569"},{"key":"10378_CR85","doi-asserted-by":"crossref","unstructured":"Wei B, Li Y, Li G, Xia X, Jin Z (2020) Retrieve and refine: Exemplar-based neural comment generation. In: ASE, IEEE pp 349\u2013360","DOI":"10.1145\/3324884.3416578"},{"key":"10378_CR86","doi-asserted-by":"crossref","unstructured":"White M, Tufano M, Vendome C, Poshyvanyk D (2016) Deep learning code fragments for code clone detection. In: ASE, ACM pp 87\u201398","DOI":"10.1145\/2970276.2970326"},{"key":"10378_CR87","first-page":"171","volume":"1","author":"F Wilcoxon","year":"1970","unstructured":"Wilcoxon F, Katti S, Wilcox RA (1970) Critical values and probability levels for the wilcoxon rank sum test and the wilcoxon signed rank test. Selected tables in mathematical statistics 1:171\u2013259","journal-title":"Selected tables in mathematical statistics"},{"key":"10378_CR88","doi-asserted-by":"crossref","unstructured":"Wu H, Zhao H, Zhang M (2021) Code summarization with structure-induced transformer. In: ACL\/IJCNLP (Findings), Findings of ACL, vol. ACL\/IJCNLP 2021, Association for Computational Linguistics pp 1078\u20131090","DOI":"10.18653\/v1\/2021.findings-acl.93"},{"key":"10378_CR89","doi-asserted-by":"crossref","unstructured":"Wu Y, Lian D, Xu Y, Wu L, Chen E (2020) Graph convolutional networks with markov random field reasoning for social spammer detection. In: AAAI, AAAI Press pp 1054\u20131061","DOI":"10.1609\/aaai.v34i01.5455"},{"key":"10378_CR90","doi-asserted-by":"crossref","unstructured":"Wu Z, Xiong Y, Yu SX, Lin D (2018) Unsupervised feature learning via non-parametric instance discrimination. In: CVPR, Computer Vision Foundation \/IEEE Computer Society pp 3733\u20133742","DOI":"10.1109\/CVPR.2018.00393"},{"key":"10378_CR91","doi-asserted-by":"crossref","unstructured":"Yang M, Zhou M, Li Z, Liu J, Pan L, Xiong H, King I (2022) Hyperbolic graph neural networks: A review of methods and applications. arXiv:2202.13852","DOI":"10.1145\/3580305.3599562"},{"key":"10378_CR92","doi-asserted-by":"publisher","unstructured":"Ye W, Xie R, Zhang J, Hu T, Wang X, Zhang S (2020) Leveraging code generation to improve code retrieval and summarization via dual learning. In: Huang Y, King I, Liu T, van Steen M (Eds)WWW\u201920: TheWeb Conference 2020, Taipei, Taiwan, ACM \/ IW3C2 pp 2309\u20132319. 20-24 April 2020. https:\/\/doi.org\/10.1145\/3366423.3380295","DOI":"10.1145\/3366423.3380295"},{"key":"10378_CR93","doi-asserted-by":"crossref","unstructured":"Yu X, Huang Q, Wang Z, Feng Y, Zhao D (2020) Towards context-aware code comment generation. In: EMNLP (Findings), Association for Computational Linguistics pp 3938\u20133947","DOI":"10.18653\/v1\/2020.findings-emnlp.350"},{"key":"10378_CR94","unstructured":"Zhang J, Panthaplackel S, Nie P, Mooney RJ, Li JJ, Gligoric M (2021) Learning to generate code comments from class hierarchies. arXiv:2103.13426"},{"key":"10378_CR95","doi-asserted-by":"crossref","unstructured":"Zhang J, Wang X, Zhang H, Sun H, Liu X (2020) Retrieval-based neural source code summarization. In: ICSE","DOI":"10.1145\/3377811.3380383"},{"key":"10378_CR96","doi-asserted-by":"crossref","unstructured":"Zhang J, Wang X, Zhang H, Sun H, Wang K, Liu X (2019) A novel neural source code representation based on abstract syntax tree. In: ICSE","DOI":"10.1109\/ICSE.2019.00086"},{"key":"10378_CR97","doi-asserted-by":"publisher","unstructured":"Zhu Q, Sun Z, Liang X, Xiong Y, Zhang L (2020) Ocor: An overlapping-aware code retriever. In: 35th IEEE\/ACM International Conference on Automated Software Engineering, ASE 2020, Melbourne, Australia, IEEE pp 883\u2013894 21-25 Sept 2020. https:\/\/doi.org\/10.1145\/3324884.3416530","DOI":"10.1145\/3324884.3416530"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-023-10378-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-023-10378-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-023-10378-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,29]],"date-time":"2023-11-29T12:10:51Z","timestamp":1701259851000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-023-10378-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,6]]},"references-count":97,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["10378"],"URL":"https:\/\/doi.org\/10.1007\/s10664-023-10378-9","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,6]]},"assertion":[{"value":"1 August 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 October 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"135"}}