{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T10:07:36Z","timestamp":1768385256431,"version":"3.49.0"},"reference-count":62,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T00:00:00Z","timestamp":1764892800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T00:00:00Z","timestamp":1764892800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Youth Project of Fundamental Research Program (Free Exploration Category) of Shanxi Province","award":["202403021212168"],"award-info":[{"award-number":["202403021212168"]}]},{"name":"Youth Project of Fundamental Research Program (Free Exploration Category) of Shanxi Province","award":["202303021222098"],"award-info":[{"award-number":["202303021222098"]}]},{"DOI":"10.13039\/501100004735","name":"Natural Science Foundation of Hunan Province","doi-asserted-by":"publisher","award":["2022JJ40527"],"award-info":[{"award-number":["2022JJ40527"]}],"id":[{"id":"10.13039\/501100004735","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014472","name":"Scientific Research Foundation of Hunan Provincial Education Department","doi-asserted-by":"publisher","award":["21B0760"],"award-info":[{"award-number":["21B0760"]}],"id":[{"id":"10.13039\/100014472","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software 
Eng"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10664-025-10762-7","type":"journal-article","created":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T06:52:28Z","timestamp":1764917548000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Repository-level code completion with adaptive segmentation and fused retrieval"],"prefix":"10.1007","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-1832-1168","authenticated-orcid":false,"given":"Yuanyuan","family":"Shen","sequence":"first","affiliation":[]},{"given":"Pinle","family":"Qin","sequence":"additional","affiliation":[]},{"given":"Kaiyi","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Fuwei","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Fan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Guiji","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,5]]},"reference":[{"key":"10762_CR1","doi-asserted-by":"publisher","unstructured":"Allal LB, Li R, Kocetkov D, Mou C, Akiki C, Ferrandis CM, Muennighoff N, Mishra M, Gu A, Dey M et al (2023) SantaCoder: don\u2019t reach for the stars!. https:\/\/doi.org\/10.48550\/arXiv.2301.03988","DOI":"10.48550\/arXiv.2301.03988"},{"key":"10762_CR2","doi-asserted-by":"publisher","unstructured":"Athiwaratkun B, Gouda SK, Wang Z, Li X, Tian Y, Tan M, Ahmad WU, Wang S, Sun Q, Shang M et al (2023) Multi-lingual evaluation of code generation models. https:\/\/doi.org\/10.48550\/arXiv.2210.14868","DOI":"10.48550\/arXiv.2210.14868"},{"key":"10762_CR3","unstructured":"Biderman S, Schoelkopf H, Anthony QG, Bradley H, O\u2019Brien K, Hallahan E, Khan MA, Purohit S, Prashanth US, Raff E et al (2023) Pythia: a suite for analyzing large language models across training and scaling. In: International Conference on Machine Learning (ICML 2023), pp 2397\u20132430. 
https:\/\/proceedings.mlr.press\/v202\/biderman23a.html"},{"key":"10762_CR4","doi-asserted-by":"publisher","unstructured":"Yang B, Zhang N, Li SP, Xia X (2020) Survey of intelligent code completion. J Softw (JOS) 31(5):1435\u20131453. https:\/\/doi.org\/10.13328\/j.cnki.jos.005966","DOI":"10.13328\/j.cnki.jos.005966"},{"issue":"ISSTA","key":"10762_CR5","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.1145\/3728922","volume":"2","author":"I Bouzenia","year":"2025","unstructured":"Bouzenia I, Pradel M (2025) You name it, i run it: an llm agent to execute tests of arbitrary projects. Proc ACM Softw Eng 2(ISSTA):1054\u20131076. https:\/\/doi.org\/10.1145\/3728922","journal-title":"Proc ACM Softw Eng"},{"key":"10762_CR6","doi-asserted-by":"publisher","unstructured":"Bouzenia I, Krishan BP, Pradel M (2024) Dypybench: a benchmark of executable python software. ACM Int Conf Foundations Softw Eng (FSE) 1(FSE). https:\/\/doi.org\/10.1145\/3643742","DOI":"10.1145\/3643742"},{"key":"10762_CR7","doi-asserted-by":"publisher","first-page":"1877","DOI":"10.5555\/3495724.3495883","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, Subbiah M, Kaplan JD, Dhariwal P, Neelakantan A, Shyam P, Sastry G, Askell A et al (2020) Language models are few-shot learners. Adv Neural Inf Process Syst (NeurIPS 2020) 33:1877\u20131901. https:\/\/doi.org\/10.5555\/3495724.3495883","journal-title":"Adv Neural Inf Process Syst (NeurIPS 2020)"},{"key":"10762_CR8","unstructured":"Carlini N, Tramer F, Wallace E, Jagielski M, Herbert-Voss A, Lee K, Roberts A, Brown T, Song D, Erlingsson U et al (2021) Extracting training data from large language models. In: 30th USENIX Security Symposium (USENIX Security 21), pp 2633\u20132650. 
https:\/\/www.usenix.org\/conference\/usenixsecurity21\/presentation\/carlini-extracting"},{"issue":"1s","key":"10762_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3539225","volume":"19","author":"J Chen","year":"2023","unstructured":"Chen J, Pan Y, Li Y, Yao T, Chao H, Mei T (2023) Retrieval augmented convolutional encoder-decoder networks for video captioning. ACM Trans Multimed Comput Commun Appl (TOMCCAP) 19(1s):1\u201324. https:\/\/doi.org\/10.1145\/3539225","journal-title":"ACM Trans Multimed Comput Commun Appl (TOMCCAP)"},{"key":"10762_CR10","doi-asserted-by":"publisher","unstructured":"Chen J, Hu X, Li Z, Gao C, Xia X, Lo D (2024a) Code search is all you need? improving code suggestions with code search. In: 2024 IEEE\/ACM 46th International Conference on Software Engineering (ICSE), pp 880\u2013892. https:\/\/doi.org\/10.1145\/3597503.3639085","DOI":"10.1145\/3597503.3639085"},{"key":"10762_CR11","doi-asserted-by":"publisher","unstructured":"Chen M, Tian H, Liu Z, Ren X, Sun J (2024b) JumpCoder: go beyond autoregressive coder via online modification. In: Proceedings of the 62nd annual meeting of the association for computational linguistics (ACL, Volume 1: Long Papers), pp 11500\u201311520. https:\/\/doi.org\/10.18653\/v1\/2024.acl-long.619","DOI":"10.18653\/v1\/2024.acl-long.619"},{"key":"10762_CR12","doi-asserted-by":"publisher","unstructured":"Chen M, Tworek J, Jun H, Yuan Q, Pinto HPdO, Kaplan J, Edwards H, Burda Y, Joseph N, Brockman G et al (2021) Evaluating large language models trained on code. arXiv:2107.03374. 
https:\/\/doi.org\/10.48550\/arXiv.2107.03374","DOI":"10.48550\/arXiv.2107.03374"},{"issue":"240","key":"10762_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.5555\/3648699.3648939","volume":"24","author":"A Chowdhery","year":"2023","unstructured":"Chowdhery A, Narang S, Devlin J, Bosma M, Mishra G, Roberts A, Barham P, Chung HW, Sutton C, Gehrmann S et al (2023) Palm: scaling language modeling with pathways. J Mach Learn Res 24(240):1\u2013113. https:\/\/doi.org\/10.5555\/3648699.3648939","journal-title":"J Mach Learn Res"},{"key":"10762_CR14","doi-asserted-by":"publisher","unstructured":"Christopoulou F, Lampouras G, Gritta M, Zhang G, Guo Y, Li Z, Zhang Q, Xiao M, Shen B, Li L et al (2022) Pangu-coder: program synthesis with function-level language modeling. arXiv:2207.11280. https:\/\/doi.org\/10.48550\/arXiv.2207.11280","DOI":"10.48550\/arXiv.2207.11280"},{"key":"10762_CR15","doi-asserted-by":"publisher","unstructured":"Di P, Li J, Yu H, Jiang W, Cai W, Cao Y, Chen C, Chen D, Chen H, Chen L et al (2024) Codefuse-13b: a pretrained multi-lingual code large language model. In: Proceedings of the 46th International Conference on Software Engineering: Software Engineering in Practice (ICSE-SEIP 2024), pp 418\u2013429. https:\/\/doi.org\/10.1145\/3639477.3639719","DOI":"10.1145\/3639477.3639719"},{"key":"10762_CR16","unstructured":"Ding Y, Wang Z, Ahmad WU, Ding H, Tan M, Jain N, Ramanathan MK, Nallapati R, Bhatia P, Roth D, Xiang B (2023) Crosscodeeval: a diverse and multilingual benchmark for cross-file code completion. In: Thirty-seventh conference on neural information processing systems datasets and benchmarks track (NeurIPS 2023). https:\/\/openreview.net\/forum?id=wgDcbBMSfh"},{"key":"10762_CR17","doi-asserted-by":"publisher","unstructured":"Ding Y, Wang Z, Ahmad WU, Ramanathan MK, Nallapati R, Bhatia P, Roth D, Xiang B (2021) Leveraging passage retrieval with generative models for open domain question answering. 
In: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2021), pp 874\u2013880. https:\/\/doi.org\/10.18653\/v1\/2021.eacl-main.74","DOI":"10.18653\/v1\/2021.eacl-main.74"},{"key":"10762_CR18","unstructured":"Ding Y, Wang Z, Ahmad WU, Ramanathan MK, Nallapati R, Bhatia P, Roth D, Xiang B (2024) Cocomic: code completion by jointly modeling in-file and cross-file context. In: Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pp 3433\u20133445. https:\/\/aclanthology.org\/2024.lrec-main.305\/"},{"key":"10762_CR19","doi-asserted-by":"publisher","unstructured":"Eghbali A, Pradel M (2024) De-Hallucinator: mitigating LLM hallucinations in code generation tasks via iterative grounding. https:\/\/doi.org\/10.48550\/arXiv.2401.01701","DOI":"10.48550\/arXiv.2401.01701"},{"key":"10762_CR20","doi-asserted-by":"publisher","unstructured":"Guo D, Lu S, Duan N, Wang Y, Zhou M, Yin J (2022) Unixcoder: unified cross-modal pre-training for code representation. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (ACL 2022), pp 7212\u20137225. https:\/\/doi.org\/10.18653\/v1\/2022.acl-long.499","DOI":"10.18653\/v1\/2022.acl-long.499"},{"key":"10762_CR21","doi-asserted-by":"publisher","unstructured":"Guo D, Zhu Q, Yang D, Xie Z, Dong K, Zhang W, Chen G, Bi X, Wu Y, Li Y et al (2024) Deepseek-coder: when the large language model meets programming\u2013the rise of code intelligence. arXiv:2401.14196. https:\/\/doi.org\/10.48550\/arXiv.2401.14196","DOI":"10.48550\/arXiv.2401.14196"},{"key":"10762_CR22","doi-asserted-by":"publisher","unstructured":"Hendrycks D, Basart S, Kadavath S, Mazeika M, Arora A, Guo E, Burns C, Puranik S, He H, Song D, Steinhardt J (2021) Measuring coding challenge competence with APPS. 
https:\/\/doi.org\/10.48550\/arXiv.2105.09938","DOI":"10.48550\/arXiv.2105.09938"},{"key":"10762_CR23","doi-asserted-by":"publisher","unstructured":"Huang Y, Huang J (2024) A survey on retrieval-augmented text generation for large language models. https:\/\/doi.org\/10.48550\/arXiv.2404.10981","DOI":"10.48550\/arXiv.2404.10981"},{"key":"10762_CR24","doi-asserted-by":"publisher","unstructured":"Huang D, Zhang JM, Luck M, Bu Q, Qing Y, Cui H (2024) AgentCoder: multi-agent-based code generation with iterative testing and optimisation. https:\/\/doi.org\/10.48550\/arXiv.2312.13010","DOI":"10.48550\/arXiv.2312.13010"},{"key":"10762_CR25","doi-asserted-by":"publisher","unstructured":"Izadi M, Gismondi R, Gousios G (2022) Codefill: multi-token code completion by jointly learning from structure and naming sequences. In: Proceedings of the 44th International Conference on Software Engineering (ICSE 2022), pp 401\u2013412. https:\/\/doi.org\/10.1145\/3510003.3510172","DOI":"10.1145\/3510003.3510172"},{"key":"10762_CR26","doi-asserted-by":"publisher","unstructured":"Izadi M, Katzy J, Van\u00a0Dam T, Otten M, Popescu RM, Van\u00a0Deursen A (2024) Language models for code completion: a practical evaluation. In: Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering (ICSE 2024), pp 1\u201313. https:\/\/doi.org\/10.1145\/3597503.3639138","DOI":"10.1145\/3597503.3639138"},{"key":"10762_CR27","doi-asserted-by":"publisher","unstructured":"Jin M, Shahriar S, Tufano M, Shi X, Lu S, Sundaresan N, Svyatkovskiy A (2023) Inferfix: end-to-end program repair with llms. In: Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC\/FSE 2023), pp 1646\u20131656. 
https:\/\/doi.org\/10.1145\/3611643.3613892","DOI":"10.1145\/3611643.3613892"},{"key":"10762_CR28","unstructured":"Kim J, Nam J, Mo S, Park J, Lee S-W, Seo M, Ha J-W, Shin J (2024) Sure: summarizing retrievals using answer candidates for open-domain QA of LLMs. In: The Twelfth International Conference on Learning Representations (ICLR 2024). https:\/\/iclr.cc\/media\/iclr-2024\/Slides\/17509.pdf"},{"issue":"4","key":"10762_CR29","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1145\/146370.146380","volume":"24","author":"K Kukich","year":"1992","unstructured":"Kukich K (1992) Techniques for automatically correcting words in text. ACM Comput Surv (CSUR) 24(4):377\u2013439. https:\/\/doi.org\/10.1145\/146370.146380","journal-title":"ACM Comput Surv (CSUR)"},{"key":"10762_CR30","doi-asserted-by":"publisher","unstructured":"Li JA, Li Y, Li G, Hu X, Xia X, Jin Z (2021) Editsum: a retrieve-and-edit framework for source code summarization. In: 2021 36th IEEE\/ACM International Conference on Automated Software Engineering (ASE 2021). IEEE, pp 155\u2013166. https:\/\/doi.org\/10.1109\/ASE51524.2021.9678724","DOI":"10.1109\/ASE51524.2021.9678724"},{"key":"10762_CR31","doi-asserted-by":"publisher","unstructured":"Li R, Allal LB, Zi Y, Muennighoff N, Kocetkov D, Mou C, Marone M, Akiki C, Li J, Chim J et al (2023) StarCoder: may the source be with you!. https:\/\/doi.org\/10.48550\/arXiv.2305.06161","DOI":"10.48550\/arXiv.2305.06161"},{"key":"10762_CR32","doi-asserted-by":"publisher","unstructured":"Liang M, Xie X, Zhang G, Zheng X, Di P, Jiang W, Chen H, Wang C, Fan G (2024) REPOFUSE: repository-level code completion with fused dual context. 
https:\/\/doi.org\/10.48550\/arXiv.2402.14323","DOI":"10.48550\/arXiv.2402.14323"},{"key":"10762_CR33","doi-asserted-by":"publisher","unstructured":"Liao D, Pan S, Sun X, Ren X, Huang Q, Xing Z, Jin H, Li Q (2024) A3-codgen: a repository-level code generation framework for code reuse with local-aware, global-aware, and third-party-library-aware. IEEE Trans Softw Eng (TSE) (01):1\u201316. https:\/\/doi.org\/10.1109\/TSE.2024.3486195","DOI":"10.1109\/TSE.2024.3486195"},{"key":"10762_CR34","doi-asserted-by":"publisher","unstructured":"Liu Z, Chen C, Wang J, Chen M, Wu B, Tian Z, Huang Y, Hu J, Wang Q (2024a) Testing the limits: unusual text inputs generation for mobile app crash detection with large language model. In: Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering (ICSE 2024), pp 1\u201312. https:\/\/doi.org\/10.1145\/3597503.3639118","DOI":"10.1145\/3597503.3639118"},{"key":"10762_CR35","doi-asserted-by":"publisher","unstructured":"Liu T, Xu C, McAuley J (2023) RepoBench: benchmarking repository-level code auto-completion systems. https:\/\/doi.org\/10.48550\/arXiv.2306.03091","DOI":"10.48550\/arXiv.2306.03091"},{"key":"10762_CR36","doi-asserted-by":"publisher","unstructured":"Liu W, Yu A, Zan D, Shen B, Zhang W, Zhao H, Jin Z, Wang Q (2024b) Graphcoder: enhancing repository-level code completion via code context graph-based retrieval and language model. arXiv:2406.07003. https:\/\/doi.org\/10.48550\/arXiv.2406.07003","DOI":"10.48550\/arXiv.2406.07003"},{"key":"10762_CR37","doi-asserted-by":"publisher","unstructured":"Lozhkov A, Li R, Allal LB, Cassano F, Lamy-Poirier J, Tazi N, Tang A, Pykhtar D, Liu J, Wei Y et al (2024) StarCoder 2 and The Stack v2: the next generation. 
https:\/\/doi.org\/10.48550\/arXiv.2402.19173","DOI":"10.48550\/arXiv.2402.19173"},{"key":"10762_CR38","doi-asserted-by":"publisher","unstructured":"Mallen A, Asai A, Zhong V, Das R, Khashabi D, Hajishirzi H (2023) When not to trust language models: investigating effectiveness of parametric and non-parametric memories. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (ACL 2023). https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.546","DOI":"10.18653\/v1\/2023.acl-long.546"},{"key":"10762_CR39","doi-asserted-by":"publisher","unstructured":"Nijkamp E, Hayashi H, Xiong C, Savarese S, Zhou Y (2023a) Codegen2: lessons for training llms on programming and natural languages. arXiv:2305.02309. https:\/\/doi.org\/10.48550\/arXiv.2305.02309","DOI":"10.48550\/arXiv.2305.02309"},{"key":"10762_CR40","doi-asserted-by":"publisher","unstructured":"Nijkamp E, Pang B, Hayashi H, Tu L, Wang H, Zhou Y, Savarese S, Xiong C (2023b) CodeGen: an open large language model for code with multi-turn program synthesis. https:\/\/doi.org\/10.48550\/arXiv.2203.13474","DOI":"10.48550\/arXiv.2203.13474"},{"key":"10762_CR41","unstructured":"OpenAI (2023) Introducing chatgpt. https:\/\/openai.com\/blog\/chatgpt"},{"key":"10762_CR42","doi-asserted-by":"publisher","unstructured":"Parvez MR, Ahmad W, Chakraborty S, Ray B, Chang K-W (2021) Retrieval augmented code generation and summarization. In: Findings of the Association for Computational Linguistics (EMNLP 2021), pp 2719\u20132734. https:\/\/doi.org\/10.18653\/v1\/2021.findings-emnlp.232","DOI":"10.18653\/v1\/2021.findings-emnlp.232"},{"key":"10762_CR43","doi-asserted-by":"publisher","unstructured":"Phan HN, Phan HN, Nguyen TN, Bui ND (2024) Repohyper: better context retrieval is all you need for repository-level code completion. arXiv:2403.06095. 
https:\/\/doi.org\/10.48550\/arXiv.2403.06095","DOI":"10.48550\/arXiv.2403.06095"},{"key":"10762_CR44","doi-asserted-by":"publisher","unstructured":"Ramos R, Martins B, Elliott D, Kementchedjhieva Y (2023) Smallcap: lightweight image captioning prompted with retrieval augmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2023), pp 2840\u20132849. https:\/\/doi.org\/10.1109\/CVPR52729.2023.00278","DOI":"10.1109\/CVPR52729.2023.00278"},{"issue":"4","key":"10762_CR45","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1561\/1500000019","volume":"3","author":"S Robertson","year":"2009","unstructured":"Robertson S, Zaragoza H et al (2009) The probabilistic relevance framework: Bm25 and beyond. Found Trends\u00ae Inf Retriev 3(4):333\u2013389. https:\/\/doi.org\/10.1561\/1500000019","journal-title":"Found Trends\u00ae Inf Retriev"},{"key":"10762_CR46","doi-asserted-by":"publisher","unstructured":"Roziere B, Gehring J, Gloeckle F, Sootla S, Gat I, Tan XE, Adi Y, Liu J, Remez T, Rapin J et al (2023) Code llama: open foundation models for code. arXiv:2308.12950. https:\/\/doi.org\/10.48550\/arXiv.2308.12950","DOI":"10.48550\/arXiv.2308.12950"},{"key":"10762_CR47","doi-asserted-by":"publisher","unstructured":"Shrivastava D, Kocetkov D, Vries H, Bahdanau D, Scholak T (2023) RepoFusion: training code models to understand your repository. https:\/\/doi.org\/10.48550\/arXiv.2306.10998","DOI":"10.48550\/arXiv.2306.10998"},{"key":"10762_CR48","doi-asserted-by":"publisher","unstructured":"Spiess C, Gros D, Pai KS, Pradel M, Rabin MRI, Alipour A, Jha S, Devanbu P, Ahmed T (2025) Calibration and correctness of language models for code. In: 2025 IEEE\/ACM 47th International Conference on Software Engineering (ICSE), pp 540\u2013552. 
https:\/\/doi.org\/10.1109\/ICSE55347.2025.00040","DOI":"10.1109\/ICSE55347.2025.00040"},{"key":"10762_CR49","doi-asserted-by":"publisher","unstructured":"Tsai Y, Liu M, Ren H (2024) Rtlfixer: automatically fixing rtl syntax errors with large language model. In: Proceedings of the 61st ACM\/IEEE Design Automation Conference (DAC 2024), pp 1\u20136. https:\/\/doi.org\/10.1145\/3649329.3657353","DOI":"10.1145\/3649329.3657353"},{"key":"10762_CR50","doi-asserted-by":"publisher","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst (NIPS 2017) 30. https:\/\/doi.org\/10.5555\/3295222.3295349","DOI":"10.5555\/3295222.3295349"},{"issue":"4","key":"10762_CR51","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3464689","volume":"30","author":"H Wang","year":"2021","unstructured":"Wang H, Xia X, Lo D, He Q, Wang X, Grundy J (2021) Context-aware retrieval-based deep commit message generation. ACM Trans Softw Eng Methodology (TOSEM) 30(4):1\u201330. https:\/\/doi.org\/10.1145\/3464689","journal-title":"ACM Trans Softw Eng Methodology (TOSEM)"},{"key":"10762_CR52","doi-asserted-by":"publisher","DOI":"10.1145\/3714462","author":"C Wang","year":"2025","unstructured":"Wang C, Zhang J, Feng Y, Li T, Sun W, Liu Y, Peng X (2025a) Teaching code llms to use autocompletion tools in repository-level code generation. ACM Trans Softw Eng Methodology (TOSEM). https:\/\/doi.org\/10.1145\/3714462","journal-title":"ACM Trans Softw Eng Methodology (TOSEM)"},{"key":"10762_CR53","doi-asserted-by":"publisher","unstructured":"Wang Y, Le H, Gotmare AD, Bui ND, Li J, Hoi S (2023) Codet5+: open code large language models for code understanding and generation. In: The 2023 Conference on Empirical Methods in Natural Language Processing (EMNLP 2023). 
https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.68","DOI":"10.18653\/v1\/2023.emnlp-main.68"},{"key":"10762_CR54","doi-asserted-by":"crossref","unstructured":"Wang Y, Wang Y, Guo D, Chen J, Zhang R, Ma Y, Zheng Z (2025b) Rlcoder: reinforcement learning for repository-level code completion. In: 2025 IEEE\/ACM 47th International Conference on Software Engineering (ICSE 2025), pp 165\u2013177. https:\/\/conf.researchr.org\/details\/icse-2025\/icse-2025-research-track\/24\/RLCoder-Reinforcement-Learning-for-Repository-Level-Code-Completion","DOI":"10.1109\/ICSE55347.2025.00014"},{"key":"10762_CR55","doi-asserted-by":"publisher","unstructured":"Wang Y, Wang W, Joty S, Hoi SCH (2021) CodeT5: identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP 2021). Association for Computational Linguistics, Online and Punta Cana, Dominican Republic, pp 8696\u20138708. https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.685","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"10762_CR56","doi-asserted-by":"publisher","unstructured":"Wu D, Ahmad WU, Zhang D, Ramanathan MK, Ma X (2024) Repoformer: selective retrieval for repository-level code completion. arXiv:2403.10059. https:\/\/doi.org\/10.48550\/arXiv.2403.10059","DOI":"10.48550\/arXiv.2403.10059"},{"key":"10762_CR57","doi-asserted-by":"publisher","unstructured":"Wu M, Cao S (2024) LLM-augmented retrieval: enhancing retrieval models through language models and doc-level embedding. https:\/\/doi.org\/10.48550\/arXiv.2404.05825","DOI":"10.48550\/arXiv.2404.05825"},{"key":"10762_CR58","doi-asserted-by":"publisher","unstructured":"Xue Z, Gao Z, Wang S, Hu X, Xia X, Li S (2024) Selfpico: self-guided partial code execution with llms. In: Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis. ISSTA 2024, pp 1389\u20131401. 
https:\/\/doi.org\/10.1145\/3650212.3680368","DOI":"10.1145\/3650212.3680368"},{"key":"10762_CR59","doi-asserted-by":"publisher","unstructured":"Zhang F, Chen B, Zhang Y, Keung J, Liu J, Zan D, Mao Y, Lou J-G, Chen W (2023) Repocoder: repository-level code completion through iterative retrieval and generation. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing (EMNLP 2023), pp 2471\u20132484. https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.151","DOI":"10.18653\/v1\/2023.emnlp-main.151"},{"key":"10762_CR60","doi-asserted-by":"publisher","unstructured":"Zhao P, Zhang H, Yu Q, Wang Z, Geng Y, Fu F, Yang L, Zhang W, Cui B (2024) Retrieval-augmented generation for ai-generated content: a survey. arXiv:2402.19473. https:\/\/doi.org\/10.48550\/arXiv.2402.19473","DOI":"10.48550\/arXiv.2402.19473"},{"key":"10762_CR61","doi-asserted-by":"publisher","unstructured":"Zheng Q, Xia X, Zou X, Dong Y, Wang S, Xue Y, Shen L, Wang Z, Wang A, Li Y, Su T, Yang Z, Tang J (2023) Codegeex: a pre-trained model for code generation with multilingual benchmarking on humaneval-x. In: Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2023), pp 5673\u20135684. https:\/\/doi.org\/10.1145\/3580305.3599790","DOI":"10.1145\/3580305.3599790"},{"key":"10762_CR62","doi-asserted-by":"publisher","unstructured":"Zhu T, Li Z, Pan M, Shi C, Zhang T, Pei Y, Li X (2024) Deep is better? an empirical comparison of information retrieval and deep learning approaches to code summarization. ACM Trans Softw Eng Methodology (TOSEM) 33(3). 
https:\/\/doi.org\/10.1145\/3631975","DOI":"10.1145\/3631975"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10762-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-025-10762-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10762-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T04:33:56Z","timestamp":1768365236000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-025-10762-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,5]]},"references-count":62,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["10762"],"URL":"https:\/\/doi.org\/10.1007\/s10664-025-10762-7","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,5]]},"assertion":[{"value":"8 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"Not 
applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed Consent"}},{"value":"No conflict of interest exists in the submission of this manuscript, and manuscript is approved by all authors for publication. The work described was original research that has not been published previously, and not under consideration for publication elsewhere, in whole or in part.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Clinical Trial Number"}}],"article-number":"26"}}