{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T21:17:39Z","timestamp":1780694259246,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T00:00:00Z","timestamp":1726012800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,9,11]]},"DOI":"10.1145\/3650212.3652130","type":"proceedings-article","created":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T11:44:25Z","timestamp":1726055065000},"page":"313-324","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["FT2Ra: A Fine-Tuning-Inspired Approach to Retrieval-Augmented Code Completion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-8002-8068","authenticated-orcid":false,"given":"Qi","family":"Guo","sequence":"first","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0752-6764","authenticated-orcid":false,"given":"Xiaohong","family":"Li","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1288-6502","authenticated-orcid":false,"given":"Xiaofei","family":"Xie","sequence":"additional","affiliation":[{"name":"Singapore Management University, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5598-4006","authenticated-orcid":false,"given":"Shangqing","family":"Liu","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8062-9986","authenticated-orcid":false,"given":"Ze","family":"Tang","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9080-6865","authenticated-orcid":false,"given":"Ruitao","family":"Feng","sequence":"additional","affiliation":[{"name":"Singapore Management University, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3847-6760","authenticated-orcid":false,"given":"Junjie","family":"Wang","sequence":"additional","affiliation":[{"name":"Tianjin University, Tianjin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1773-0942","authenticated-orcid":false,"given":"Jidong","family":"Ge","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0517-7801","authenticated-orcid":false,"given":"Lei","family":"Bu","sequence":"additional","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,9,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2022. GitHub Copilot. https:\/\/github.com\/features\/copilot"},{"key":"e_1_3_2_1_2_1","unstructured":"2022. intellicode. https:\/\/visualstudio.microsoft.com\/services\/intellicode"},{"key":"e_1_3_2_1_3_1","unstructured":"2023. ft2ra website. https:\/\/sites.google.com\/view\/ft2ra\/home"},{"key":"e_1_3_2_1_4_1","unstructured":"2023. Stanford University CS229: Machine Learning. https:\/\/cs229.stanford.edu\/ Accessed: 2023-12-10"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639183"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR.2013.6624029"},{"key":"e_1_3_2_1_7_1","volume-title":"International Conference on Machine Learning. 468\u2013485","author":"Alon Uri","year":"2022","unstructured":"Uri Alon, Frank Xu, Junxian He, Sudipta Sengupta, Dan Roth, and Graham Neubig. 2022. Neuro-symbolic language modeling with automaton-augmented retrieval. In International Conference on Machine Learning. 468\u2013485."},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on machine learning. 2206\u20132240","author":"Borgeaud Sebastian","year":"2022","unstructured":"Sebastian Borgeaud, Arthur Mensch, Jordan Hoffmann, Trevor Cai, Eliza Rutherford, Katie Millican, George Bm Van Den Driessche, Jean-Baptiste Lespiau, Bogdan Damoc, and Aidan Clark. 2022. Improving language models by retrieving from trillions of tokens. In International conference on machine learning. 2206\u20132240."},{"key":"e_1_3_2_1_9_1","first-page":"23908","article-title":"Decoupling knowledge from memorization: Retrieval-augmented prompt learning","volume":"35","author":"Chen Xiang","year":"2022","unstructured":"Xiang Chen, Lei Li, Ningyu Zhang, Xiaozhuan Liang, Shumin Deng, Chuanqi Tan, Fei Huang, Luo Si, and Huajun Chen. 2022. Decoupling knowledge from memorization: Retrieval-augmented prompt learning. Advances in Neural Information Processing Systems, 35 (2022), 23908\u201323922.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_10_1","unstructured":"Michiel De Jong Yury Zemlyanskiy Nicholas FitzGerald Fei Sha and William Cohen. 2021. Mention memory: incorporating textual knowledge into transformers through entity mention attention. arXiv preprint arXiv:2110.06176."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Andrew Drozdov Shufan Wang Razieh Rahimi Andrew McCallum Hamed Zamani and Mohit Iyyer. 2022. You can\u2019t pick your neighbors or can you? When and how to rely on retrieval in the k NN-LM. arXiv preprint arXiv:2210.15859.","DOI":"10.18653\/v1\/2022.findings-emnlp.218"},{"key":"e_1_3_2_1_12_1","volume-title":"Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155.","author":"Feng Zhangyin","year":"2020","unstructured":"Zhangyin Feng, Daya Guo, Duyu Tang, Nan Duan, Xiaocheng Feng, Ming Gong, Linjun Shou, Bing Qin, Ting Liu, and Daxin Jiang. 2020. Codebert: A pre-trained model for programming and natural languages. arXiv preprint arXiv:2002.08155."},{"key":"e_1_3_2_1_13_1","volume-title":"Unixcoder: Unified cross-modal pre-training for code representation. arXiv preprint arXiv:2203.03850.","author":"Guo Daya","year":"2022","unstructured":"Daya Guo, Shuai Lu, Nan Duan, Yanlin Wang, Ming Zhou, and Jian Yin. 2022. Unixcoder: Unified cross-modal pre-training for code representation. arXiv preprint arXiv:2203.03850."},{"key":"e_1_3_2_1_14_1","volume-title":"International conference on machine learning. 3929\u20133938","author":"Guu Kelvin","year":"2020","unstructured":"Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat, and Mingwei Chang. 2020. Retrieval augmented language model pre-training. In International conference on machine learning. 3929\u20133938."},{"key":"e_1_3_2_1_15_1","unstructured":"Junxian He Graham Neubig and Taylor Berg-Kirkpatrick. 2021. Efficient nearest neighbor language models. arXiv preprint arXiv:2109.04212."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2902362"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591687"},{"key":"e_1_3_2_1_18_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685.","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685."},{"key":"e_1_3_2_1_19_1","unstructured":"Hamel Husain Ho-Hsiang Wu Tiferet Gazit Miltiadis Allamanis and Marc Brockschmidt. 2019. Codesearchnet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436."},{"key":"e_1_3_2_1_20_1","unstructured":"Gautier Izacard Patrick Lewis Maria Lomeli Lucas Hosseini Fabio Petroni Timo Schick Jane Dwivedi-Yu Armand Joulin Sebastian Riedel and Edouard Grave. 2022. Few-shot learning with retrieval augmented language models. arXiv preprint arXiv:2208.03299."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Zhengbao Jiang Frank F Xu Luyu Gao Zhiqing Sun Qian Liu Jane Dwivedi-Yu Yiming Yang Jamie Callan and Graham Neubig. 2023. Active retrieval augmented generation. arXiv preprint arXiv:2305.06983.","DOI":"10.18653\/v1\/2023.emnlp-main.495"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i4.25642"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Vladimir Karpukhin Barlas O\u011fuz Sewon Min Patrick Lewis Ledell Wu Sergey Edunov Danqi Chen and Wen-tau Yih. 2020. Dense passage retrieval for open-domain question answering. arXiv preprint arXiv:2004.04906.","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=HklBjCEKvH","author":"Khandelwal Urvashi","year":"2020","unstructured":"Urvashi Khandelwal, Omer Levy, Dan Jurafsky, Luke Zettlemoyer, and Mike Lewis. 2020. Generalization through Memorization: Nearest Neighbor Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=HklBjCEKvH"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE43902.2021.00026"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Brian Lester Rami Al-Rfou and Noah Constant. 2021. The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691.","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"e_1_3_2_1_27_1","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, and Tim Rockt\u00e4schel. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems, 33 (2020), 9459\u20139474.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_28_1","unstructured":"Jian Li Yue Wang Michael R Lyu and Irwin King. 2017. Code completion with neural attention and pointer networks. arXiv preprint arXiv:1711.09573."},{"key":"e_1_3_2_1_29_1","volume-title":"Yangtian Zi, Niklas Muennighoff, Denis Kocetkov, Chenghao Mou, Marc Marone, Christopher Akiki, Jia Li, and Jenny Chim.","author":"Li Raymond","year":"2023","unstructured":"Raymond Li, Loubna Ben Allal, Yangtian Zi, Niklas Muennighoff, Denis Kocetkov, Chenghao Mou, Marc Marone, Christopher Akiki, Jia Li, and Jenny Chim. 2023. StarCoder: may the source be with you!. arXiv preprint arXiv:2305.06161."},{"key":"e_1_3_2_1_30_1","unstructured":"Chang Liu Xin Wang Richard Shin Joseph E Gonzalez and Dawn Song. 2016. Neural code completion."},{"key":"e_1_3_2_1_31_1","unstructured":"Shangqing Liu Yu Chen Xiaofei Xie Jingkai Siow and Yang Liu. 2020. Retrieval-augmented generation for code summarization via hybrid gnn. arXiv preprint arXiv:2006.05405."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2020.3038681"},{"key":"e_1_3_2_1_33_1","volume-title":"Commitbart: A large pre-trained model for github commits. arXiv preprint arXiv:2208.08100.","author":"Liu Shangqing","year":"2022","unstructured":"Shangqing Liu, Yanzhou Li, Xiaofei Xie, and Yang Liu. 2022. Commitbart: A large pre-trained model for github commits. arXiv preprint arXiv:2208.08100."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00207"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Xiao Liu Yanan Zheng Zhengxiao Du Ming Ding Yujie Qian Zhilin Yang and Jie Tang. 2023. GPT understands too. AI Open.","DOI":"10.1016\/j.aiopen.2023.08.012"},{"key":"e_1_3_2_1_36_1","volume-title":"Reacc: A retrieval-augmented code completion framework. arXiv preprint arXiv:2203.07722.","author":"Lu Shuai","year":"2022","unstructured":"Shuai Lu, Nan Duan, Hojae Han, Daya Guo, Seung-won Hwang, and Alexey Svyatkovskiy. 2022. Reacc: A retrieval-augmented code completion framework. arXiv preprint arXiv:2203.07722."},{"key":"e_1_3_2_1_37_1","volume-title":"Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664.","author":"Lu Shuai","year":"2021","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, and Duyu Tang. 2021. Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:2102.04664."},{"key":"e_1_3_2_1_38_1","volume-title":"Codegen: An open large language model for code with multi-turn program synthesis. arXiv preprint arXiv:2203.13474.","author":"Nijkamp Erik","year":"2022","unstructured":"Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, and Caiming Xiong. 2022. Codegen: An open large language model for code with multi-turn program synthesis. arXiv preprint arXiv:2203.13474."},{"key":"e_1_3_2_1_39_1","unstructured":"OpenAI. 2023. ChatGPTblog. https:\/\/openai.com\/blog\/chatgpt"},{"key":"e_1_3_2_1_40_1","volume-title":"Saikat Chakraborty, Baishakhi Ray, and Kai-Wei Chang.","author":"Parvez Md Rizwan","year":"2021","unstructured":"Md Rizwan Parvez, Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, and Kai-Wei Chang. 2021. Retrieval augmented code generation and summarization. arXiv preprint arXiv:2108.11601."},{"key":"e_1_3_2_1_41_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog, 1, 8","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. 2019. Language models are unsupervised multitask learners. OpenAI blog, 1, 8 (2019), 9."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Ori Ram Yoav Levine Itay Dalmedigos Dor Muhlgay Amnon Shashua Kevin Leyton-Brown and Yoav Shoham. 2023. In-context retrieval-augmented language models. arXiv preprint arXiv:2302.00083.","DOI":"10.1162\/tacl_a_00605"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Veselin Raychev Pavol Bielik and Martin Vechev. 2016. Probabilistic Model for Code with Decision Trees. ACM SIGPLAN Notices 731\u2013747.","DOI":"10.1145\/3022671.2984041"},{"key":"e_1_3_2_1_44_1","volume-title":"The probabilistic relevance framework: BM25 and beyond. Foundations and Trends\u00ae in Information Retrieval, 3, 4","author":"Robertson Stephen","year":"2009","unstructured":"Stephen Robertson and Hugo Zaragoza. 2009. The probabilistic relevance framework: BM25 and beyond. Foundations and Trends\u00ae in Information Retrieval, 3, 4 (2009), 333\u2013389."},{"key":"e_1_3_2_1_45_1","volume-title":"Replug: Retrieval-augmented black-box language models. arXiv preprint arXiv:2301.12652.","author":"Shi Weijia","year":"2023","unstructured":"Weijia Shi, Sewon Min, Michihiro Yasunaga, Minjoon Seo, Rich James, Mike Lewis, Luke Zettlemoyer, and Wen-tau Yih. 2023. Replug: Retrieval-augmented black-box language models. arXiv preprint arXiv:2301.12652."},{"key":"e_1_3_2_1_46_1","volume-title":"International Conference on Machine Learning. 31693\u201331715","author":"Shrivastava Disha","year":"2023","unstructured":"Disha Shrivastava, Hugo Larochelle, and Daniel Tarlow. 2023. Repository-level prompt generation for large language models of code. In International Conference on Machine Learning. 31693\u201331715."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Ze Tang Jidong Ge Shangqing Liu Tingwei Zhu Tongtong Xu Liguo Huang and Bin Luo. 2023. Domain Adaptive Code Completion via Language Models and Decoupled Domain Databases. arXiv preprint arXiv:2308.09313.","DOI":"10.1109\/ASE56229.2023.00076"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Yue Wang Weishi Wang Shafiq Joty and Steven CH Hoi. 2021. Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv preprint arXiv:2109.00859.","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"e_1_3_2_1_49_1","volume-title":"Md Rizwan Parvez, and Graham Neubig","author":"Wang Zhiruo","year":"2023","unstructured":"Zhiruo Wang, Jun Araki, Zhengbao Jiang, Md Rizwan Parvez, and Graham Neubig. 2023. Learning to Filter Context for Retrieval-Augmented Generation. arXiv preprint arXiv:2311.08377."},{"key":"e_1_3_2_1_50_1","unstructured":"Wikipedia. 2023. Empirical Probability. https:\/\/en.wikipedia.org\/wiki\/Empirical_probability"},{"key":"e_1_3_2_1_51_1","unstructured":"Wikipedia. 2023. Frequency (statistics). https:\/\/en.wikipedia.org\/wiki\/Frequency_(statistics)"},{"key":"e_1_3_2_1_52_1","unstructured":"Frank F Xu Uri Alon and Graham Neubig. 2023. Why do Nearest Neighbor Language Models Work? arXiv preprint arXiv:2301.02828."},{"key":"e_1_3_2_1_53_1","volume-title":"Leandojo: Theorem proving with retrieval-augmented language models. arXiv preprint arXiv:2306.15626.","author":"Yang Kaiyu","year":"2023","unstructured":"Kaiyu Yang, Aidan M Swope, Alex Gu, Rahul Chalamala, Peiyang Song, Shixing Yu, Saad Godil, Ryan Prenger, and Anima Anandkumar. 2023. Leandojo: Theorem proving with retrieval-augmented language models. arXiv preprint arXiv:2306.15626."},{"key":"e_1_3_2_1_54_1","volume-title":"Repocoder: Repository-level code completion through iterative retrieval and generation. arXiv preprint arXiv:2303.12570.","author":"Zhang Fengji","year":"2023","unstructured":"Fengji Zhang, Bei Chen, Yue Zhang, Jin Liu, Daoguang Zan, Yi Mao, Jian-Guang Lou, and Weizhu Chen. 2023. Repocoder: Repository-level code completion through iterative retrieval and generation. arXiv preprint arXiv:2303.12570."}],"event":{"name":"ISSTA '24: 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","location":"Vienna Austria","acronym":"ISSTA '24","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","AITO"]},"container-title":["Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3650212.3652130","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3650212.3652130","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:50:06Z","timestamp":1750287006000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3650212.3652130"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,11]]},"references-count":54,"alternative-id":["10.1145\/3650212.3652130","10.1145\/3650212"],"URL":"https:\/\/doi.org\/10.1145\/3650212.3652130","relation":{},"subject":[],"published":{"date-parts":[[2024,9,11]]},"assertion":[{"value":"2024-09-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}