{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T02:12:03Z","timestamp":1775873523671,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T00:00:00Z","timestamp":1712880000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,12]]},"DOI":"10.1145\/3597503.3639138","type":"proceedings-article","created":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T16:43:26Z","timestamp":1712940206000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":49,"title":["Language Models for Code Completion: A Practical Evaluation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5093-5523","authenticated-orcid":false,"given":"Maliheh","family":"Izadi","sequence":"first","affiliation":[{"name":"Delft University of Technology, Delft, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9574-2414","authenticated-orcid":false,"given":"Jonathan","family":"Katzy","sequence":"additional","affiliation":[{"name":"Delft University of Technology, Delft, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3659-7068","authenticated-orcid":false,"given":"Tim","family":"Van Dam","sequence":"additional","affiliation":[{"name":"Delft University of Technology, Delft, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6525-8127","authenticated-orcid":false,"given":"Marc","family":"Otten","sequence":"additional","affiliation":[{"name":"Delft University of Technology, Delft, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6251-770X","authenticated-orcid":false,"given":"Razvan Mihai","family":"Popescu","sequence":"additional","affiliation":[{"name":"Delft University of Technology, Delft, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4850-3312","authenticated-orcid":false,"given":"Arie","family":"Van Deursen","sequence":"additional","affiliation":[{"name":"Delft University of Technology, Delft, Netherlands"}]}],"member":"320","published-online":{"date-parts":[[2024,4,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510172"},{"key":"e_1_3_2_1_2_1","volume-title":"The Eleventh International Conference on Learning Representations","author":"Fried Daniel","year":"2023","unstructured":"Daniel Fried, Armen Aghajanyan, Jessy Lin, Sida Wang, Eric Wallace, Freda Shi, Ruiqi Zhong, Scott Yih, Luke Zettlemoyer, and Mike Lewis. Incoder: A generative model for code infilling and synthesis. In The Eleventh International Conference on Learning Representations, 2023."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.499"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"e_1_3_2_1_5_1","volume-title":"Yangtian Zi, Niklas Muennighoff, Denis Kocetkov, Chenghao Mou, Marc Marone, Christopher Akiki, Jia Li, Jenny Chim, et al. Starcoder: may the source be with you! arXiv preprint arXiv:2305.06161","author":"Li Raymond","year":"2023","unstructured":"Raymond Li, Loubna Ben Allal, Yangtian Zi, Niklas Muennighoff, Denis Kocetkov, Chenghao Mou, Marc Marone, Christopher Akiki, Jia Li, Jenny Chim, et al. Starcoder: may the source be with you! arXiv preprint arXiv:2305.06161, 2023."},{"key":"e_1_3_2_1_6_1","volume-title":"Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero","author":"Allal Loubna Ben","year":"2023","unstructured":"Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo Garcia del Rio, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, and Leandro von Werra. Santacoder: don't reach for the stars! In Deep Learning for Code Workshop (DL4C), 2023."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417058"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE43902.2021.00026"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"e_1_3_2_1_10_1","volume-title":"International Conference on Learning Representations","author":"Nijkamp Erik","year":"2022","unstructured":"Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Haiquan Wang, Yingbo Zhou, Silvio Savarese, and Caiming Xiong. Codegen: An open large language model for code with multi-turn program synthesis. In International Conference on Learning Representations, 2022."},{"key":"e_1_3_2_1_11_1","volume-title":"Gpt-neox-20b: An open-source autoregressive language model. arXiv preprint arXiv:2204.06745","author":"Black Sid","year":"2022","unstructured":"Sid Black, Stella Biderman, Eric Hallahan, Quentin Anthony, Leo Gao, Laurence Golding, Horace He, Connor Leahy, Kyle McDonell, Jason Phang, et al. Gpt-neox-20b: An open-source autoregressive language model. arXiv preprint arXiv:2204.06745, 2022."},{"key":"e_1_3_2_1_12_1","first-page":"2397","volume-title":"International Conference on Machine Learning","author":"Biderman Stella","year":"2023","unstructured":"Stella Biderman, Hailey Schoelkopf, Quentin Gregory Anthony, Herbie Bradley, Kyle O' Brien, Eric Hallahan, Mohammad Aflah Khan, Shivanshu Purohit, USVSN Sai Prashanth, Edward Raff, et al. Pythia: A suite for analyzing large language models across training and scaling. In International Conference on Machine Learning, pages 2397--2430. PMLR, 2023."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3520312.3534862"},{"key":"e_1_3_2_1_14_1","unstructured":"Github. Github copilot. https:\/\/github.com\/features\/copilot [Accessed: 2023]."},{"key":"e_1_3_2_1_15_1","unstructured":"Google. Ml enhanced code completion. https:\/\/ai.googleblog.com\/2022\/07\/ml-enhanced-code-completion-improves.html [Accessed: 2023]."},{"key":"e_1_3_2_1_16_1","unstructured":"Amazon. Amazon codewhisperer. https:\/\/aws.amazon.com\/codewhisperer [Accessed: 2023]."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3558968"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP52600.2021.00022"},{"key":"e_1_3_2_1_19_1","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)","author":"Lu Shuai","year":"2021","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, Duyu Tang, Ge Li, Lidong Zhou, Linjun Shou, Long Zhou, Michele Tufano, MING GONG, Ming Zhou, Nan Duan, Neel Sundaresan, Shao Kun Deng, Shengyu Fu, and Shujie LIU. CodeXGLUE: A machine learning benchmark dataset for code understanding and generation. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1), 2021."},{"key":"e_1_3_2_1_20_1","volume-title":"CodeSearchNet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436","author":"Husain Hamel","year":"2019","unstructured":"Hamel Husain, Ho-Hsiang Wu, Tiferet Gazit, Miltiadis Allamanis, and Marc Brockschmidt. CodeSearchNet challenge: Evaluating the state of semantic code search. arXiv preprint arXiv:1909.09436, 2019."},{"key":"e_1_3_2_1_21_1","unstructured":"Microsoft. Microsoft intellicode. https:\/\/visualstudio.microsoft.com\/services\/intellicode [Accessed: 2023]."},{"key":"e_1_3_2_1_22_1","unstructured":"Maliheh Izadi Jonathan Katzy Tim van Dam Marc Otten Razvan Mihai Popescu and Arie van Deursen. Study material including source code and data. https:\/\/github.com\/AISE-TUDelft\/Code4MeEvaluation [Accessed: 2024]."},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. BERT: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pages 4171--4186, Minneapolis, Minnesota, June 2019. Association for Computational Linguistics."},{"key":"e_1_3_2_1_24_1","volume-title":"July","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. RoBERTa: A Robustly Optimized BERT Pretraining Approach. arXiv e-prints, page arXiv:1907.11692, July 2019."},{"key":"e_1_3_2_1_25_1","volume-title":"Improving language understanding by generative pre-training","author":"Radford Alec","year":"2018","unstructured":"Alec Radford and Karthik Narasimhan. Improving language understanding by generative pre-training. 2018."},{"key":"e_1_3_2_1_26_1","volume-title":"Language models are unsupervised multitask learners","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeff Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. Language models are unsupervised multitask learners. 2019."},{"key":"e_1_3_2_1_27_1","first-page":"1877","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel Ziegler, Jeffrey Wu, Clemens Winter, Chris Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. Language models are few-shot learners. In H. Larochelle, M. Ranzato, R. Hadsell, M.F. Balcan, and H. Lin, editors, Advances in Neural Information Processing Systems, volume 33, pages 1877--1901. Curran Associates, Inc., 2020."},{"key":"e_1_3_2_1_28_1","volume-title":"Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res., 21(1), jan","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res., 21(1), jan 2020."},{"key":"e_1_3_2_1_29_1","volume-title":"International Conference on Learning Representations","author":"Guo Daya","year":"2021","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, Shujie LIU, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, Michele Tufano, Shao Kun Deng, Colin Clement, Dawn Drain, Neel Sundaresan, Jian Yin, Daxin Jiang, and Ming Zhou. Graphcodebert: Pre-training code representations with data flow. In International Conference on Learning Representations, 2021."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/SCAM59687.2023.00038"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER56733.2023.00033"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-021-09976-2"},{"key":"e_1_3_2_1_33_1","volume-title":"Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al. Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374","author":"Chen Mark","year":"2021","unstructured":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al. Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374, 2021."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2021.3128234"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR52588.2021.00024"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17650"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2970276.2970330"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00101"},{"key":"e_1_3_2_1_39_1","first-page":"59","volume-title":"An empirical investigation of code completion usage by professional software developers","author":"M\u0103r\u0103\u015foiu Mariana","year":"2015","unstructured":"Mariana M\u0103r\u0103\u015foiu, Luke Church, and Alan Blackwell. An empirical investigation of code completion usage by professional software developers. Psychology of Programming Interest Group (PPIG 2015), pages 59--68, 2015."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR59073.2023.00035"},{"key":"e_1_3_2_1_41_1","volume-title":"Practitioners' expectations on code completion","author":"Wang Chaozheng","year":"2023","unstructured":"Chaozheng Wang, Junhao Hu, Cuiyun Gao, Yu Jin, Tao Xie, Hailiang Huang, Zhenyu Lei, and Yuetang Deng. Practitioners' expectations on code completion, 2023."},{"key":"e_1_3_2_1_42_1","first-page":"2161","volume-title":"Source code recommender systems: The practitioners' perspective","author":"Ciniselli Matteo","year":"2023","unstructured":"Matteo Ciniselli, Luca Pascarella, Emad Aghajani, Simone Scalabrino, Rocco Oliveto, and Gabriele Bavota. Source code recommender systems: The practitioners' perspective. pages 2161--2172, 05 2023."},{"key":"e_1_3_2_1_43_1","unstructured":"IntelliJ. Intellij platform plugin template. https:\/\/github.com\/JetBrains\/intellij-platform-plugin-template [Accessed: 2022]."},{"key":"e_1_3_2_1_44_1","unstructured":"VS Code. Code - extension and customization generator. https:\/\/www.npmjs.com\/package\/generator-code [Accessed: 2022]."},{"key":"e_1_3_2_1_45_1","volume-title":"Cm3: A causal masked multimodal model of the internet. arXiv preprint arXiv:2201.07520","author":"Aghajanyan Armen","year":"2022","unstructured":"Armen Aghajanyan, Bernie Huang, Candace Ross, Vladimir Karpukhin, Hu Xu, Naman Goyal, Dmytro Okhonko, Mandar Joshi, Gargi Ghosh, Mike Lewis, et al. Cm3: A causal masked multimodal model of the internet. arXiv preprint arXiv:2201.07520, 2022."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1539"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528588.3528665"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2023.111741"},{"key":"e_1_3_2_1_49_1","first-page":"74","volume-title":"Text Summarization Branches Out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. Rouge: A package for automatic evaluation of summaries. In Text Summarization Branches Out, pages 74--81, Barcelona, Spain, July 2004. Association for Computational Linguistics."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3520312.3534864"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359591.3359735"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2020.110800"}],"event":{"name":"ICSE '24: IEEE\/ACM 46th International Conference on Software Engineering","location":"Lisbon Portugal","acronym":"ICSE '24","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","Faculty of Engineering of University of Porto"]},"container-title":["Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3597503.3639138","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3597503.3639138","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:49:12Z","timestamp":1750286952000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3597503.3639138"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,12]]},"references-count":52,"alternative-id":["10.1145\/3597503.3639138","10.1145\/3597503"],"URL":"https:\/\/doi.org\/10.1145\/3597503.3639138","relation":{},"subject":[],"published":{"date-parts":[[2024,4,12]]},"assertion":[{"value":"2024-04-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}