{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T02:12:42Z","timestamp":1775873562583,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,2,6]],"date-time":"2024-02-06T00:00:00Z","timestamp":1707177600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Natural Science Foundation of China","award":["No.62161146003"],"award-info":[{"award-number":["No.62161146003"]}]},{"name":"National key research and development program Project","award":["2021YFF0704202"],"award-info":[{"award-number":["2021YFF0704202"]}]},{"name":"Tencent Foundation\/XPLORER PRIZE"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,2,6]]},"DOI":"10.1145\/3597503.3623316","type":"proceedings-article","created":{"date-parts":[[2024,2,6]],"date-time":"2024-02-06T20:53:16Z","timestamp":1707252796000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":98,"title":["CoderEval: A Benchmark of Pragmatic Code Generation with Generative Pre-trained Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3828-7612","authenticated-orcid":false,"given":"Hao","family":"Yu","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0490-2071","authenticated-orcid":false,"given":"Bo","family":"Shen","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Technologies Co., Ltd., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7916-255X","authenticated-orcid":false,"given":"Dezhi","family":"Ran","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6438-360X","authenticated-orcid":false,"given":"Jiaxin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Technologies Co., Ltd., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6021-1447","authenticated-orcid":false,"given":"Qi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Technologies Co., Ltd., Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3304-1389","authenticated-orcid":false,"given":"Yuchi","family":"Ma","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Technologies Co., Ltd., Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2454-1706","authenticated-orcid":false,"given":"Guangtai","family":"Liang","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Technologies Co., Ltd., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6278-2357","authenticated-orcid":false,"given":"Ying","family":"Li","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6598-0041","authenticated-orcid":false,"given":"Qianxiang","family":"Wang","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Technologies Co., Ltd., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6731-216X","authenticated-orcid":false,"given":"Tao","family":"Xie","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,2,6]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2022. https:\/\/github.com\/CoderEval\/CoderEval."},{"key":"e_1_3_2_1_2_1","unstructured":"Jacob Austin Augustus Odena Maxwell Nye Maarten Bosma Henryk Michalewski David Dohan Ellen Jiang Carrie Cai Michael Terry Quoc Le and Charles Sutton. 2021. Program Synthesis with Large Language Models. 
arXiv:cs.PL\/2108.07732"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","unstructured":"Sid Black Leo Gao Phil Wang Connor Leahy and Stella Biderman. 2021. GPT-Neo: Large Scale Autoregressive Language Modeling with Mesh-Tensorflow. 10.5281\/zenodo.5297715","DOI":"10.5281\/zenodo.5297715"},{"key":"e_1_3_2_1_4_1","volume-title":"Advances in Neural Information Processing Systems (2020)","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. 2020. Language Models Are Few-shot Learners. Advances in Neural Information Processing Systems (2020), 1877--1901."},{"key":"e_1_3_2_1_5_1","volume-title":"Molly Q Feldman, et al.","author":"Cassano Federico","year":"2022","unstructured":"Federico Cassano, John Gouwar, Daniel Nguyen, Sydney Nguyen, Luna Phipps-Costin, Donald Pinckney, Ming-Ho Yee, Yangtian Zi, Carolyn Jane Anderson, Molly Q Feldman, et al. 2022. MultiPL-E: A Scalable and Extensible Approach to Benchmarking Neural Code Generation. arXiv preprint arXiv:2208.08227 (2022)."},{"key":"e_1_3_2_1_6_1","volume-title":"Training and Evaluating a Jupyter Notebook Data Science Assistant. arXiv preprint arXiv:2201.12901","author":"Chandel Shubham","year":"2022","unstructured":"Shubham Chandel, Colin B Clement, Guillermo Serrato, and Neel Sundaresan. 2022. Training and Evaluating a Jupyter Notebook Data Science Assistant. arXiv preprint arXiv:2201.12901 (2022)."},{"key":"e_1_3_2_1_7_1","volume-title":"Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al.","author":"Chen Mark","year":"2021","unstructured":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al. 2021. Evaluating large language models trained on code. 
arXiv preprint arXiv:2107.03374 (2021)."},{"key":"e_1_3_2_1_8_1","volume-title":"PanGu-Coder: Program Synthesis with Function-Level Language Modeling. arXiv preprint arXiv:2207.11280","author":"Christopoulou Fenia","year":"2022","unstructured":"Fenia Christopoulou, Gerasimos Lampouras, Milan Gritta, Guchun Zhang, Yinpeng Guo, Zhongqi Li, Qi Zhang, Meng Xiao, Bo Shen, Lin Li, Hao Yu, Li Yan, Pingyi Zhou, Xin Wang, Yuchi Ma, Ignacio Iacobacci, Yasheng Wang, Guangtai Liang, Jiansheng Wei, Xin Jiang, Qianxiang Wang, and Qun Liu. 2022. PanGu-Coder: Program Synthesis with Function-Level Language Modeling. arXiv preprint arXiv:2207.11280 (2022)."},{"key":"e_1_3_2_1_9_1","volume-title":"InCoder: A Generative Model for Code Infilling and Synthesis. arXiv preprint arXiv:2204.05999","author":"Fried Daniel","year":"2022","unstructured":"Daniel Fried, Armen Aghajanyan, Jessy Lin, Sida Wang, Eric Wallace, Freda Shi, Ruiqi Zhong, Wen-tau Yih, Luke Zettlemoyer, and Mike Lewis. 2022. InCoder: A Generative Model for Code Infilling and Synthesis. arXiv preprint arXiv:2204.05999 (2022)."},{"key":"e_1_3_2_1_10_1","volume-title":"Language Models Can Teach Themselves to Program Better. arXiv preprint arXiv:2207.14502","author":"Haluptzok Patrick","year":"2022","unstructured":"Patrick Haluptzok, Matthew Bowers, and Adam Tauman Kalai. 2022. Language Models Can Teach Themselves to Program Better. arXiv preprint arXiv:2207.14502 (2022)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","unstructured":"Yiyang Hao Ge Li Yongqiang Liu Xiaowei Miao He Zong Siyuan Jiang Yang Liu and He Wei. 2022. AixBench: A Code Generation Benchmark Dataset. 
10.48550\/arXiv.2206.13179","DOI":"10.48550\/arXiv.2206.13179"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks","volume":"1","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Steven Basart, Saurav Kadavath, Mantas Mazeika, Akul Arora, Ethan Guo, Collin Burns, Samir Puranik, Horace He, Dawn Song, and Jacob Steinhardt. 2021. Measuring Coding Challenge Competence With APPS. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks, Vol. 1."},{"key":"e_1_3_2_1_13_1","volume-title":"The Curious Case of Neural Text Degeneration. arXiv preprint arXiv:1904.09751","author":"Holtzman Ari","year":"2019","unstructured":"Ari Holtzman, Jan Buys, Li Du, Maxwell Forbes, and Yejin Choi. 2019. The Curious Case of Neural Text Degeneration. arXiv preprint arXiv:1904.09751 (2019)."},{"key":"e_1_3_2_1_14_1","unstructured":"Srinivasan Iyer Ioannis Konstas Alvin Cheung and Luke Zettlemoyer. 2018. Mapping Language to Code in Programmatic Context. arXiv:cs.CL\/1808.09588"},{"key":"e_1_3_2_1_15_1","volume-title":"Transfer Learning for Natural Language Processing Workshop. 52--62","author":"Jang Joel","year":"2023","unstructured":"Joel Jang, Seonghyeon Ye, and Minjoon Seo. 2023. Can Large Language Models Truly Understand Prompts? A Case Study with Negated Prompts. In Transfer Learning for Natural Language Processing Workshop. 52--62."},{"key":"e_1_3_2_1_16_1","volume-title":"Daniel Fried, Sida Wang, and Tao Yu.","author":"Lai Yuhang","year":"2022","unstructured":"Yuhang Lai, Chengxi Li, Yiming Wang, Tianyi Zhang, Ruiqi Zhong, Luke Zettlemoyer, Scott Wen tau Yih, Daniel Fried, Sida Wang, and Tao Yu. 2022. DS-1000: A Natural and Reliable Benchmark for Data Science Code Generation. 
arXiv:cs.SE\/2211.11501"},{"key":"e_1_3_2_1_17_1","volume-title":"Yangtian Zi, Niklas Muennighoff, Denis Kocetkov, Chenghao Mou, Marc Marone, Christopher Akiki, Jia Li, Jenny Chim, et al.","author":"Li Raymond","year":"2023","unstructured":"Raymond Li, Loubna Ben Allal, Yangtian Zi, Niklas Muennighoff, Denis Kocetkov, Chenghao Mou, Marc Marone, Christopher Akiki, Jia Li, Jenny Chim, et al. 2023. StarCoder: May the Source Be with You! arXiv preprint arXiv:2305.06161 (2023)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","unstructured":"Yujia Li David Choi Junyoung Chung Nate Kushman Julian Schrittwieser R\u00e9mi Leblond Tom Eccles James Keeling Felix Gimeno Agustin Dal Lago Thomas Hubert Peter Choy Cyprien de Masson d'Autume Igor Babuschkin Xinyun Chen Po-Sen Huang Johannes Welbl Sven Gowal Alexey Cherepanov James Molloy Daniel J. Mankowitz Esme Sutherland Robson Pushmeet Kohli Nando de Freitas Koray Kavukcuoglu and Oriol Vinyals. 2022. Competition-Level Code Generation with AlphaCode. 10.48550\/ARXIV.2203.07814","DOI":"10.48550\/ARXIV.2203.07814"},{"key":"e_1_3_2_1_19_1","volume-title":"WizardCoder: Empowering Code Large Language Models with Evol-Instruct. arXiv preprint arXiv:2306.08568","author":"Luo Ziyang","year":"2023","unstructured":"Ziyang Luo, Can Xu, Pu Zhao, Qingfeng Sun, Xiubo Geng, Wenxiang Hu, Chongyang Tao, Jing Ma, Qingwei Lin, and Daxin Jiang. 2023. WizardCoder: Empowering Code Large Language Models with Evol-Instruct. arXiv preprint arXiv:2306.08568 (2023)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1910.13461"},{"key":"e_1_3_2_1_21_1","volume-title":"A Conversational Paradigm for Program Synthesis. arXiv preprint arXiv:2203.13474","author":"Nijkamp Erik","year":"2022","unstructured":"Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, and Caiming Xiong. 2022. A Conversational Paradigm for Program Synthesis. 
arXiv preprint arXiv:2203.13474 (2022)."},{"key":"e_1_3_2_1_22_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans and Ilya Sutskever. 2018. Improving Language Understanding by Generative Pre-training. (2018)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2208.06213"},{"key":"e_1_3_2_1_24_1","volume-title":"Liam Fedus, Luke Metz, Michael Pokorny, et al.","author":"Schulman John","year":"2022","unstructured":"John Schulman, Barret Zoph, Christina Kim, Jacob Hilton, Jacob Menick, Jiayi Weng, Juan Felipe Ceron Uribe, Liam Fedus, Luke Metz, Michael Pokorny, et al. 2022. ChatGPT: Optimizing Language Models for Dialogue. OpenAI blog (2022)."},{"key":"e_1_3_2_1_25_1","volume-title":"Repository-level Prompt Generation for Large Language Models of Code. arXiv preprint arXiv:2206.12839","author":"Shrivastava Disha","year":"2022","unstructured":"Disha Shrivastava, Hugo Larochelle, and Daniel Tarlow. 2022. Repository-level Prompt Generation for Large Language Models of Code. arXiv preprint arXiv:2206.12839 (2022)."},{"key":"e_1_3_2_1_26_1","volume-title":"Advances in Neural Information Processing Systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention Is All You Need. Advances in Neural Information Processing Systems 30 (2017)."},{"key":"e_1_3_2_1_27_1","volume-title":"Hyung Won Chung, Iz Beltagy, Julien Launay, and Colin Raffel.","author":"Wang Thomas","year":"2022","unstructured":"Thomas Wang, Adam Roberts, Daniel Hesslow, Teven Le Scao, Hyung Won Chung, Iz Beltagy, Julien Launay, and Colin Raffel. 2022. What Language Model Architecture and Pretraining Objective Work Best for Zero-Shot Generalization? arXiv preprint arXiv:2204.05832 (2022)."},{"key":"e_1_3_2_1_28_1","volume-title":"Execution-Based Evaluation for Open-Domain Code Generation. 
arXiv preprint arXiv:2212.10481","author":"Wang Zhiruo","year":"2022","unstructured":"Zhiruo Wang, Shuyan Zhou, Daniel Fried, and Graham Neubig. 2022. Execution-Based Evaluation for Open-Domain Code Generation. arXiv preprint arXiv:2212.10481 (2022)."},{"key":"e_1_3_2_1_29_1","volume-title":"PanGu-\u03b1: Large-scale Autoregressive Pretrained Chinese Language Models with Auto-parallel Computation. CoRR abs\/2104.12369","author":"Zeng Wei","year":"2021","unstructured":"Wei Zeng, Xiaozhe Ren, Teng Su, Hui Wang, Yi Liao, Zhiwei Wang, Xin Jiang, ZhenZhang Yang, Kaisheng Wang, Xiaoda Zhang, Chen Li, Ziyan Gong, Yifan Yao, Xinjing Huang, Jun Wang, Jianfeng Yu, Qi Guo, Yue Yu, Yan Zhang, Jin Wang, Hengtao Tao, Dasen Yan, Zexuan Yi, Fang Peng, Fangqing Jiang, Han Zhang, Lingfeng Deng, Yehong Zhang, Zhe Lin, Chao Zhang, Shaojie Zhang, Mingyue Guo, Shanzhi Gu, Gaojun Fan, Yaowei Wang, Xuefeng Jin, Qun Liu, and Yonghong Tian. 2021. PanGu-\u03b1: Large-scale Autoregressive Pretrained Chinese Language Models with Auto-parallel Computation. CoRR abs\/2104.12369 (2021). arXiv:2104.12369 https:\/\/arxiv.org\/abs\/2104.12369"},{"key":"e_1_3_2_1_30_1","volume-title":"Chen Change Loy, and Ziwei Liu","author":"Zhou Kaiyang","year":"2022","unstructured":"Kaiyang Zhou, Jingkang Yang, Chen Change Loy, and Ziwei Liu. 2022. Learning to Prompt for Vision-language Models. International Journal of Computer Vision (2022), 2337--2348."},{"key":"e_1_3_2_1_31_1","volume-title":"Ziwen Han, Keiran Paster, Silviu Pitis, Harris Chan, and Jimmy Ba.","author":"Zhou Yongchao","year":"2022","unstructured":"Yongchao Zhou, Andrei Ioan Muresanu, Ziwen Han, Keiran Paster, Silviu Pitis, Harris Chan, and Jimmy Ba. 2022. Large Language Models Are Human-level Prompt Engineers. 
arXiv preprint arXiv:2211.01910 (2022)."}],"event":{"name":"ICSE '24: IEEE\/ACM 46th International Conference on Software Engineering","location":"Lisbon Portugal","acronym":"ICSE '24","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","Faculty of Engineering of University of Porto"]},"container-title":["Proceedings of the IEEE\/ACM 46th International Conference on Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3597503.3623316","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3597503.3623316","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:48:45Z","timestamp":1750182525000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3597503.3623316"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,6]]},"references-count":31,"alternative-id":["10.1145\/3597503.3623316","10.1145\/3597503"],"URL":"https:\/\/doi.org\/10.1145\/3597503.3623316","relation":{},"subject":[],"published":{"date-parts":[[2024,2,6]]},"assertion":[{"value":"2024-02-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}