{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:40:01Z","timestamp":1755866401097,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":15,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100006374","name":"Natural Science Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2023A1515011959"],"award-info":[{"award-number":["2023A1515011959"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen International Science and Technology Cooperation Project","award":["GJHZ20220913143008015"],"award-info":[{"award-number":["GJHZ20220913143008015"]}]},{"name":"National Key R&D Program of China","award":["2022YFB3103900"],"award-info":[{"award-number":["2022YFB3103900"]}]},{"name":"National Natural Science Foundation of China under project","award":["62472126"],"award-info":[{"award-number":["62472126"]}]},{"name":"Shenzhen Basic Research","award":["JCYJ20220531095214031"],"award-info":[{"award-number":["JCYJ20220531095214031"]}]},{"name":"Shenzhen-Hong Kong Jointly Funded Project","award":["SGDX20230116091246007"],"award-info":[{"award-number":["SGDX20230116091246007"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3730262","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T01:25:28Z","timestamp":1752456328000},"page":"3025-3029","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Understanding Large Language Model Performance in Software Engineering: A Large-scale Question Answering Benchmark"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-9221-2788","authenticated-orcid":false,"given":"Ruida","family":"Hu","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2843-0689","authenticated-orcid":false,"given":"Chao","family":"Peng","sequence":"additional","affiliation":[{"name":"ByteDance, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5687-1026","authenticated-orcid":false,"given":"Jingyi","family":"Ren","sequence":"additional","affiliation":[{"name":"ByteDance, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1080-3278","authenticated-orcid":false,"given":"Bo","family":"Jiang","sequence":"additional","affiliation":[{"name":"ByteDance, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5630-4810","authenticated-orcid":false,"given":"Xiangxin","family":"Meng","sequence":"additional","affiliation":[{"name":"ByteDance, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7371-1036","authenticated-orcid":false,"given":"Qinyun","family":"Wu","sequence":"additional","affiliation":[{"name":"ByteDance, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3800-2565","authenticated-orcid":false,"given":"Pengfei","family":"Gao","sequence":"additional","affiliation":[{"name":"ByteDance, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8915-6392","authenticated-orcid":false,"given":"Xinchen","family":"Wang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4774-2434","authenticated-orcid":false,"given":"Cuiyun","family":"Gao","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00128"},{"key":"e_1_3_2_1_2_1","volume-title":"Codeapex: A bilingual programming evaluation benchmark for large language models. arXiv preprint arXiv:2309.01940","author":"Fu Lingyue","year":"2023","unstructured":"Lingyue Fu, Huacan Chai, Shuang Luo, Kounianhua Du, Weiming Zhang, Longteng Fan, Jiayi Lei, Renting Rui, Jianghao Lin, Yuchen Fang, et al. 2023. Codeapex: A bilingual programming evaluation benchmark for large language models. arXiv preprint arXiv:2309.01940 (2023)."},{"volume-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. ACL, 7421-7454","author":"Ge Bai Xingyuan Bu","key":"e_1_3_2_1_3_1","unstructured":"Xingyuan Bu Ge Bai, Jie Liu et al. 2024. MT-Bench-101: A Fine-Grained Benchmark for Evaluating Large Language Models in Multi-Turn Dialogues. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics. ACL, 7421-7454."},{"key":"e_1_3_2_1_4_1","unstructured":"github. 2022. GitHub REST API documentation. https:\/\/docs.github.com\/en\/rest?apiVersion=2022-11-28."},{"key":"e_1_3_2_1_5_1","unstructured":"github. [n.d.]. marking-issues-or-pull-requests-as-a-duplicate. https:\/\/docs.github.com\/en\/issues\/tracking-your-work-with-issues\/administering-issues."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2021. Measuring Massive Multitask Language Understanding. Proceedings of the International Conference on Learning Representations (ICLR) (2021)."},{"key":"e_1_3_2_1_7_1","volume-title":"Large language models for software engineering: A systematic literature review. arXiv preprint arXiv:2308.10620","author":"Hou Xinyi","year":"2023","unstructured":"Xinyi Hou, Yanjie Zhao, Yue Liu, Zhou Yang, Kailong Wang, Li Li, Xiapu Luo, David Lo, John Grundy, and Haoyu Wang. 2023. Large language models for software engineering: A systematic literature review. arXiv preprint arXiv:2308.10620 (2023)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.148"},{"key":"e_1_3_2_1_9_1","volume-title":"CodeQA: A question answering dataset for source code comprehension. arXiv preprint arXiv:2109.08365","author":"Liu Chenxiao","year":"2021","unstructured":"Chenxiao Liu and Xiaojun Wan. 2021. CodeQA: A question answering dataset for source code comprehension. arXiv preprint arXiv:2109.08365 (2021)."},{"key":"e_1_3_2_1_10_1","volume-title":"Large Language Model-Based Agents for Software Engineering: A Survey. arXiv preprint arXiv:2409.02977","author":"Liu Junwei","year":"2024","unstructured":"Junwei Liu, Kaixin Wang, Yixuan Chen, Xin Peng, Zhenpeng Chen, Lingming Zhang, and Yiling Lou. 2024b. Large Language Model-Based Agents for Software Engineering: A Survey. arXiv preprint arXiv:2409.02977 (2024)."},{"key":"e_1_3_2_1_11_1","volume-title":"MarsCode Agent: AI-native Automated Bug Fixing. arXiv preprint arXiv:2409.00899","author":"Liu Yizhou","year":"2024","unstructured":"Yizhou Liu, Pengfei Gao, Xinchen Wang, Chao Peng, and Zhao Zhang. 2024a. MarsCode Agent: AI-native Automated Bug Fixing. arXiv preprint arXiv:2409.00899 (2024)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3545945.3569785"},{"key":"e_1_3_2_1_13_1","volume-title":"Loubna Ben Allal et al","author":"Raymond Li Yangtian Zi","year":"2023","unstructured":"Yangtian Zi Raymond Li, Loubna Ben Allal et al. 2023. Starcoder: may the source be with you! arXiv preprint arXiv:2305.06161 (2023)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581641.3584037"},{"key":"e_1_3_2_1_15_1","volume-title":"Swe-agent: Agent-computer interfaces enable automated software engineering. arXiv preprint arXiv:2405.15793","author":"Yang John","year":"2024","unstructured":"John Yang, Carlos E Jimenez, Alexander Wettig, Kilian Lieret, Shunyu Yao, Karthik Narasimhan, and Ofir Press. 2024. Swe-agent: Agent-computer interfaces enable automated software engineering. arXiv preprint arXiv:2405.15793 (2024)."}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Padua Italy","acronym":"SIGIR '25"},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3730262","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:15:02Z","timestamp":1755864902000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3730262"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":15,"alternative-id":["10.1145\/3726302.3730262","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3730262","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}