{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T12:18:19Z","timestamp":1779365899424,"version":"3.53.0"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,4,12]],"date-time":"2026-04-12T00:00:00Z","timestamp":1775952000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,12]]},"DOI":"10.1145\/3786167.3788410","type":"proceedings-article","created":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T11:40:19Z","timestamp":1779363619000},"page":"165-172","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["PerfBench: Can Agents Resolve Real-World Performance Bugs?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-0140-2785","authenticated-orcid":false,"given":"Spandan","family":"Garg","sequence":"first","affiliation":[{"name":"Microsoft Corporation, Redmond, Washington, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2268-5897","authenticated-orcid":false,"given":"Roshanak Zilouchian","family":"Moghaddam","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, Washington, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0394-7588","authenticated-orcid":false,"given":"Neel","family":"Sundaresan","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, Washington, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,5,21]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Reem Aleithan Haoran Xue Mohammad\u00a0Mahdi Mohajer Elijah Nnorom Gias Uddin and Song Wang. 2024. SWE-Bench+: Enhanced Coding Benchmark for LLMs. arxiv:https:\/\/arXiv.org\/abs\/2410.06992\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2410.06992"},{"key":"e_1_3_3_2_3_2","unstructured":"Anthropic. 2024. Claude for Coding. https:\/\/www.anthropic.com\/claude-code. Accessed: 2025-07-14."},{"key":"e_1_3_3_2_4_2","first-page":"307","volume-title":"10th USENIX Symposium on Operating Systems Design and Implementation (OSDI 12)","author":"Attariyan Mona","year":"2012","unstructured":"Mona Attariyan, Michael Chow, and Jason Flinn. 2012. X-ray: Automating { Root-Cause} Diagnosis of Performance Anomalies in Production Software. In 10th USENIX Symposium on Operating Systems Design and Implementation (OSDI 12). 307\u2013320."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","unstructured":"Thomas\u00a0H. Austin and Cormac Flanagan. 2012. Multiple Facets for Dynamic Information Flow. SIGPLAN Not. 47 1 (jan 2012) 165\u2013178. 10.1145\/2103621.2103677","DOI":"10.1145\/2103621.2103677"},{"key":"e_1_3_3_2_6_2","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique\u00a0Ponde de Oliveira\u00a0Pinto Jared Kaplan Harri Edwards Yuri Burda Nicholas Joseph Greg Brockman Alex Ray Raul Puri Gretchen Krueger Michael Petrov Heidy Khlaaf Girish Sastry Pamela Mishkin Brooke Chan Scott Gray Nick Ryder Mikhail Pavlov Alethea Power Lukasz Kaiser Mohammad Bavarian Clemens Winter Philippe Tillet Felipe\u00a0Petroski Such Dave Cummings Matthias Plappert Fotios Chantzis Elizabeth Barnes Ariel Herbert-Voss William\u00a0Hebgen Guss Alex Nichol Alex Paino Nikolas Tezak Jie Tang Igor Babuschkin Suchir Balaji Shantanu Jain William Saunders Christopher Hesse Andrew\u00a0N. Carr Jan Leike Josh Achiam Vedant Misra Evan Morikawa Alec Radford Matthew Knight Miles Brundage Mira Murati Katie Mayer Peter Welinder Bob McGrew Dario Amodei Sam McCandlish Ilya Sutskever and Wojciech Zaremba. 2021. Evaluating Large Language Models Trained on Code. arxiv:https:\/\/arXiv.org\/abs\/2107.03374\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2107.03374"},{"key":"e_1_3_3_2_7_2","unstructured":"Cognition.ai. 2024. Introducing DEVIN. https:\/\/www.cognition.ai\/blog\/introducing-devin Accessed: 2024-08-08."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/2670979.2670987"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/2670979.2670987"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","unstructured":"Luca Della\u00a0Toffola Michael Pradel and Thomas\u00a0R. Gross. 2015. Performance Problems You Can Fix: A Dynamic Analysis of Memoization Opportunities. SIGPLAN Not. 50 10 (oct 2015) 607\u2013622. 10.1145\/2858965.2814290","DOI":"10.1145\/2858965.2814290"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","unstructured":"Spandan Garg Roshanak\u00a0Zilouchian Moghaddam Colin\u00a0B. Clement Neel Sundaresan and Chen Wu. 2022. DeepDev-PERF: A Deep Learning-Based Approach for Improving Software Performance(ESEC\/FSE 2022). Association for Computing Machinery New York NY USA 948\u2013958. 10.1145\/3540250.3549096","DOI":"10.1145\/3540250.3549096"},{"key":"e_1_3_3_2_12_2","unstructured":"Spandan Garg Roshanak\u00a0Zilouchian Moghaddam and Neel Sundaresan. 2025. RAPGen: An Approach for Fixing Code Inefficiencies in Zero-Shot. arxiv:https:\/\/arXiv.org\/abs\/2306.17077\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2306.17077"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3460946.3464318"},{"key":"e_1_3_3_2_14_2","unstructured":"Dhruv Gautam Spandan Garg Jinu Jang Neel Sundaresan and Roshanak\u00a0Zilouchian Moghaddam. 2025. RefactorBench: Evaluating Stateful Reasoning in Language Agents Through Code. arxiv:https:\/\/arXiv.org\/abs\/2503.07832\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2503.07832"},{"key":"e_1_3_3_2_15_2","unstructured":"GitHub. 2024. GitHub Copilot Agent. https:\/\/github.blog\/news-insights\/product-news\/github-copilot-meet-the-new-coding-agent\/. Accessed: 2025-07-14."},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.5555\/2337223.2337241"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","unstructured":"Md\u00a0Shahriar Iqbal Rahul Krishna Mohammad\u00a0Ali Javidian Baishakhi Ray and Pooyan Jamshidi. 2021. CADET: Debugging and Fixing Misconfigurations using Counterfactual Reasoning. 10.48550\/arXiv.2010.06061","DOI":"10.48550\/arXiv.2010.06061"},{"key":"e_1_3_3_2_18_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Jimenez Carlos\u00a0E","year":"2024","unstructured":"Carlos\u00a0E Jimenez, John Yang, Alexander Wettig, Shunyu Yao, Kexin Pei, Ofir Press, and Karthik\u00a0R Narasimhan. 2024. SWE-bench: Can Language Models Resolve Real-world Github Issues?. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=VTF8yNQM66"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","unstructured":"Milan Jovic Andrea Adamoli and Matthias Hauswirth. 2011. Catch Me If You Can: Performance Bug Detection in the Wild. SIGPLAN Not. 46 10 (oct 2011) 155\u2013170. 10.1145\/2076021.2048081","DOI":"10.1145\/2076021.2048081"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2013.6606626"},{"key":"e_1_3_3_2_21_2","unstructured":"Bowen Li Wenhan Wu Ziwei Tang Lin Shi John Yang Jinyang Li Shunyu Yao Chen Qian Binyuan Hui Qicheng Zhang Zhiyin Yu He Du Ping Yang Dahua Lin Chao Peng and Kai Chen. 2024. Prompting Large Language Models to Tackle the Full Software Development Lifecycle: A Case Study. arxiv:https:\/\/arXiv.org\/abs\/2403.08604\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2403.08604"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Pengfei Liu Weizhe Yuan Jinlan Fu Zhengbao Jiang Hiroaki Hayashi and Graham Neubig. 2023. Pre-train prompt and predict: A systematic survey of prompting methods in natural language processing. Comput. Surveys 55 9 (2023) 1\u201335.","DOI":"10.1145\/3560815"},{"key":"e_1_3_3_2_23_2","unstructured":".NET Foundation. 2024. BenchmarkDotNet. https:\/\/github.com\/dotnet\/BenchmarkDotNet Accessed: 2025-09-20."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","unstructured":"Adrian Nistor Tian Jiang and Lin Tan. 2013. Discovering reporting and fixing performance bugs. 2013 10th Working Conference on Mining Software Repositories (MSR) (2013) 237\u2013246. 10.1109\/MSR.2013.6624035","DOI":"10.1109\/MSR.2013.6624035"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/2660193.2660234"},{"key":"e_1_3_3_2_26_2","unstructured":"Xingyao Wang Boxuan Li Yufan Song Frank\u00a0F. Xu Xiangru Tang Mingchen Zhuge Jiayi Pan Yueqi Song Bowen Li Jaskirat Singh Hoang\u00a0H. Tran Fuqiang Li Ren Ma Mingzhang Zheng Bill Qian Yanjun Shao Niklas Muennighoff Yizhe Zhang Binyuan Hui Junyang Lin Robert Brennan Hao Peng Heng Ji and Graham Neubig. 2024. OpenDevin: An Open Platform for AI Software Developers as Generalist Agents. arxiv:https:\/\/arXiv.org\/abs\/2407.16741\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2407.16741"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2009.5070536"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180233"},{"key":"e_1_3_3_2_29_2","unstructured":"Windsurf. 2024. https:\/\/windsurf.com\/. Accessed: 2025-07-14."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3468264.3468600"},{"key":"e_1_3_3_2_31_2","unstructured":"Chunqiu\u00a0Steven Xia Yinlin Deng Soren Dunn and Lingming Zhang. 2024. Agentless: Demystifying LLM-based Software Engineering Agents. arxiv:https:\/\/arXiv.org\/abs\/2407.01489\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2407.01489"},{"key":"e_1_3_3_2_32_2","unstructured":"John Yang Carlos\u00a0E. Jimenez Alexander Wettig Kilian Lieret Shunyu Yao Karthik Narasimhan and Ofir Press. 2024. SWE-agent: Agent-Computer Interfaces Enable Automated Software Engineering. arxiv:https:\/\/arXiv.org\/abs\/2405.15793\u00a0[cs.SE]"},{"key":"e_1_3_3_2_33_2","unstructured":"Yuntong Zhang Haifeng Ruan Zhiyu Fan and Abhik Roychoudhury. 2024. AutoCodeRover: Autonomous Program Improvement. arxiv:https:\/\/arXiv.org\/abs\/2404.05427\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2404.05427"},{"key":"e_1_3_3_2_34_2","unstructured":"Terry\u00a0Yue Zhuo Minh\u00a0Chien Vu Jenny Chim Han Hu Wenhao Yu Ratnadira Widyasari Imam Nur\u00a0Bani Yusuf Haolan Zhan Junda He Indraneil Paul Simon Brunner Chen Gong Thong Hoang Armel\u00a0Randy Zebaze Xiaoheng Hong Wen-Ding Li Jean Kaddour Ming Xu Zhihan Zhang Prateek Yadav Naman Jain Alex Gu Zhoujun Cheng Jiawei Liu Qian Liu Zijian Wang Binyuan Hui Niklas Muennighoff David Lo Daniel Fried Xiaoning Du Harm de Vries and Leandro\u00a0Von Werra. 2025. BigCodeBench: Benchmarking Code Generation with Diverse Function Calls and Complex Instructions. arxiv:https:\/\/arXiv.org\/abs\/2406.15877\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2406.15877"}],"event":{"name":"AGENT '26: International Workshop on Agentic Engineering","location":"Rio de Janeiro Brazil","acronym":"AGENT '26","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","Faculty of Engineering of University of Porto"]},"container-title":["Proceedings of the 2026 International Workshop on Agentic Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3786167.3788410","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T12:02:44Z","timestamp":1779364964000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3786167.3788410"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,12]]},"references-count":33,"alternative-id":["10.1145\/3786167.3788410","10.1145\/3786167"],"URL":"https:\/\/doi.org\/10.1145\/3786167.3788410","relation":{},"subject":[],"published":{"date-parts":[[2026,4,12]]},"assertion":[{"value":"2026-05-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}