{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T11:44:08Z","timestamp":1766231048960,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3750720.3757280","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T11:42:38Z","timestamp":1766230958000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Comprehensive Evaluation of LLMs in HPC Code Performance Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-7261-2548","authenticated-orcid":false,"given":"Bowen","family":"Cui","sequence":"first","affiliation":[{"name":"Computer Science, George Mason University, Fairfax, Virginia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9522-6518","authenticated-orcid":false,"given":"Tejas","family":"Ramesh","sequence":"additional","affiliation":[{"name":"Computer Science, George Mason University, Fairfax, Virginia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5380-6951","authenticated-orcid":false,"given":"Oscar","family":"Hernandez","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, Tennessee, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7977-3182","authenticated-orcid":false,"given":"Keren","family":"Zhou","sequence":"additional","affiliation":[{"name":"Computer Science, George Mason University, Fairfax, Virginia, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"[n. d.]. GPT-4 Documentation. https:\/\/platform.openai.com\/docs\/models\/gpt-4-turbo-and-gpt-4."},{"key":"e_1_3_3_2_3_2","unstructured":"[n. d.]. Llama3.2 Documentation. https:\/\/github.com\/meta-llama\/llama-models?tab=readme-ov-file."},{"key":"e_1_3_3_2_4_2","unstructured":"[n. d.]. XSBench. https:\/\/proxyapps.exascaleproject.org\/app\/xsbench\/."},{"key":"e_1_3_3_2_5_2","unstructured":"2024. SPEChpc\u2122 2021 Benchmark Suites. https:\/\/www.spec.org\/hpc2021\/ Accessed: 2024-10-25."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3604915.3610647"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"L. Adhianto S. Banerjee M. Fagan M. Krentel G. Marin J. Mellor-Crummey and N.\u00a0R. Tallent. 2010. HPCTOOLKIT: tools for performance analysis of optimized parallel programs http:\/\/hpctoolkit.org. Concurr. Comput.: Pract. Exper. 22 6 (apr 2010) 685\u2013701.","DOI":"10.1002\/cpe.1553"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Toufique Ahmed and Prem Devanbu. 2022. Few-shot training LLMs for project-specific code-summarization. Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering (2022). https:\/\/api.semanticscholar.org\/CorpusID:250426482","DOI":"10.1145\/3551349.3559555"},{"key":"e_1_3_3_2_9_2","unstructured":"Anthropic. 2024. Introducing the next generation of Claude. https:\/\/www.anthropic.com\/news\/claude-3-family"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356219"},{"key":"e_1_3_3_2_11_2","unstructured":"Aman Chaturvedi Daniel Nichols Siddharth Singh and Abhinav Bhatele. 2024. HPC-Coder-v2-16B. https:\/\/huggingface.co\/hpcgroup\/hpc-coder-v2-16b. Accessed: 2025-04-14."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_3_2_13_2","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique Ponde Jared Kaplan Harrison Edwards Yura Burda Nicholas Joseph Greg Brockman Alex Ray Raul Puri Gretchen Krueger Michael Petrov Heidy Khlaaf Girish Sastry Pamela Mishkin Brooke Chan Scott Gray Nick Ryder Mikhail Pavlov Alethea Power Lukasz Kaiser Mohammad Bavarian Clemens Winter Philippe Tillet Felipe\u00a0Petroski Such David\u00a0W. Cummings Matthias Plappert Fotios Chantzis Elizabeth Barnes Ariel Herbert-Voss William\u00a0H. Guss Alex Nichol Igor Babuschkin Suchir Balaji Shantanu Jain Andrew Carr Jan Leike Joshua Achiam Vedant Misra Evan Morikawa Alec Radford Matthew\u00a0M. Knight Miles Brundage Mira Murati Katie Mayer Peter Welinder Bob McGrew Dario Amodei Sam McCandlish Ilya Sutskever and Wojciech Zaremba. 2021. Evaluating Large Language Models Trained on Code. ArXiv abs\/2107.03374 (2021). https:\/\/api.semanticscholar.org\/CorpusID:235755472"},{"key":"e_1_3_3_2_14_2","unstructured":"Codee. 2024. Codee: Automated Code Performance Tuning for C\/C++. https:\/\/www.codee.com\/ Accessed: 2024-08-30."},{"key":"e_1_3_3_2_15_2","unstructured":"Codee. 2024. Open Catalog. https:\/\/github.com\/codee-com\/open-catalog Accessed: 2024-08-27."},{"key":"e_1_3_3_2_16_2","unstructured":"Codee. 2024. Performance Demos Repository. https:\/\/github.com\/codee-com\/performance-demos\/tree\/main Accessed: 2024-08-23."},{"key":"e_1_3_3_2_17_2","unstructured":"Codee Community. 2025. Open Catalog. https:\/\/github.com\/codee-com\/open-catalog\/ Accessed: 2025-01-08."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624172"},{"key":"e_1_3_3_2_19_2","unstructured":"Lamia Djoudi Denis Barthou Patrick Carribault Christophe Lemuet Jean-Thomas Acquaviva and William Jalby. 2005. MAQAO : Modular Assembler Quality Analyzer and Optimizer for Itanium 2. https:\/\/api.semanticscholar.org\/CorpusID:5408423"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC.2009.98"},{"key":"e_1_3_3_2_21_2","unstructured":"Denis Kocetkov Raymond Li Loubna\u00a0Ben Allal Jia Li Chenghao Mou Carlos\u00a0Mu\u00f1oz Ferrandis Yacine Jernite Margaret Mitchell Sean Hughes Thomas Wolf Dzmitry Bahdanau Leandro von Werra and Harm de Vries. 2022. The Stack: 3 TB of permissively licensed source code. arxiv:https:\/\/arXiv.org\/abs\/2211.15533\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2211.15533"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2004.1281665"},{"key":"e_1_3_3_2_23_2","unstructured":"NERSC. 2025. Codee Documentation. https:\/\/docs.nersc.gov\/tools\/performance\/codee\/. Accessed: 2025-01-04."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3625549.3658689"},{"key":"e_1_3_3_2_25_2","unstructured":"Daniel Nichols Joshua\u00a0Hoke Davis Zhaojun Xie Arjun Rajaram and Abhinav Bhatele. 2024. Can Large Language Models Write Parallel Code? ArXiv abs\/2401.12554 (2024). https:\/\/api.semanticscholar.org\/CorpusID:267094846"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.23919\/ISC.2024.10528929"},{"key":"e_1_3_3_2_27_2","unstructured":"Daniel Nichols Pranav Polasam Harshitha Menon Aniruddha Marathe Todd Gamblin and Abhinav Bhatele. 2024. Performance-Aligned LLMs for Generating Fast Code. ArXiv abs\/2404.18864 (2024). https:\/\/api.semanticscholar.org\/CorpusID:269449297"},{"key":"e_1_3_3_2_28_2","unstructured":"Changan Niu Ting Zhang Chuanyi Li Bin Luo and Vincent Ng. 2024. On Evaluating the Efficiency of Source Code Generated by LLMs. arxiv:https:\/\/arXiv.org\/abs\/2404.06041\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2404.06041"},{"key":"e_1_3_3_2_29_2","unstructured":"Catherine Olsson Nelson Elhage Neel Nanda Nicholas Joseph Nova Dassarma Tom Henighan Benjamin Mann Amanda Askell Yuntao Bai Anna Chen Tom Conerly Dawn Drain Deep Ganguli Zac Hatfield-Dodds Danny Hernandez Scott Johnston Andy Jones John Kernion Liane Lovitt Kamal Ndousse Dario Amodei Tom\u00a0B. Brown Jack Clark Jared Kaplan Sam McCandlish and Christopher Olah. 2022. In-context Learning and Induction Heads. ArXiv abs\/2209.11895 (2022). https:\/\/api.semanticscholar.org\/CorpusID:252532078"},{"key":"e_1_3_3_2_30_2","unstructured":"Louis-Noel Pouchet and Tomofumi Yuki. 2024. PolyBenchC-4.2.1. https:\/\/github.com\/MatthiasJReisinger\/PolyBenchC-4.2.1\/tree\/master Accessed: 2024-9-23."},{"key":"e_1_3_3_2_31_2","unstructured":"Baptiste Rozi\u00e8re Jonas Gehring Fabian Gloeckle Sten Sootla Itai Gat Xiaoqing\u00a0Ellen Tan Yossi Adi Jingyu Liu Romain Sauvestre Tal Remez J\u00e9r\u00e9my Rapin Artyom Kozhevnikov Ivan Evtimov Joanna Bitton Manish Bhatt Cristian\u00a0Canton Ferrer Aaron Grattafiori Wenhan Xiong Alexandre D\u00e9fossez Jade Copet Faisal Azhar Hugo Touvron Louis Martin Nicolas Usunier Thomas Scialom and Gabriel Synnaeve. 2024. Code Llama: Open Foundation Models for Code. arxiv:https:\/\/arXiv.org\/abs\/2308.12950\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2308.12950"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"Sameer\u00a0S. Shende and Allen\u00a0D. Malony. 2006. The Tau Parallel Performance System. The International Journal of High Performance Computing Applications 20 (2006) 287 \u2013 311. https:\/\/api.semanticscholar.org\/CorpusID:16032955","DOI":"10.1177\/1094342006064482"},{"key":"e_1_3_3_2_33_2","unstructured":"Sofia\u00a0Eleni Spatharioti David\u00a0M. Rothschild Daniel\u00a0G. Goldstein and Jake\u00a0M. Hofman. 2023. Comparing Traditional and LLM-based Search for Consumer Choice: A Randomized Experiment. ArXiv abs\/2307.03744 (2023). https:\/\/api.semanticscholar.org\/CorpusID:259375527"},{"key":"e_1_3_3_2_34_2","unstructured":"Together.ai. 2025. Together.ai - Advancing Collaborative AI Development. https:\/\/together.ai Accessed: 2025-01-08."},{"key":"e_1_3_3_2_35_2","unstructured":"Pedro Valero-Lara Alexis Huante Mustafa\u00a0Al Lail William\u00a0F. Godoy Keita Teranishi Prasanna Balaprakash and Jeffrey\u00a0S. Vetter. 2023. Comparing Llama-2 and GPT-3 LLMs for HPC kernels generation. ArXiv abs\/2309.07103 (2023). https:\/\/api.semanticscholar.org\/CorpusID:261705926"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"crossref","unstructured":"Balaji Venkatachalam Dan Gusfield and Yelena Frid. 2014. Faster algorithms for RNA-folding using the Four-Russians method. Algorithms for Molecular Biology 9 (2014) 1\u201312.","DOI":"10.1186\/1748-7188-9-5"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"crossref","unstructured":"Lei Wang Chen Ma Xueyang Feng Zeyu Zhang Hao Yang Jingsen Zhang Zhiyuan Chen Jiakai Tang Xu Chen Yankai Lin et\u00a0al. 2024. A survey on large language model based autonomous agents. Frontiers of Computer Science 18 6 (2024) 186345.","DOI":"10.1007\/s11704-024-40231-1"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Chun Xia and Lingming Zhang. 2022. Less training more repairing please: revisiting automated program repair via zero-shot learning. Proceedings of the 30th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering (2022). https:\/\/api.semanticscholar.org\/CorpusID:250627519","DOI":"10.1145\/3540250.3549101"},{"key":"e_1_3_3_2_39_2","unstructured":"Ziwei Xu Sanjay Jain and Mohan Kankanhalli. 2024. Hallucination is Inevitable: An Innate Limitation of Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2401.11817\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2401.11817"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","unstructured":"Yongjian Yu and S.T. Acton. 2002. Speckle reducing anisotropic diffusion. IEEE Transactions on Image Processing 11 11 (2002) 1260\u20131270. 10.1109\/TIP.2002.804276","DOI":"10.1109\/TIP.2002.804276"},{"key":"e_1_3_3_2_41_2","unstructured":"Wenhao Zhu Hongyi Liu Qingxiu Dong Jingjing Xu Lingpeng Kong Jiajun Chen Lei Li and Shujian Huang. 2023. Multilingual Machine Translation with Large Language Models: Empirical Results and Analysis. ArXiv abs\/2304.04675 (2023). https:\/\/api.semanticscholar.org\/CorpusID:258048937"}],"event":{"name":"ICPP Workshops '25: The 54th International Conference on Parallel Processing Workshops","location":"San Diego CA USA","acronym":"ICPP Workshops '25"},"container-title":["Workshop Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3750720.3757280","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T11:42:48Z","timestamp":1766230968000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3750720.3757280"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":40,"alternative-id":["10.1145\/3750720.3757280","10.1145\/3750720"],"URL":"https:\/\/doi.org\/10.1145\/3750720.3757280","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}