{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,21]],"date-time":"2025-12-21T08:33:43Z","timestamp":1766306023289,"version":"3.48.0"},"reference-count":26,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T00:00:00Z","timestamp":1758844800000},"content-version":"vor","delay-in-days":268,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Procedia Computer Science"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1016\/j.procs.2025.09.460","type":"journal-article","created":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T22:13:24Z","timestamp":1762467204000},"page":"3352-3361","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Automated Transformation of OpenMP to CUDA Kernels Using AI Models"],"prefix":"10.1016","volume":"270","author":[{"given":"Mateusz","family":"Gruzewski","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.procs.2025.09.460_bib1","unstructured":"Sgrauerg (no date) polybenchgpu, PolyBench\/GPU 1.0: PolyBench Benchmarks on the GPU using CUDA, OpenCL, HMPP, and OpenACC. Available at: https:\/\/github.com\/sgrauerg\/polybenchGpu (Accessed: 03 March 2025)."},{"key":"10.1016\/j.procs.2025.09.460_bib2","doi-asserted-by":"crossref","unstructured":"Nichols, D. et al. (2024) \u2018HPC-Coder: Modeling Parallel Programs using large language models\u2019, ISC High Performance 2024 Research Paper Proceedings (39th International Conference), pp. 1\u201312. doi: 10.23919\/isc.2024.10528929.","DOI":"10.23919\/ISC.2024.10528929"},{"key":"10.1016\/j.procs.2025.09.460_bib3","doi-asserted-by":"crossref","unstructured":"Chen, L. et al. (2023) \u2018LM4HPC: Towards effective language model application in high-performance computing\u2019, Lecture Notes in Computer Science, pp. 18\u201333. doi: 10.1007\/978-3-031-40744-42.","DOI":"10.1007\/978-3-031-40744-4_2"},{"key":"10.1016\/j.procs.2025.09.460_bib4","doi-asserted-by":"crossref","unstructured":"Godoy, W. et al. (2023) \u2018Evaluation of openai codex for HPC parallel programming models kernel generation\u2019, Proceedings of the 52nd International Conference on Parallel Processing Workshops, pp. 136\u2013144. doi: 10.1145\/3605731.3605886.","DOI":"10.1145\/3605731.3605886"},{"key":"10.1016\/j.procs.2025.09.460_bib5","doi-asserted-by":"crossref","unstructured":"Palkowski, M. and Gruzewski, M. (2024) \u2018GPT-driven source-to-source transformation for generating compilable parallel CUDA code for Nussinov\u2019s algorithm\u2019, Electronics, 13(3), p. 488. doi: 10.3390\/electronics13030488.","DOI":"10.3390\/electronics13030488"},{"key":"10.1016\/j.procs.2025.09.460_bib6","unstructured":"(2025) CUDA C++ Programming Guide. Available at: https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html (Accessed: 03 March 2025)."},{"key":"10.1016\/j.procs.2025.09.460_bib7","doi-asserted-by":"crossref","unstructured":"Verdoolaege, S. (2010) \u2018ISL: An integer set library for the polyhedral model\u2019, Lecture Notes in Computer Science, pp. 299\u2013302. doi: 10.1007\/978-3-642-15582-6_49.","DOI":"10.1007\/978-3-642-15582-6_49"},{"key":"10.1016\/j.procs.2025.09.460_bib8","unstructured":"Kernel profiling guide (2024) 2. Kernel Profiling Guide - NsightCompute 12.8 documentation. Available at: https:\/\/docs.nvidia.com\/nsight-compute\/ProflingGuide\/index.html (Accessed: 04 March 2025)."},{"key":"10.1016\/j.procs.2025.09.460_bib9","unstructured":"Fried, D. et al. (2023) \u2018InCoder: A Generative Model for Code Infilling and Synthesis\u2019, arXiv preprint. arXiv:2204.05999."},{"key":"10.1016\/j.procs.2025.09.460_bib10","unstructured":"Nijkamp, E. et al. (2023) \u2018CodeGen: An Open Large Language Model for Code with Multi-Turn Program Synthesis\u2019, arXiv preprint.arXiv:2203.13474."},{"key":"10.1016\/j.procs.2025.09.460_bib11","unstructured":"Li, Y. et al. (2023) \u2018StarCoder: May the Source Be with You!\u2019, arXiv preprint. arXiv:2305.06161."},{"key":"10.1016\/j.procs.2025.09.460_bib12","unstructured":"Meta AI (2023) Code Llama: Open Foundation Models for Code. Available at: https:\/\/codellama.dev\/about (Accessed: 04 March 2025)."},{"key":"10.1016\/j.procs.2025.09.460_bib13","doi-asserted-by":"crossref","unstructured":"Che, S. et al. (2009) \u2018Rodinia: A benchmark suite for heterogeneous computing\u2019, Proceedings of the 2009 IEEE International Symposium on Workload Characterization (IISWC), pp. 44\u201354. doi: 10.1109\/IISWC.2009.5306797.","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"10.1016\/j.procs.2025.09.460_bib14","doi-asserted-by":"crossref","unstructured":"Huber, M. et al. (2022) \u2018Efficient Execution of OpenMP on GPUs\u2019, 2022 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO), doi: 10.1109\/CGO53902.2022.9741290.","DOI":"10.1109\/CGO53902.2022.9741290"},{"key":"10.1016\/j.procs.2025.09.460_bib15","unstructured":"Vaswani, A. et al. (2017) \u2018Attention is All You Need\u2019, *Advances in Neural Information Processing Systems*, 30. doi: 10.48550\/arXiv.1706.03762."},{"key":"10.1016\/j.procs.2025.09.460_bib16","unstructured":"Chen, M. et al. (2021) \u2018Evaluating Large Language Models Trained on Code\u2019, *arXiv preprint arXiv:2107.03374*. doi: 10.48550\/arXiv.2107.03374."},{"key":"10.1016\/j.procs.2025.09.460_bib17","doi-asserted-by":"crossref","unstructured":"Ahmad, W.U. et al. (2021) \u2018Unified Pre-training for Program Understanding and Generation\u2019, *Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies*, pp. 2655\u20132668. doi: 10.18653\/v1\/2021.naacl-main.210.","DOI":"10.18653\/v1\/2021.naacl-main.211"},{"key":"10.1016\/j.procs.2025.09.460_bib18","unstructured":"NVIDIA Corporation (2023) \u2018CUDA C Programming Guide\u2019, Version 12.3. Available at: https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html."},{"key":"10.1016\/j.procs.2025.09.460_bib19","doi-asserted-by":"crossref","unstructured":"Palkowski, M. and Gruzewski, M. (2024) \u2018Automatic Generation of OpenCL Code through Polyhedral Compilation with LLM\u2019, *Proceedings of the 19th Conference on Computer Science and Intelligence Systems (FedCSIS)*, pp. 671\u2013676. doi: 10.15439\/2024F6469.","DOI":"10.15439\/2024F6469"},{"key":"10.1016\/j.procs.2025.09.460_bib20","doi-asserted-by":"crossref","unstructured":"Palkowski, M. and Gruzewski, M. (2023) \u2018Time and energy benefits of using automatic optimization compilers for NPDP tasks\u2019, *Electronics*, 12(17), p. 3579. doi: 10.3390\/electronics12173579.","DOI":"10.3390\/electronics12173579"},{"key":"10.1016\/j.procs.2025.09.460_bib21","doi-asserted-by":"crossref","unstructured":"Lee, S., Min, S.-J. and Eigenmann, R. (2009) \u2018OpenMP to GPGPU: A Compiler Framework for Automatic Translation and Optimization\u2019, *Proceedings of the 14th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP)*, pp. 101\u2013110. doi: 10.1145\/1504176.1504194.","DOI":"10.1145\/1504176.1504194"},{"key":"10.1016\/j.procs.2025.09.460_bib22","doi-asserted-by":"crossref","unstructured":"Dolz, M. F. et al. (2012) \u2018Automatic translation of OpenMP code to HMPP for GPU computing\u2019, *Procedia Computer Science*, 9, pp. 1667\u20131676. doi: 10.1016\/j.procs.2012.04.184.","DOI":"10.1016\/j.procs.2012.04.184"},{"key":"10.1016\/j.procs.2025.09.460_bib23","doi-asserted-by":"crossref","unstructured":"Dave, C. et al. (2009) \u2018Cetus: A Source-to-Source Compiler Infrastructure for Multicores\u2019, *Computer*, 42(12), pp. 36\u201342. doi: 10.1109\/MC.2009.408.","DOI":"10.1109\/MC.2009.385"},{"key":"10.1016\/j.procs.2025.09.460_bib24","doi-asserted-by":"crossref","unstructured":"Alias, C. et al. (2011) \u2018Par4All: From Convex Array Regions to Heterogeneous Computing\u2019, *Proceedings of LCPC 2011*. doi: 10.1007\/978-3-642-33158-9_9.","DOI":"10.1007\/978-3-642-33158-9_9"},{"key":"10.1016\/j.procs.2025.09.460_bib25","doi-asserted-by":"crossref","unstructured":"Yu, C., Royuela, S. and Qui\u00f1ones, E. (2020) \u2018OpenMP to Cuda graphs\u2019, Proceedings of the 23th International Workshop on Software and Compilers for Embedded Systems, pp. 42\u201347. doi: 10.1145\/3378678.3391881.","DOI":"10.1145\/3378678.3391881"},{"key":"10.1016\/j.procs.2025.09.460_bib26","unstructured":"Jiang, Z., Liu, X., Tang, D., Zhou, Y., Li, S., Zhou, H. and Jiang, D. (2024) \u2018A Survey on Large Language Models for Code Generation\u2019, arXiv preprint arXiv:2406.00515. Available at: https:\/\/arxiv.org\/abs\/2406.00515."}],"container-title":["Procedia Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050925031333?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050925031333?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,12,21]],"date-time":"2025-12-21T08:30:49Z","timestamp":1766305849000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1877050925031333"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":26,"alternative-id":["S1877050925031333"],"URL":"https:\/\/doi.org\/10.1016\/j.procs.2025.09.460","relation":{},"ISSN":["1877-0509"],"issn-type":[{"type":"print","value":"1877-0509"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Automated Transformation of OpenMP to CUDA Kernels Using AI Models","name":"articletitle","label":"Article Title"},{"value":"Procedia Computer Science","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.procs.2025.09.460","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}]}}