{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:06:38Z","timestamp":1750309598050,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1145\/3725798.3725799","type":"proceedings-article","created":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T10:40:50Z","timestamp":1747132850000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing Auto-tuning of OpenMP Offload kernels for performance and power"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5547-814X","authenticated-orcid":false,"given":"Nafis","family":"Mustakin","sequence":"first","affiliation":[{"name":"University of California Riverside, Riverside, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5376-7868","authenticated-orcid":false,"given":"Daniel","family":"Wong","sequence":"additional","affiliation":[{"name":"University of California, Riverside, Riverside, California, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,5,13]]},"reference":[{"unstructured":"[n. d.]. AMD Radeon Instinct MI300X Specs. https:\/\/www.techpowerup.com\/gpu-specs\/radeon-instinct-mi300x.c4179. Accessed: 2025-3-25.","key":"e_1_3_3_1_2_2"},{"unstructured":"[n. d.]. CUDA C++ Best Practices Guide 12.8 documentation. https:\/\/docs.nvidia.com\/cuda\/cuda-c-best-practices-guide\/#device-memory-spaces. Accessed: 2025-3-25.","key":"e_1_3_3_1_3_2"},{"unstructured":"[n. d.]. NVIDIA H800 SXM5 Specs. https:\/\/www.techpowerup.com\/gpu-specs\/h800-sxm5.c3975. Accessed: 2025-3-25.","key":"e_1_3_3_1_4_2"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_5_2","DOI":"10.1145\/2540708.2540719"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_6_2","DOI":"10.1145\/2925426.2926281"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_7_2","DOI":"10.1109\/LLVM-HPC.2016.006"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_8_2","DOI":"10.1145\/3624062.3624064"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_9_2","DOI":"10.1109\/MICRO.2014.11"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_10_2","DOI":"10.1109\/HPCA56546.2023.10071121"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_11_2","DOI":"10.1145\/3634769.3634808"},{"doi-asserted-by":"crossref","unstructured":"Leonardo Dagum and Ramesh Menon. 1998. OpenMP: an industry standard API for shared-memory programming. IEEE computational science and engineering 5 1 (1998) 46\u201355.","key":"e_1_3_3_1_12_2","DOI":"10.1109\/99.660313"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_13_2","DOI":"10.1007\/978-3-642-28145-7_11"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_14_2","DOI":"10.1145\/3634769.3634807"},{"doi-asserted-by":"crossref","unstructured":"Ali Jahanshahi Hadi\u00a0Zamani Sabzi Chester Lau and Daniel Wong. 2020. Gpu-nest: Characterizing energy efficiency of multi-gpu inference servers. IEEE Computer Architecture Letters 19 2 (2020) 139\u2013142.","key":"e_1_3_3_1_15_2","DOI":"10.1109\/LCA.2020.3023723"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_16_2","DOI":"10.1109\/ICCD63220.2024.00023"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_17_2","DOI":"10.2172\/1059462"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_18_2","DOI":"10.1109\/IPDPS.2013.115"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_19_2","DOI":"10.2172\/1090032"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_20_2","DOI":"10.1145\/3650200.3656626"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_21_2","DOI":"10.1109\/ICPP.2017.61"},{"doi-asserted-by":"crossref","unstructured":"Chih-Sheng Lin Shih-Meng Teng and Pao-Ann Hsiung. 2016. Auto-tuning for GPGPU applications using performance and energy model. Int. J. High Perform. Syst. Archit. 62 (Jan. 2016) 40\u201353.","key":"e_1_3_3_1_22_2","DOI":"10.1016\/j.sysarc.2015.11.012"},{"doi-asserted-by":"crossref","unstructured":"Paul\u00a0T Lin Michael\u00a0A Heroux Richard\u00a0F Barrett and Alan\u00a0B Williams. 2015. Assessing a mini-application as a performance proxy for a finite element method engineering application. Concurrency and Computation: Practice and Experience 27 17 (2015) 5374\u20135389.","key":"e_1_3_3_1_23_2","DOI":"10.1002\/cpe.3587"},{"doi-asserted-by":"crossref","unstructured":"Sparsh Mittal and Jeffrey\u00a0S Vetter. 2014. A survey of methods for analyzing and improving GPU energy efficiency. ACM Computing Surveys (CSUR) 47 2 (2014) 1\u201323.","key":"e_1_3_3_1_24_2","DOI":"10.1145\/2636342"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_25_2","DOI":"10.1145\/3581784.3607098"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_26_2","DOI":"10.1145\/3620666.3651329"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_27_2","DOI":"10.1145\/3458817.3480853"},{"doi-asserted-by":"crossref","unstructured":"R Schoonhoven Ben van Werkhoven and K Batenburg. 2022. Benchmarking optimization algorithms for auto-tuning GPU kernels. IEEE Trans. Evol. Comput. 27 (Oct. 2022) 550\u2013564.","key":"e_1_3_3_1_28_2","DOI":"10.1109\/TEVC.2022.3210654"},{"doi-asserted-by":"crossref","unstructured":"R Schoonhoven B Veenboer B\u00a0V Werkhoven and K Batenburg. 2022. Going green: optimizing GPUs for energy efficiency through model-steered auto-tuning. PMBS (Nov. 2022) 48\u201359.","key":"e_1_3_3_1_29_2","DOI":"10.1109\/PMBS56514.2022.00010"},{"unstructured":"Yifan Sun Nicolas\u00a0Bohm Agostini Shi Dong and David Kaeli. 2024. CHIP Dataset. https:\/\/chip-dataset.vercel.app\/.","key":"e_1_3_3_1_30_2"},{"doi-asserted-by":"crossref","unstructured":"Ben van Werkhoven. 2019. Kernel Tuner: A search-optimizing GPU code auto-tuner. Future Gener. Comput. Syst. 90 (Jan. 2019) 347\u2013358.","key":"e_1_3_3_1_31_2","DOI":"10.1016\/j.future.2018.08.004"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_32_2","DOI":"10.1109\/RTSS52674.2021.00021"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_33_2","DOI":"10.1109\/PMBS54543.2021.00017"},{"doi-asserted-by":"publisher","key":"e_1_3_3_1_34_2","DOI":"10.1109\/HPCA.2016.7446063"}],"event":{"acronym":"GPGPU 2025","name":"GPGPU 2025: 17th Workshop on General Purpose Processing Using GPU","location":"Las Vegas NV USA"},"container-title":["Proceedings of the 17th Workshop on General Purpose Processing Using GPU"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3725798.3725799","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3725798.3725799","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:04Z","timestamp":1750298224000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3725798.3725799"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":33,"alternative-id":["10.1145\/3725798.3725799","10.1145\/3725798"],"URL":"https:\/\/doi.org\/10.1145\/3725798.3725799","relation":{},"subject":[],"published":{"date-parts":[[2025,3]]},"assertion":[{"value":"2025-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}