{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T19:22:53Z","timestamp":1774120973372,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T00:00:00Z","timestamp":1699747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3624062.3624605","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T13:53:39Z","timestamp":1699624419000},"page":"1918-1928","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Specialized Kernels for Optimizing GPU Offload in OpenMP"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4660-7584","authenticated-orcid":false,"given":"Dhruva","family":"Chakrabarti","sequence":"first","affiliation":[{"name":"Advanced Micro Devices (AMD) Inc, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3549-8135","authenticated-orcid":false,"given":"Gregory","family":"Rodgers","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices (AMD) Inc, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6852-1445","authenticated-orcid":false,"given":"Carlo","family":"Bertolli","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices (AMD) Inc, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4331-4360","authenticated-orcid":false,"given":"Gheorghe-Teodor","family":"Bercea","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices (AMD) Inc, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6330-4816","authenticated-orcid":false,"given":"Jan-Patrick","family":"Lehr","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices (AMD) Inc, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5938-067X","authenticated-orcid":false,"given":"Lynd","family":"Stringer","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices (AMD) Inc, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6657-9719","authenticated-orcid":false,"given":"Jan","family":"Leyonberg","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices (AMD) Inc, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7205-2250","authenticated-orcid":false,"given":"Dan","family":"Palermo","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices (AMD) Inc, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7111-0549","authenticated-orcid":false,"given":"Ron","family":"Lieberman","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices (AMD) Inc, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,11,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2833157.2833161"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","unstructured":"Carlo Bertolli Samuel\u00a0F. Antao Alexandre\u00a0E. Eichenberger Kevin O\u2019Brien Zehra Sura Arpith\u00a0C. Jacob Tong Chen and Olivier Sallenave. 2014. Coordinating GPU Threads for OpenMP 4.0 in LLVM. In 2014 LLVM Compiler Infrastructure in HPC. 12\u201321. https:\/\/doi.org\/10.1109\/LLVM-HPC.2014.10","DOI":"10.1109\/LLVM-HPC.2014.10"},{"key":"e_1_3_2_2_3_1","unstructured":"OpenMP Architecture\u00a0Review Board. 2023. The OpenMP API specification for parallel programming.https:\/\/www.openmp.org\/specifications\/"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid54584.2022.00077"},{"key":"e_1_3_2_2_5_1","volume-title":"Performance Assessment of OpenMP Compilers Targeting NVIDIA V100 GPUs","author":"Davis Joshua\u00a0Hoke","unstructured":"Joshua\u00a0Hoke Davis, Christopher Daley, Swaroop Pophale, Thomas Huber, Sunita Chandrasekaran, and Nicholas\u00a0J. Wright. 2021. Performance Assessment of OpenMP Compilers Targeting NVIDIA V100 GPUs. In Accelerator Programming Using Directives, Sridutt Bhalachandra, Sandra Wienke, Sunita Chandrasekaran, and Guido Juckeland (Eds.). Springer International Publishing, 25\u201344."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1504\/IJCSE.2018.095847"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-28596-8_11"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00055"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","unstructured":"Simon Garcia\u00a0De Gonzalo Sitao Huang Juan G\u00f3mez-Luna Simon Hammond Onur Mutlu and Wen-mei Hwu. 2019. Automatic Generation of Warp-Level Primitives and Atomic Instructions for Fast and Portable Parallel Reduction on GPUs. In 2019 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO). 73\u201384. https:\/\/doi.org\/10.1109\/CGO.2019.8661187","DOI":"10.1109\/CGO.2019.8661187"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO53902.2022.9741290"},{"key":"e_1_3_2_2_11_1","unstructured":"LLVM 2023. The LLVM Compiler Infrastructure.https:\/\/llvm.org"},{"key":"e_1_3_2_2_12_1","unstructured":"Justin Luitjens. 2014. Faster Parallel Reductions on Kepler. https:\/\/developer.nvidia.com\/blog\/faster-parallel-reductions-kepler"},{"key":"e_1_3_2_2_13_1","unstructured":"ROCm 2023. AMD ROCmTM Platform.https:\/\/rocm.docs.amd.com\/en\/latest\/"},{"key":"e_1_3_2_2_14_1","unstructured":"Ben Sander. 2016. AMD GCN Assembly: Cross-Lane Operations. https:\/\/gpuopen.com\/learn\/amd-gcn-assembly-cross-lane-operations\/"},{"key":"e_1_3_2_2_15_1","unstructured":"SPEC 2023. Standard Performance Evaluation Corporation. https:\/\/www.spec.org\/"},{"key":"e_1_3_2_2_16_1","volume-title":"ACCEL","author":"SPEC","year":"2019","unstructured":"SPEC ACCEL 2019. Standard Performance Evaluation Corporation ACCEL benchmark suite. https:\/\/www.spec.org\/accel\/"},{"key":"e_1_3_2_2_17_1","unstructured":"SPEChpc 2021. Standard Performance Evaluation Corporation SPEChpc 2021 benchmark suite. https:\/\/www.spec.org\/hpc2021\/"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-85262-7_11"}],"event":{"name":"SC-W 2023: Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis","location":"Denver CO USA","acronym":"SC-W 2023"},"container-title":["Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624605","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624605","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T03:01:30Z","timestamp":1755745290000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624605"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,12]]},"references-count":18,"alternative-id":["10.1145\/3624062.3624605","10.1145\/3624062"],"URL":"https:\/\/doi.org\/10.1145\/3624062.3624605","relation":{},"subject":[],"published":{"date-parts":[[2023,11,12]]},"assertion":[{"value":"2023-11-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}