{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T13:12:01Z","timestamp":1769346721105,"version":"3.49.0"},"publisher-location":"Cham","reference-count":58,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031408427","type":"print"},{"value":"9783031408434","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-40843-4_28","type":"book-chapter","created":{"date-parts":[[2023,8,24]],"date-time":"2023-08-24T12:02:32Z","timestamp":1692878552000},"page":"378-390","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Portability and\u00a0Scalability of\u00a0OpenMP Offloading on\u00a0State-of-the-Art Accelerators"],"prefix":"10.1007","author":[{"given":"Yehonatan","family":"Fridman","sequence":"first","affiliation":[]},{"given":"Guy","family":"Tamir","sequence":"additional","affiliation":[]},{"given":"Gal","family":"Oren","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,25]]},"reference":[{"issue":"2\u20133","key":"28_CR1","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1016\/0167-739X(89)90038-1","volume":"5","author":"KG Wilson","year":"1989","unstructured":"Wilson, K.G.: Grand challenges to computational science. Future Gener. Comput. Syst. 5(2\u20133), 171\u2013189 (1989)","journal-title":"Future Gener. Comput. Syst."},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Liu, B., Zydek, D., Selvaraj, H., Gewali, L.: Accelerating high performance computing applications: using CPUs, GPUs, hybrid CPU\/GPU, and FPGAs. In: 2012 13th International Conference on Parallel and Distributed Computing, Applications and Technologies, pp. 337\u2013342. IEEE (2012)","DOI":"10.1109\/PDCAT.2012.34"},{"issue":"2","key":"28_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-031-01759-9","volume":"13","author":"TM Aamodt","year":"2018","unstructured":"Aamodt, T.M., et al.: General-purpose graphics processor architectures. Synthesis Lect. Comput. Archit. 13(2), 1\u2013140 (2018)","journal-title":"Synthesis Lect. Comput. Archit."},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Y., Cui, X., Mei, H.: Large-scale fast Fourier transform. In: GPU Computing Gems Emerald Edition, pp. 629\u2013642. Elsevier (2011)","DOI":"10.1016\/B978-0-12-384988-5.00039-5"},{"issue":"3","key":"28_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2962131","volume":"49","author":"RA Bridges","year":"2016","unstructured":"Bridges, R.A., Imam, N., Mintz, T.M.: Understanding GPU power: a survey of profiling, modeling, and simulation methods. ACM Comput. Surv. (CSUR) 49(3), 1\u201327 (2016)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"28_CR6","doi-asserted-by":"publisher","first-page":"528","DOI":"10.1007\/s11227-013-1015-7","volume":"67","author":"KE Niemeyer","year":"2014","unstructured":"Niemeyer, K.E., Sung, C.-J.: Recent progress and challenges in exploiting graphics processors in computational fluid dynamics. J. Supercomput. 67, 528\u2013564 (2014)","journal-title":"J. Supercomput."},{"key":"28_CR7","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1007\/s11227-011-0631-3","volume":"61","author":"VK Pallipuram","year":"2012","unstructured":"Pallipuram, V.K., Bhuiyan, M., Smith, M.C.: A comparative study of GPU programming models and architectures using neural networks. J. Supercomput. 61, 673\u2013718 (2012)","journal-title":"J. Supercomput."},{"key":"28_CR8","doi-asserted-by":"crossref","unstructured":"Lee, S., Vetter, J.S.: Early evaluation of directive-based GPU programming models for productive exascale computing. In: SC 2012: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, pp. 1\u201311. IEEE (2012)","DOI":"10.1109\/SC.2012.51"},{"key":"28_CR9","unstructured":"Qureshi, Z., et al.: Tearing down the memory wall. arXiv preprint arXiv:2008.10169 (2020)"},{"issue":"1","key":"28_CR10","first-page":"91","volume":"7","author":"M Knobloch","year":"2020","unstructured":"Knobloch, M., Mohr, B.: Tools for GPU computing-debugging and performance analysis of heterogenous HPC applications. Supercomput. Front. Innov. 7(1), 91\u2013111 (2020)","journal-title":"Supercomput. Front. Innov."},{"issue":"1","key":"28_CR11","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1109\/99.660313","volume":"5","author":"L Dagum","year":"1998","unstructured":"Dagum, L., Menon, R.: OpenMP: an industry standard API for shared-memory programming. IEEE Comput. Sci. Eng. 5(1), 46\u201355 (1998)","journal-title":"IEEE Comput. Sci. Eng."},{"key":"28_CR12","volume-title":"The OpenMP Common Core: Making OpenMP Simple Again","author":"TG Mattson","year":"2019","unstructured":"Mattson, T.G., He, Y.H., Koniges, A.E.: The OpenMP Common Core: Making OpenMP Simple Again. MIT Press, Cambridge (2019)"},{"key":"28_CR13","volume-title":"Using OpenMP-the Next Step: Affinity, Accelerators, Tasking, and SIMD","author":"R Van der Pas","year":"2017","unstructured":"Van der Pas, R., et al.: Using OpenMP-the Next Step: Affinity, Accelerators, Tasking, and SIMD. MIT Press, Cambridge (2017)"},{"key":"28_CR14","unstructured":"OpenMP Architecture Review Board. OpenMP application program interface, version 4.0 (2013)"},{"key":"28_CR15","unstructured":"OpenMP Architecture Review Board. OpenMP offload in applications of the exascale computing project (2022)"},{"key":"28_CR16","unstructured":"OpenMP Architecture Review Board. OpenMP application program interface version 5.0 (2018)"},{"key":"28_CR17","unstructured":"OpenMP Architecture Review Board. OpenMP application program interface version 5.2 (2021)"},{"key":"28_CR18","unstructured":"GNU Project. GNU offloading and multi-processing project (GOMP) (2023)"},{"key":"28_CR19","unstructured":"Intel. oneAPI GPU optimization guide (2023)"},{"key":"28_CR20","unstructured":"Hammond, J.: OpenMP in NVIDIA\u2019s HPC compilers (2021)"},{"key":"28_CR21","unstructured":"The Clang Team. Clang 17.0.0git documentation (2023)"},{"key":"28_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-74224-9_1","volume-title":"Accelerator Programming Using Directives","author":"NA Mehta","year":"2021","unstructured":"Mehta, N.A., Gayatri, R., Ghadar, Y., Knight, C., Deslippe, J.: Evaluating performance portability of\u00a0OpenMP for SNAP on NVIDIA, Intel, and AMD GPUs using the roofline methodology. In: Bhalachandra, S., Wienke, S., Chandrasekaran, S., Juckeland, G. (eds.) WACCPD 2020. LNCS, vol. 12655, pp. 3\u201324. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-74224-9_1"},{"key":"28_CR23","doi-asserted-by":"crossref","unstructured":"Jun, H., et al.: HBM (high bandwidth memory) dram technology and architecture. In: 2017 IEEE International Memory Workshop (IMW), pp. 1\u20134. IEEE (2017)","DOI":"10.1109\/IMW.2017.7939084"},{"key":"28_CR24","unstructured":"Intel. oneAPI DPC++ compiler (2023)"},{"key":"28_CR25","unstructured":"NVIDIA. Nvidia HPC SDK (2023)"},{"key":"28_CR26","doi-asserted-by":"crossref","unstructured":"Bercea, G.T., et al.: Performance analysis of OpenMP on a GPU using a coral proxy application. In: Proceedings of the 6th International Workshop on Performance Modeling, Benchmarking, and Simulation of High Performance Computing Systems, pp. 1\u201311 (2015)","DOI":"10.1145\/2832087.2832089"},{"key":"28_CR27","unstructured":"AMD. Lulesh 2.0 using OpenMP 4.0 (2015)"},{"key":"28_CR28","unstructured":"Intel. Intel\u00aeData Center GPU Max Series (2023). https:\/\/www.intel.com\/content\/www\/us\/en\/products\/details\/discrete-gpus\/data-center-gpu\/max-series.html"},{"key":"28_CR29","unstructured":"Intel. A new era of chipmaking to meet the world\u2019s demand for compute (2023)"},{"key":"28_CR30","unstructured":"Intel. Intel iris Xe GPU architecture (2023)"},{"key":"28_CR31","unstructured":"Intel oneAPI. A new era of accelerated computing (2023)"},{"key":"28_CR32","unstructured":"Intel. Intel data center GPU max series (2023)"},{"key":"28_CR33","unstructured":"NVIDIA. NVIDIA A100 GPU (2020). https:\/\/www.nvidia.com\/en-us\/data-center\/a100\/"},{"issue":"2","key":"28_CR34","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1109\/MM.2021.3061394","volume":"41","author":"J Choquette","year":"2021","unstructured":"Choquette, J., Gandhi, W., Giroux, O., Stam, N., Krashinsky, R.: NVIDIA A100 tensor core GPU: performance and innovation. IEEE Micro 41(2), 29\u201335 (2021)","journal-title":"IEEE Micro"},{"key":"28_CR35","unstructured":"ECP (Exascale Computing Project. OPENMP VALIDATION AND VERIFICATION TESTSUITE (2023). https:\/\/crpl.cis.udel.edu\/ompvvsollve\/"},{"key":"28_CR36","doi-asserted-by":"crossref","unstructured":"Huber, T., et al.: ECP SOLLVE: validation and verification testsuite status update and compiler insight for openMP. arXiv preprint arXiv:2208.13301, 2022","DOI":"10.1109\/P3HPC56579.2022.00017"},{"key":"28_CR37","unstructured":"Kamm, J.R.: Evaluation of the Sedov-von Neumann-Taylor blast wave solution. Astrophys. J. Suppl. 46 (2000, submitted)"},{"key":"28_CR38","doi-asserted-by":"crossref","unstructured":"Wen, S., et al.: ProfDP: a lightweight profiler to guide data placement in heterogeneous memory systems. In: Proceedings of the 2018 International Conference on Supercomputing, pp. 263\u2013273 (2018)","DOI":"10.1145\/3205289.3205320"},{"key":"28_CR39","unstructured":"LLNL. LULESH webpage (2012). https:\/\/asc.llnl.gov\/codes\/proxy-apps\/lulesh"},{"key":"28_CR40","unstructured":"Feldman, M.: DARPA sets ubiquitous HPC program in motion (2010). https:\/\/www.hpcwire.com\/2010\/08\/10\/darpa_sets_ubiquitous_hpc_program_in_motion\/"},{"key":"28_CR41","unstructured":"Noble, C.R., et al.: ALE3D: an arbitrary Lagrangian-Eulerian multi-physics code. Technical report, Lawrence Livermore National Lab. (LLNL), Livermore, CA, USA (2017)"},{"key":"28_CR42","unstructured":"Karlin, I., et al.: Tuning the Lulesh mini-app for current and future hardware. Technical report, Lawrence Livermore National Lab. (LLNL), Livermore, CA, USA (2013)"},{"key":"28_CR43","unstructured":"Hornung, R.D., et al.: Hydrodynamics challenge problem. Technical report, Lawrence Livermore National Lab. (LLNL), Livermore, CA, USA (2011)"},{"key":"28_CR44","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1007\/978-3-030-34356-9_26","volume-title":"High Performance Computing","author":"VG Vergara Larrea","year":"2019","unstructured":"Vergara Larrea, V.G., et al.: Scaling the summit: deploying the world\u2019s fastest supercomputer. In: Weiland, M., Juckeland, G., Alam, S., Jagode, H. (eds.) ISC High Performance 2019. LNCS, vol. 11887, pp. 330\u2013351. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-34356-9_26"},{"key":"28_CR45","doi-asserted-by":"crossref","unstructured":"Laney, D., et al.: Assessing the effects of data compression in simulations using physically motivated metrics. In: SC 2013: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, pp. 1\u201312. IEEE (2013)","DOI":"10.1145\/2503210.2503283"},{"key":"28_CR46","unstructured":"technical.city. Intel\u00aeData Center GPU Max 1100 v.s. NVIDIA A100 PCIe (2023). https:\/\/technical.city\/en\/video\/A100-PCIe-vs-Data-Center-GPU-Max-1100"},{"key":"28_CR47","unstructured":"Intel. Intel Data Center GPU Max 1100 (2023). https:\/\/ark.intel.com\/content\/www\/us\/en\/ark\/products\/232876\/intel-data-center-gpu-max-1100.html"},{"key":"28_CR48","unstructured":"NVIDIA. NVIDIA Ampere Architecture In-Depth (2015). https:\/\/developer.nvidia.com\/blog\/nvidia-ampere-architecture-in-depth\/"},{"key":"28_CR49","unstructured":"Intel. Intel 4th Gen Xeon Scalable Processors (2023). https:\/\/www.intel.com\/content\/www\/us\/en\/newsroom\/news\/4th-gen-xeon-scalable-processors-max-series-cpus-gpus.html#gs.ti3gm6"},{"key":"28_CR50","unstructured":"Intel. Intel Xeon Gold Processors (2023). https:\/\/www.intel.com\/content\/www\/us\/en\/products\/sku\/212285\/intel-xeon-gold-6338-processor-48m-cache-2-00-ghz\/specifications.html"},{"key":"28_CR51","unstructured":"Wells, J., et al.: Announcing supercomputer summit. Technical report, Oak Ridge National Lab. (ORNL), Oak Ridge, TN, USA (2016)"},{"key":"28_CR52","unstructured":"Github, LULESH OpenMP v4 Offloading GitHub Page (2015). https:\/\/github.com\/AMDComputeLibraries\/OpenMPApps\/tree\/master\/lulesh-mp4"},{"key":"28_CR53","unstructured":"LLNL. CORAL Benchmark Codes (2014). https:\/\/asc.llnl.gov\/coral-benchmarks"},{"key":"28_CR54","unstructured":"Github. OpenMPApps mini-apps GitHub Page (2015). https:\/\/github.com\/AMDComputeLibraries\/OpenMPApps"},{"key":"28_CR55","unstructured":"Harel, R., et al.: ScalSALE: Scalable sale benchmark framework for supercomputers. arXiv preprint arXiv:2209.01983 (2022)"},{"key":"28_CR56","unstructured":"Rusanovsky, M., et al.: Backus: comprehensive high-performance research software engineering approach for simulations in supercomputing systems. arXiv preprint arXiv:1910.06415 (2019)"},{"key":"28_CR57","unstructured":"NegevHPC Project. https:\/\/www.negevhpc.com"},{"key":"28_CR58","unstructured":"Intel. Intel Developer Cloud (2023). https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/devcloud\/overview.html"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-40843-4_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,24]],"date-time":"2023-08-24T12:06:29Z","timestamp":1692878789000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-40843-4_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031408427","9783031408434"],"references-count":58,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-40843-4_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"25 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISC High Performance","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on High Performance Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hamburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 May 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 May 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"38","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"supercomputing2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.isc-hpc.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"70","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"49","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"70% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}