{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:35:05Z","timestamp":1767339305774,"version":"3.40.3"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030581435"},{"type":"electronic","value":"9783030581442"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58144-2_3","type":"book-chapter","created":{"date-parts":[[2020,9,1]],"date-time":"2020-09-01T12:03:48Z","timestamp":1598961828000},"page":"37-51","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["A Case Study of Porting HPGMG from CUDA to OpenMP Target Offload"],"prefix":"10.1007","author":[{"given":"Christopher","family":"Daley","sequence":"first","affiliation":[]},{"given":"Hadia","family":"Ahmed","sequence":"additional","affiliation":[]},{"given":"Samuel","family":"Williams","sequence":"additional","affiliation":[]},{"given":"Nicholas","family":"Wright","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,1]]},"reference":[{"key":"3_CR1","unstructured":"Adams, M., Brown, J., Shalf, J., Van Straalen, B., Strohmaier, E., Williams, S.: HPGMG (2020). https:\/\/bitbucket.org\/hpgmg\/hpgmg"},{"key":"3_CR2","doi-asserted-by":"publisher","unstructured":"Adhianto, L., et al.: HPCTOOLKIT: tools for performance analysis of optimized parallel programs. Concurr. Comput.: Pract. Exp. 22(6), 685\u2013701 (2010). https:\/\/doi.org\/10.1002\/cpe.1553","DOI":"10.1002\/cpe.1553"},{"key":"3_CR3","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1088\/0004-637X\/765\/1\/39","volume":"765","author":"AS Almgren","year":"2013","unstructured":"Almgren, A.S., Bell, J.B., Lijewski, M.J., Luki\u0107, Z., Van Andel, E.: Nyx: a massively parallel AMR code for computational cosmology. Astrophys. J. 765, 39 (2013). https:\/\/doi.org\/10.1088\/0004-637X\/765\/1\/39","journal-title":"Astrophys. J."},{"key":"3_CR4","doi-asserted-by":"publisher","unstructured":"Beckingsale, D.A., et al.: RAJA: portable performance for large-scale scientific applications. In: 2019 IEEE\/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC), pp. 71\u201381, November 2019. https:\/\/doi.org\/10.1109\/P3HPC49587.2019.00012","DOI":"10.1109\/P3HPC49587.2019.00012"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Bercea, G.T., Bataev, A., Eichenberger, A.E., Bertolli, C., O\u2019Brien, J.K.: An open-source solution to performance portability for Summit and Sierra supercomputers. IBM J. Res. Dev. 64(3\/4), 12:1\u201312:23 (2020)","DOI":"10.1147\/JRD.2019.2955944"},{"key":"3_CR6","doi-asserted-by":"publisher","unstructured":"Bercea, G.T., et al.: Performance analysis of OpenMP on a GPU using a CORAL proxy application. In: Proceedings of the 6th International Workshop on Performance Modeling, Benchmarking, and Simulation of High Performance Computing Systems, PMBS 2015. Association for Computing Machinery, New York (2015). https:\/\/doi.org\/10.1145\/2832087.2832089","DOI":"10.1145\/2832087.2832089"},{"key":"3_CR7","doi-asserted-by":"publisher","first-page":"102544","DOI":"10.1016\/j.parco.2019.102544","volume":"88","author":"RD Budiardja","year":"2019","unstructured":"Budiardja, R.D., Cardall, C.Y.: Targeting GPUs with OpenMP directives on summit: a simple and effective Fortran experience. Parallel Comput. 88, 102544 (2019)","journal-title":"Parallel Comput."},{"key":"3_CR8","unstructured":"Colgrove, M., Wolfe, M.: Personal Communication, May 2020"},{"key":"3_CR9","unstructured":"Crayport: Case 247291 - Cray CCE-9.0.0 has OpenMP offload bugs when mapping structs (2020). https:\/\/portal.cray.com"},{"key":"3_CR10","unstructured":"Crayport: Case 256571 - Test program must be compiled at -O0 when using CCE\/9.1.0 (2020). https:\/\/portal.cray.com"},{"key":"3_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/978-3-030-28596-8_11","volume-title":"OpenMP: Conquering the Full Hardware Spectrum","author":"J Doerfert","year":"2019","unstructured":"Doerfert, J., Diaz, J.M.M., Finkel, H.: The TRegion interface and compiler optimizations for OpenMP target regions. In: Fan, X., de Supinski, B.R., Sinnen, O., Giacaman, N. (eds.) IWOMP 2019. LNCS, vol. 11718, pp. 153\u2013167. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-28596-8_11"},{"key":"3_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-319-65578-9_1","volume-title":"Scaling OpenMP for Exascale Performance and Portability","author":"L Grinberg","year":"2017","unstructured":"Grinberg, L., Bertolli, C., Haque, R.: Hands on with OpenMP4.5 and unified memory: developing applications for IBM\u2019s Hybrid CPU + GPU systems (part I). In: de Supinski, B.R., Olivier, S.L., Terboven, C., Chapman, B.M., M\u00fcller, M.S. (eds.) IWOMP 2017. LNCS, vol. 10468, pp. 3\u201316. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-65578-9_1"},{"key":"3_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/978-3-319-65578-9_2","volume-title":"Scaling OpenMP for Exascale Performance and Portability","author":"L Grinberg","year":"2017","unstructured":"Grinberg, L., Bertolli, C., Haque, R.: Hands on with OpenMP4.5 and unified memory: developing applications for IBM\u2019s hybrid CPU + GPU systems (part II). In: de Supinski, B.R., Olivier, S.L., Terboven, C., Chapman, B.M., M\u00fcller, M.S. (eds.) IWOMP 2017. LNCS, vol. 10468, pp. 17\u201329. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-65578-9_2"},{"key":"3_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1007\/978-3-319-24595-9_6","volume-title":"OpenMP: Heterogenous Execution and Data Movements","author":"A Hart","year":"2015","unstructured":"Hart, A.: First experiences porting a parallel application to a hybrid supercomputer with OpenMP4.0 device constructs. In: Terboven, C., de Supinski, B.R., Reble, P., Chapman, B.M., M\u00fcller, M.S. (eds.) IWOMP 2015. LNCS, vol. 9342, pp. 73\u201385. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24595-9_6"},{"issue":"1","key":"3_CR15","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1504\/IJHPCN.2019.097051","volume":"13","author":"A Hayashi","year":"2019","unstructured":"Hayashi, A., Shirako, J., Tiotto, E., Ho, R., Sarkar, V.: Performance evaluation of OpenMP\u2019s target construct on GPUS - exploring compiler optimisations. Int. J. High Perform. Comput. Network. 13(1), 54\u201369 (2019). https:\/\/doi.org\/10.1504\/IJHPCN.2019.097051","journal-title":"Int. J. High Perform. Comput. Network."},{"key":"3_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"470","DOI":"10.1007\/978-3-319-46079-6_33","volume-title":"High Performance Computing","author":"G Juckeland","year":"2016","unstructured":"Juckeland, G., et al.: From describing to prescribing parallelism: translating the SPEC ACCEL OpenACC suite to OpenMP target directives. In: Taufer, M., Mohr, B., Kunkel, J.M. (eds.) ISC High Performance 2016. LNCS, vol. 9945, pp. 470\u2013488. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46079-6_33"},{"key":"3_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1007\/978-3-319-45550-1_20","volume-title":"OpenMP: Memory, Devices, and Tasks","author":"I Karlin","year":"2016","unstructured":"Karlin, I., et al.: Early experiences porting three applications to OpenMP 4.5. In: Maruyama, N., de Supinski, B.R., Wahib, M. (eds.) IWOMP 2016. LNCS, vol. 9903, pp. 281\u2013292. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-45550-1_20"},{"key":"3_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1007\/978-3-030-34356-9_26","volume-title":"High Performance Computing","author":"VG Vergara Larrea","year":"2019","unstructured":"Vergara Larrea, V.G., et al.: Scaling the summit: deploying the world\u2019s fastest supercomputer. In: Weiland, M., Juckeland, G., Alam, S., Jagode, H. (eds.) ISC High Performance 2019. LNCS, vol. 11887, pp. 330\u2013351. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-34356-9_26"},{"key":"3_CR19","unstructured":"LLVM Bugzilla: Bug 44390 - Incorrect OpenMP target offload code at $$>$$ -O0 optimization (2020). https:\/\/bugs.llvm.org"},{"key":"3_CR20","unstructured":"LLVM Bugzilla: Bug 46107 - Poor present table performance (2020). https:\/\/bugs.llvm.org"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Martineau, M., McIntosh-Smith, S., Gaudin, W.: Evaluating OpenMP 4.0\u2019s effectiveness as a heterogeneous parallel programming model. In: 2016 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 338\u2013347 (2016)","DOI":"10.1109\/IPDPSW.2016.70"},{"key":"3_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1007\/978-3-319-65578-9_13","volume-title":"Scaling OpenMP for Exascale Performance and Portability","author":"M Martineau","year":"2017","unstructured":"Martineau, M., McIntosh-Smith, S.: The productivity, portability and performance of OpenMP 4.5 for scientific applications targeting Intel CPUs, IBM CPUs, and NVIDIA GPUs. In: de Supinski, B.R., Olivier, S.L., Terboven, C., Chapman, B.M., M\u00fcller, M.S. (eds.) IWOMP 2017. LNCS, vol. 10468, pp. 185\u2013200. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-65578-9_13"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Martineau, M., et al.: Performance analysis and optimization of Clang\u2019s OpenMP 4.5 GPU support. In: Proceedings of the 7th International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computing Systems, PMBS 2016, pp. 54\u201364. IEEE Press (2016)","DOI":"10.1109\/PMBS.2016.011"},{"key":"3_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/978-3-319-45550-1_18","volume-title":"OpenMP: Memory, Devices, and Tasks","author":"M Martineau","year":"2016","unstructured":"Martineau, M., Price, J., McIntosh-Smith, S., Gaudin, W.: Pragmatic performance portability with OpenMP 4.x. In: Maruyama, N., de Supinski, B.R., Wahib, M. (eds.) IWOMP 2016. LNCS, vol. 9903, pp. 253\u2013267. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-45550-1_18"},{"key":"3_CR25","doi-asserted-by":"publisher","unstructured":"Mishra, A., Li, L., Kong, M., Finkel, H., Chapman, B.: Benchmarking and evaluating unified memory for OpenMP GPU offloading. In: Proceedings of the Fourth Workshop on the LLVM Compiler Infrastructure in HPC. LLVM-HPC 2017. Association for Computing Machinery, New York (2017). https:\/\/doi.org\/10.1145\/3148173.3148184","DOI":"10.1145\/3148173.3148184"},{"key":"3_CR26","doi-asserted-by":"publisher","unstructured":"Monsalve Diaz, J.M., Friedline, K., Pophale, S., Hernandez, O., Bernholdt, D., Chandrasekaran, S.: Analysis of OpenMP 4.5 offloading in implementations: correctness and overhead. Parallel Comput. 89, 102546 (2019). https:\/\/doi.org\/10.1016\/j.parco.2019.102546","DOI":"10.1016\/j.parco.2019.102546"},{"key":"3_CR27","unstructured":"NERSC: Cori GPU Nodes (2020). https:\/\/docs-dev.nersc.gov\/cgpu\/"},{"key":"3_CR28","unstructured":"OLCF Support: IBM ticket TS003552272 - IBM compiler OpenMP target offload data management bug (2020)"},{"key":"3_CR29","unstructured":"OpenMP Architecture Review Board: OpenMP application programming interface version 5.0, November 2018. https:\/\/www.openmp.org\/wp-content\/uploads\/OpenMP-API-Specification-5.0.pdf"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Pennycook, S.J., Sewall, J.D., Hammond, J.R.: Evaluating the impact of proposed OpenMP 5.0 features on performance, portability and productivity. In: 2018 IEEE\/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC), pp. 37\u201346 (2018)","DOI":"10.1109\/P3HPC.2018.00007"},{"key":"3_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1007\/978-3-030-49943-3_4","volume-title":"Accelerator Programming Using Directives","author":"F Rabbi","year":"2020","unstructured":"Rabbi, F., Daley, C.S., Aktulga, H.M., Wright, N.J.: Evaluation of directive-based GPU programming models on a block eigensolver with consideration of large sparse matrices. In: Wienke, S., Bhalachandra, S. (eds.) WACCPD 2019. LNCS, vol. 12017, pp. 66\u201388. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-49943-3_4"},{"key":"3_CR32","unstructured":"Sakharnykh, N., Wang, P., Williams, S.: HPGMG-CUDA (2020). https:\/\/bitbucket.org\/nsakharnykh\/hpgmg-cuda"},{"key":"3_CR33","unstructured":"The Clang Team: Clang 11 Documentation, OpenMP Support (2020). https:\/\/clang.llvm.org\/docs\/OpenMPSupport.html"},{"key":"3_CR34","doi-asserted-by":"crossref","unstructured":"Tiotto, E., Mahjour, B., Tsang, W., Xue, X., Islam, T., Chen, W.: OpenMP 4.5 compiler optimization for GPU offloading. IBM J. Res. Dev. 64(3\/4), 14:1\u201314:11 (2020)","DOI":"10.1147\/JRD.2019.2962428"},{"key":"3_CR35","doi-asserted-by":"publisher","unstructured":"Vergara Larrea, V.G., Budiardja, R.D., Gayatri, R., Daley, C., Hernandez, O., Joubert, W.: Experiences in porting mini-applications to OpenACC and OpenMP on heterogeneous systems. Concurr. Comput.: Pract. Exp. e5780 (2020). https:\/\/doi.org\/10.1002\/cpe.5780. https:\/\/onlinelibrary.wiley.com\/doi\/abs\/10.1002\/cpe.5780. [Published online ahead of print (24 April 2020)]","DOI":"10.1002\/cpe.5780"},{"key":"3_CR36","doi-asserted-by":"publisher","unstructured":"Wolfe, M., Lee, S., Kim, J., Tian, X., Xu, R., Chandrasekaran, S., Chapman, B.: Implementing the OpenACC data model. In: 2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 662\u2013672, May 2017. https:\/\/doi.org\/10.1109\/IPDPSW.2017.85","DOI":"10.1109\/IPDPSW.2017.85"}],"container-title":["Lecture Notes in Computer Science","OpenMP: Portable Multi-Level Parallelism on Modern Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58144-2_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,23]],"date-time":"2021-04-23T19:30:33Z","timestamp":1619206233000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58144-2_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030581435","9783030581442"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58144-2_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"1 September 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IWOMP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on OpenMP","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Austin, TX","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 September 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iwomp2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.iwomp.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"84% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}