{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T11:54:14Z","timestamp":1768564454376,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T00:00:00Z","timestamp":1641513600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"EXCELLERAT: The European Centre of Excellence for Engineering Applications","award":["823691"],"award-info":[{"award-number":["823691"]}]},{"name":"SSF"},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["951732"],"award-info":[{"award-number":["951732"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]},{"name":"SNIC","award":["2018-05973"],"award-info":[{"award-number":["2018-05973"]}]},{"name":"SESSI"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,1,7]]},"DOI":"10.1145\/3492805.3492818","type":"proceedings-article","created":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T23:45:23Z","timestamp":1641599123000},"page":"94-102","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Strong Scaling of OpenACC enabled Nek5000 on several GPU based HPC systems"],"prefix":"10.1145","author":[{"given":"Jonathan","family":"Vincent","sequence":"first","affiliation":[{"name":"PDC, KTH Royal Institute of Technology, Sweden"}]},{"given":"Jing","family":"Gong","sequence":"additional","affiliation":[{"name":"ENCCS, Uppsala University, Sweden"}]},{"given":"Martin","family":"Karp","sequence":"additional","affiliation":[{"name":"Computer Science and Technology, KTH Royal Institute of Technology, Sweden"}]},{"given":"Adam","family":"Peplinski","sequence":"additional","affiliation":[{"name":"Mechanics, KTH Royal Institute of Technology, Sweden"}]},{"given":"Niclas","family":"Jansson","sequence":"additional","affiliation":[{"name":"PDC, KTH Royal Institute of Technology, Sweden"}]},{"given":"Artur","family":"Podobas","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Sweden"}]},{"given":"Andreas","family":"Jocksch","sequence":"additional","affiliation":[{"name":"CSCS - Swiss National Supercomputing Centre, Switzerland"}]},{"given":"Jie","family":"Yao","sequence":"additional","affiliation":[{"name":"Texas Tech University, USA"}]},{"given":"Fazle","family":"Hussain","sequence":"additional","affiliation":[{"name":"Texas Tech University, USA"}]},{"given":"Stefano","family":"Markidis","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Sweden"}]},{"given":"Matts","family":"Karlsson","sequence":"additional","affiliation":[{"name":"Link\u00f6ping University,, Sweden"}]},{"given":"Dirk","family":"Pleiter","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Sweden"}]},{"given":"Erwin","family":"Laure","sequence":"additional","affiliation":[{"name":"Max Planck Computing and Data Facility, Germany"}]},{"given":"Philipp","family":"Schlatter","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Sweden"}]}],"member":"320","published-online":{"date-parts":[[2022,1,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1465482.1465560"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2018.022071134"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/P3HPC49587.2019.00006"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.1974.1050511"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Jack Dongarra Pete Beckman Terry Moore Patrick Aerts Giovanni Aloisio Jean-Claude Andre David Barkai Jean-Yves Berthou Taisuke Boku Bertrand Braunschweig 2011. The international exascale software project roadmap. The international journal of high performance computing applications 25 1(2011) 3\u201360.  Jack Dongarra Pete Beckman Terry Moore Patrick Aerts Giovanni Aloisio Jean-Claude Andre David Barkai Jean-Yves Berthou Taisuke Boku Bertrand Braunschweig 2011. The international exascale software project roadmap. The international journal of high performance computing applications 25 1(2011) 3\u201360.","DOI":"10.1177\/1094342010391989"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2020.107245"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10494-013-9482-8"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0045-7825(98)00012-7"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Paul Fischer Stefan Kerkemeier Misun Min Yu-Hsiang Lan Malachi Phillips Thilina Rathnayake Elia Merzari Ananias Tomboulides Ali Karakus Noel Chalmers 2021. NekRS a GPU-Accelerated Spectral Element Navier-Stokes Solver. arXiv preprint arXiv:2104.05829(2021).  Paul Fischer Stefan Kerkemeier Misun Min Yu-Hsiang Lan Malachi Phillips Thilina Rathnayake Elia Merzari Ananias Tomboulides Ali Karakus Noel Chalmers 2021. NekRS a GPU-Accelerated Spectral Element Navier-Stokes Solver. arXiv preprint arXiv:2104.05829(2021).","DOI":"10.1016\/j.parco.2022.102982"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342020915762"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"P.\u00a0F. Fischer K. Heisey and M. Min. 2015. Scaling Limits for PDE-Based Simulation (Invited). In AIAA Aviation. American Institute of Aeronautics and Astronautics. AIAA 2015\u20133049.  P.\u00a0F. Fischer K. Heisey and M. Min. 2015. Scaling Limits for PDE-Based Simulation (Invited). In AIAA Aviation. American Institute of Aeronautics and Astronautics. AIAA 2015\u20133049.","DOI":"10.2514\/6.2015-3049"},{"key":"e_1_3_2_1_12_1","unstructured":"Paul\u00a0F Fischer James\u00a0W Lottes and Stefan\u00a0G Kerkemeier. 2008. nek5000 Web page.  Paul\u00a0F Fischer James\u00a0W Lottes and Stefan\u00a0G Kerkemeier. 2008. nek5000 Web page."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.37"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-016-1744-5"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2016.05.008]"},{"key":"e_1_3_2_1_16_1","volume-title":"Neko: A Modern, Portable, and Scalable Framework for High-Fidelity Computational Fluid Dynamics. arXiv preprint arXiv:2107.01243(2021).","author":"Jansson Niclas","year":"2021","unstructured":"Niclas Jansson , Martin Karp , Artur Podobas , Stefano Markidis , and Philipp Schlatter . 2021 . Neko: A Modern, Portable, and Scalable Framework for High-Fidelity Computational Fluid Dynamics. arXiv preprint arXiv:2107.01243(2021). Niclas Jansson, Martin Karp, Artur Podobas, Stefano Markidis, and Philipp Schlatter. 2021. Neko: A Modern, Portable, and Scalable Framework for High-Fidelity Computational Fluid Dynamics. arXiv preprint arXiv:2107.01243(2021)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","first-page":"e4964","DOI":"10.1002\/cpe.4964","article-title":"Optimized all-to-all communication on multicore architectures applied to FFTs with pencil decomposition","volume":"31","author":"Jocksch Andreas","year":"2019","unstructured":"Andreas Jocksch , Matthias Kraushaar , and David Daverio . 2019 . Optimized all-to-all communication on multicore architectures applied to FFTs with pencil decomposition . Concurrency and Computation: Practice and Experience 31 , 16(2019), e4964 . Andreas Jocksch, Matthias Kraushaar, and David Daverio. 2019. Optimized all-to-all communication on multicore architectures applied to FFTs with pencil decomposition. Concurrency and Computation: Practice and Experience 31, 16(2019), e4964.","journal-title":"Concurrency and Computation: Practice and Experience"},{"key":"e_1_3_2_1_18_1","unstructured":"Martin Karp Niclas Jansson Artur Podobas Philipp Schlatter and Stefano Markidis. 2020. Optimization of tensor-product operations in nekbone on gpus. arXiv preprint arXiv:2005.13425(2020).  Martin Karp Niclas Jansson Artur Podobas Philipp Schlatter and Stefano Markidis. 2020. Optimization of tensor-product operations in nekbone on gpus. arXiv preprint arXiv:2005.13425(2020)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00116"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1017\/jfm.2015.268"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342015576846"},{"key":"e_1_3_2_1_22_1","volume-title":"OCCA: A unified approach to multi-threading languages. arXiv preprint arXiv:1403.0968(2014).","author":"Medina S","year":"2014","unstructured":"David\u00a0 S Medina , Amik St-Cyr , and Tim Warburton . 2014 . OCCA: A unified approach to multi-threading languages. arXiv preprint arXiv:1403.0968(2014). David\u00a0S Medina, Amik St-Cyr, and Tim Warburton. 2014. OCCA: A unified approach to multi-threading languages. arXiv preprint arXiv:1403.0968(2014)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.nucengdes.2016.09.028"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2009.7478342"},{"key":"e_1_3_2_1_25_1","unstructured":"Nvidia. 2021. AMGX website. https:\/\/developer.nvidia.com\/amgx.  Nvidia. 2021. AMGX website. https:\/\/developer.nvidia.com\/amgx."},{"key":"e_1_3_2_1_26_1","unstructured":"CUDA Nvidia. 2007. Compute unified device architecture programming guide. (2007).  CUDA Nvidia. 2007. Compute unified device architecture programming guide. (2007)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2938615.2938617"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2019.05.010"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ocemod.2005.05.006"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/0021-9991(84)90128-1"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1006\/jcph.1993.1162"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3012084"},{"key":"e_1_3_2_1_33_1","unstructured":"Fakgout R T. Kolvev R. Li S. Osborn D. Osei-Kuffuor V.\u00a0P. Magri J. Schroder B. Sjogreen P. Vassilevski and U.\u00a0M. Yang. 2021. HYPRE Website. https:\/\/computing.llnl.gov\/projects\/hypre-scalable-linear-solvers-multigrid-methods.  Fakgout R T. Kolvev R. Li S. Osborn D. Osei-Kuffuor V.\u00a0P. Magri J. Schroder B. Sjogreen P. Vassilevski and U.\u00a0M. Yang. 2021. HYPRE Website. https:\/\/computing.llnl.gov\/projects\/hypre-scalable-linear-solvers-multigrid-methods."},{"key":"e_1_3_2_1_35_1","volume-title":"\u00a0El Khoury","author":"Schlatter Philipp","year":"2012","unstructured":"Philipp Schlatter and George K . \u00a0El Khoury . 2012 . Turbulent flow in pipes. PDC newsletter (2012), 3\u201310. Philipp Schlatter and George K.\u00a0El Khoury. 2012. Turbulent flow in pipes. PDC newsletter (2012), 3\u201310."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPINEB.2017.11"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342018816368"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2017.29"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1006\/jpdc.2000.1676"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/331532.331599"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijheatfluidflow.2018.04.017"},{"key":"e_1_3_2_1_42_1","first-page":"56","article-title":"MPI: a standard message passing interface","volume":"12","author":"Walker W","year":"1996","unstructured":"David\u00a0 W Walker and Jack\u00a0 J Dongarra . 1996 . MPI: a standard message passing interface . Supercomputer 12 (1996), 56 \u2013 68 . David\u00a0W Walker and Jack\u00a0J Dongarra. 1996. MPI: a standard message passing interface. Supercomputer 12(1996), 56\u201368.","journal-title":"Supercomputer"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.softx.2017.05.003"}],"event":{"name":"HPC Asia2022: International Conference on High Performance Computing in Asia-Pacific Region","location":"Virtual Event Japan","acronym":"HPC Asia2022"},"container-title":["International Conference on High Performance Computing in Asia-Pacific Region"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3492805.3492818","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3492805.3492818","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:26Z","timestamp":1750193306000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3492805.3492818"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,7]]},"references-count":42,"alternative-id":["10.1145\/3492805.3492818","10.1145\/3492805"],"URL":"https:\/\/doi.org\/10.1145\/3492805.3492818","relation":{},"subject":[],"published":{"date-parts":[[2022,1,7]]},"assertion":[{"value":"2022-01-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}