{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T13:14:19Z","timestamp":1776950059856,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","funder":[{"name":"German Federal Ministry of Research, Technology and Space","award":["16ME0599K"],"award-info":[{"award-number":["16ME0599K"]}]},{"name":"German Federal Ministry of Research, Technology and Space","award":["16ME0602"],"award-info":[{"award-number":["16ME0602"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,4]]},"DOI":"10.1145\/3777884.3796996","type":"proceedings-article","created":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T12:27:26Z","timestamp":1776947246000},"page":"5-16","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Improving Energy Efficiency and Performance of Weather and Climate Simulations by Leveraging the Heterogeneity of Modern Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-6937-0227","authenticated-orcid":false,"given":"Julius","family":"Plehn","sequence":"first","affiliation":[{"name":"Deutsches Klimarechenzentrum GmbH, Hamburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4741-7201","authenticated-orcid":false,"given":"Christian","family":"von Elm","sequence":"additional","affiliation":[{"name":"CIDS, Information Services and High Performance Computing (ZIH), TU Dresden, Dresden, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1227-897X","authenticated-orcid":false,"given":"Pay","family":"Giesselmann","sequence":"additional","affiliation":[{"name":"Deutsches Klimarechenzentrum GmbH, Hamburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9292-4192","authenticated-orcid":false,"given":"Carsten","family":"Clauss","sequence":"additional","affiliation":[{"name":"ParTec AG, Munich, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2753-261X","authenticated-orcid":false,"given":"Hendryk","family":"Bockelmann","sequence":"additional","affiliation":[{"name":"Deutsches Klimarechenzentrum GmbH, Hamburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0666-4166","authenticated-orcid":false,"given":"Robert","family":"Sch\u00f6ne","sequence":"additional","affiliation":[{"name":"CIDS, Information Services and High Performance Computing (ZIH), TU Dresden, Dresden, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3034-333X","authenticated-orcid":false,"given":"Jan Frederik","family":"Engels","sequence":"additional","affiliation":[{"name":"Deutsches Klimarechenzentrum GmbH, Hamburg, Germany"}]}],"member":"320","published-online":{"date-parts":[[2026,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1029\/2023EA002912"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.1553"},{"key":"e_1_3_2_1_3_1","volume-title":"Advanced Micro Devices","author":"Inc.","year":"2025","unstructured":"Inc. Advanced Micro Devices. 2025a. AMD Instinct MI300 X Workload Optimization \u2014 ROCProfiler. https:\/\/rocm.docs.amd.com\/en\/latest\/how-to\/rocm-for-ai\/inference-optimization\/workload.html#mi300x-rocprof. Accessed: 2025-11-10."},{"key":"e_1_3_2_1_4_1","volume-title":"Advanced Micro Devices","author":"Inc.","year":"2025","unstructured":"Inc. Advanced Micro Devices. 2025b. rocm_smi_lib: ROCm System Management Interface Library (GitHub Repository). https:\/\/github.com\/RadeonOpenCompute\/rocm_smi_lib. Accessed: 2025-11-10."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.46"},{"key":"e_1_3_2_1_6_1","unstructured":"NVIDIA Corporation. 2022. NVIDIA Nsight\u2122 Compute v2022.3: An Interactive Kernel Profiler for CUDA\u00ae Applications. https:\/\/developer.nvidia.com\/nsight-compute-2022_3. Accessed: 2025-11-10."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1177\/109434209601000201"},{"key":"e_1_3_2_1_8_1","unstructured":"Erich Focht. 2024. Using veprof \u2013 external profiler for VE programs on NEC SX-Aurora TSUBASA. Blog post at SX-Aurora Developer Blog. Available at https:\/\/sx-aurora.github.io\/posts\/Testing-VEOS-DMA-prof\/ (accessed 2025-11-10)."},{"key":"e_1_3_2_1_9_1","volume-title":"The Grid: Blueprint for a New Computing Infrastructure. Morgan Kaufmann","author":"Foster Ian","year":"1999","unstructured":"Ian Foster and Carl Kesselman (Eds.). 1999. The Grid: Blueprint for a New Computing Infrastructure. Morgan Kaufmann, San Francisco, CA."},{"key":"e_1_3_2_1_10_1","unstructured":"Ian Foster and Carl Kesselman (Eds.). 2004. The Grid 2: Blueprint for a New Computing Infrastructure. Morgan Kaufmann Amsterdam; Boston."},{"key":"e_1_3_2_1_11_1","volume-title":"Systems Performance","author":"Gregg Brendan","year":"2020","unstructured":"Brendan Gregg. 2020. Systems Performance: Second Edition. Pearson (Addison-Wesley), Boston, MA."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/E2SC.2014.13"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2013.6557170"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3712285.3771790"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5194\/gmd-9-2755-2016"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5194\/gmd-16-779-2023"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAAC49578.2019.00007"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2017.116"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the Cray User Group (CUG) Conference 2014","author":"Kappel Matthew","year":"2014","unstructured":"Matthew Kappel, Jens Doleschal, Thomas Ilsche, Mario Bielert, Alistair Hart, and Harvey Richardson. 2014. User-level Power Monitoring and Application Performance on Cray XC30 Supercomputers. In Proceedings of the Cray User Group (CUG) Conference 2014, Lugano, Switzerland. Paper 136, available at https:\/\/cug.org\/proceedings\/cug2014_proceedings\/includes\/files\/pap136.pdf."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3712285.3771789"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-68564-7_9"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31476-6_7"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3456669.3456678"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.3853"},{"key":"e_1_3_2_1_25_1","volume-title":"MPI: A Message-Passing Interface Standard, Version 1.0. Technical Report. Message Passing Interface Forum. Initial MPI Standard Document.","author":"Interface Message Passing","year":"1994","unstructured":"Message Passing Interface. 1994. MPI: A Message-Passing Interface Standard, Version 1.0. Technical Report. Message Passing Interface Forum. Initial MPI Standard Document."},{"key":"e_1_3_2_1_26_1","volume-title":"MPI: A Message-Passing Interface Standard, Version 5.0. https:\/\/www.mpi-forum.org\/docs\/mpi-5.0\/mpi50-report.pdf","author":"Interface Forum Message Passing","year":"2025","unstructured":"Message Passing Interface Forum. 2025. MPI: A Message-Passing Interface Standard, Version 5.0. https:\/\/www.mpi-forum.org\/docs\/mpi-5.0\/mpi50-report.pdf"},{"key":"e_1_3_2_1_27_1","unstructured":"Maximilian Sander Hannes Tr\u00f6pgen Christian von Elm and Robert Sch\u00f6ne. 2025. Towards Large-Scale Top-Down Microarchitecture Analysis Using the\u00a0Score-P Framework. In Euro-Par 2024: Parallel Processing Workshops Silvina Caino-Lores Demetris Zeinalipour Thaleia Dimitra Doudali David E. Singh Gracia Ester Mart\u00edn Garz\u00f3n Leonel Sousa Diego Andrade Tommaso Cucinotta Donato D'Ambrosio Patrick Diehl Manuel F. Dolz Admela Jukan Raffaele Montella Matteo Nardelli Marta Garcia-Gasulla and Sarah Neuwirth (Eds.). Springer Nature Switzerland Cham 189-200."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-56702-0_4"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3629526.3645040"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00087"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5194\/gmd-18-7735-2025"},{"key":"e_1_3_2_1_32_1","unstructured":"Sameer Shende Allen D. Malony Wyatt Spear and Karen Schuchardt. 2012. Characterizing I\/O Performance Using the TAU Performance System. In Applications Tools and Techniques on the Road to Exascale Computing (Advances in Parallel Computing)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/129888.129892"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578244.3583729"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680256.3721323"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00028"}],"event":{"name":"ICPE '26: 17th ACM\/SPEC International Conference on Performance Engineering","location":"Florence Italy","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","SIGMETRICS ACM Special Interest Group on Measurement and Evaluation","SPEC"]},"container-title":["Proceedings of the 17th ACM\/SPEC International Conference on Performance Engineering"],"original-title":[],"deposited":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T12:29:01Z","timestamp":1776947341000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3777884.3796996"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,3]]},"references-count":36,"alternative-id":["10.1145\/3777884.3796996","10.1145\/3777884"],"URL":"https:\/\/doi.org\/10.1145\/3777884.3796996","relation":{},"subject":[],"published":{"date-parts":[[2026,5,3]]},"assertion":[{"value":"2026-05-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}