{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T14:38:37Z","timestamp":1774449517586,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":12,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,2,19]],"date-time":"2025-02-19T00:00:00Z","timestamp":1739923200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"\"Joint Usage\/Research Center for Interdisciplinary Large-scale Information Infrastructures (JHPCN)\" and \"High Performance Computing Infrastructure (HPCI)\" in Japan","award":["jh240002"],"award-info":[{"award-number":["jh240002"]}]},{"name":"JSPS KAKENHI","award":["JP23K11126"],"award-info":[{"award-number":["JP23K11126"]}]},{"name":"JSPS KAKENHI","award":["JP24K02945"],"award-info":[{"award-number":["JP24K02945"]}]},{"name":"JST SPRING","award":["JPMJSP2125"],"award-info":[{"award-number":["JPMJSP2125"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,2,19]]},"DOI":"10.1145\/3703001.3724385","type":"proceedings-article","created":{"date-parts":[[2025,4,19]],"date-time":"2025-04-19T10:29:56Z","timestamp":1745058596000},"page":"36-44","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Performance Evaluation of Loop Body Splitting for Fast Modal Filtering in SCALE-DG on A64FX"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-2784-4540","authenticated-orcid":false,"given":"Xuanzhengbo","family":"Ren","sequence":"first","affiliation":[{"name":"Graduate School of Informatics, Nagoya University, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9964-955X","authenticated-orcid":false,"given":"Yuta","family":"Kawai","sequence":"additional","affiliation":[{"name":"RIKEN R-CCS, Kobe, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7154-8060","authenticated-orcid":false,"given":"Hirofumi","family":"Tomita","sequence":"additional","affiliation":[{"name":"RIKEN R-CCS, Kobe, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9457-7457","authenticated-orcid":false,"given":"Seiya","family":"Nishizawa","sequence":"additional","affiliation":[{"name":"RIKEN R-CCS, Kobe, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7193-9304","authenticated-orcid":false,"given":"Takahiro","family":"Katagiri","sequence":"additional","affiliation":[{"name":"Information Technology Center, Nagoya University, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5349-6852","authenticated-orcid":false,"given":"Tetsuya","family":"Hoshino","sequence":"additional","affiliation":[{"name":"Information Technology Center, Nagoya University, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0051-6811","authenticated-orcid":false,"given":"Daichi","family":"Mukunoki","sequence":"additional","affiliation":[{"name":"Information Technology Center, Nagoya University, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1454-202X","authenticated-orcid":false,"given":"Masatoshi","family":"Kawai","sequence":"additional","affiliation":[{"name":"Information Technology Center, Nagoya University, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5827-1220","authenticated-orcid":false,"given":"Toru","family":"Nagai","sequence":"additional","affiliation":[{"name":"Information Technology Center, Nagoya University, Nagoya, Japan"}]}],"member":"320","published-online":{"date-parts":[[2025,4,19]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","unstructured":"Christie Alappat Nils Meyer Jan Laukemann Thomas Gruber Georg Hager Gerhard Wellein and Tilo Wettig. 2022. Execution-Cache-Memory modeling and performance tuning of sparse matrix-vector multiplication and Lattice quantum chromodynamics on A64FX. Concurrency and Computation: Practice and Experience 34 20 (2022) e6512. 10.1002\/cpe.6512","DOI":"10.1002\/cpe.6512"},{"key":"e_1_3_3_1_3_2","unstructured":"Intel Corporation. 2023. Intel 64 and IA-32 Architectures Optimization Reference Manual Volume 1. Retrieved Dec. 23 2024 from https:\/\/www.intel.com\/content\/www\/us\/en\/content-details\/671488\/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html"},{"key":"e_1_3_3_1_4_2","first-page":"115","volume-title":"Nodal Discontinuous Galerkin Methods: Algorithms, Analysis, and Applications (1st ed.)","author":"Hesthaven Jan\u00a0S.","year":"2007","unstructured":"Jan\u00a0S. Hesthaven and Tim Warburton. 2007. Nodal Discontinuous Galerkin Methods: Algorithms, Analysis, and Applications (1st ed.). Springer Publishing Company, Incorporated, New York, NY, USA, Chapter\u00a05, 115\u2013168."},{"key":"e_1_3_3_1_5_2","unstructured":"Nagoya\u00a0University ICTS.2024. Supercomputer \"Flow\". Retrieved Dec. 23 2024 from https:\/\/icts.nagoya-u.ac.jp\/en\/sc\/"},{"key":"e_1_3_3_1_6_2","unstructured":"Advanced Micro\u00a0Devices Inc.2023. Software Optimization Guide for the AMD Zen5 Microarchitecture. Retrieved Dec. 23 2024 from https:\/\/www.amd.com\/en\/search\/documentation\/hub.html"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","unstructured":"Yuta Kawai and Hirofumi Tomita. 2023. Numerical Accuracy Necessary for Large-Eddy Simulation of Planetary Boundary Layer Turbulence Using the Discontinuous Galerkin Method. American Meteorological Society Section: Monthly Weather Review 151 6 (2023) 1479\u20131508. 10.1175\/MWR-D-22-0245.1","DOI":"10.1175\/MWR-D-22-0245.1"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2017.93"},{"key":"e_1_3_3_1_9_2","unstructured":"Fujitsu Limited. 2022. A64FX_Microarchitecture_Manual_en_1.8.1. Retrieved Dec. 23 2024 from https:\/\/github.com\/fujitsu\/A64FX\/blob\/master\/doc\/A64FX_Microarchitecture_Manual_en_1.8.1.pdf"},{"key":"e_1_3_3_1_10_2","unstructured":"Fujitsu Limited. 2023. Development Studio Profiler User\u2019s Guide. Retrieved Dec. 23 2024 from https:\/\/software.fujitsu.com\/jp\/manual\/manualfiles\/m230003\/j2ul2483\/02enz012\/j2ul-2483-02enz0.pdf"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTERWorkshops61563.2024.00033"},{"key":"e_1_3_3_1_12_2","unstructured":"Team SCALE. 2024. FE-Project top page. Retrieved Dec. 23 2024 from https:\/\/ywkawai.github.io\/FE-Project_web\/"},{"key":"e_1_3_3_1_13_2","unstructured":"Team SCALE. 2024. SCALE Top page. Retrieved Dec. 23 2024 from https:\/\/scale.riken.jp\/"}],"event":{"name":"HPCASIA '25: 2025 International Conference on High Performance Computing in Asia-Pacific Region Workshops Proceedings","location":"Hsinchu Taiwan","acronym":"HPCASIA '25"},"container-title":["Proceedings of the 2025 International Conference on High Performance Computing in Asia-Pacific Region Workshops"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3703001.3724385","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3703001.3724385","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:18Z","timestamp":1750295418000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3703001.3724385"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,19]]},"references-count":12,"alternative-id":["10.1145\/3703001.3724385","10.1145\/3703001"],"URL":"https:\/\/doi.org\/10.1145\/3703001.3724385","relation":{},"subject":[],"published":{"date-parts":[[2025,2,19]]},"assertion":[{"value":"2025-04-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}