{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:40:01Z","timestamp":1755974401712,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T00:00:00Z","timestamp":1704931200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2211018"],"award-info":[{"award-number":["2211018"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"DOE U.S. Department of Energy","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,1,11]]},"DOI":"10.1145\/3636480.3637283","type":"proceedings-article","created":{"date-parts":[[2024,1,8]],"date-time":"2024-01-08T06:23:46Z","timestamp":1704695026000},"page":"24-35","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Impact of Write-Allocate Elimination on Fujitsu A64FX"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8718-1491","authenticated-orcid":false,"given":"Yan","family":"Kang","sequence":"first","affiliation":[{"name":"The Pennsylvania State University, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8758-7657","authenticated-orcid":false,"given":"Sayan","family":"Ghosh","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9940-9951","authenticated-orcid":false,"given":"Mahmut","family":"Kandemir","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4313-1882","authenticated-orcid":false,"given":"Andr\u00e9s","family":"Marquez","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, United States"}]}],"member":"320","published-online":{"date-parts":[[2024,1,11]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Execution-Cache-Memory modeling and performance tuning of sparse matrix-vector multiplication and Lattice quantum chromodynamics on A64FX. Concurrency and Computation: Practice and Experience","author":"Alappat Christie","year":"2021","unstructured":"Christie Alappat, Nils Meyer, Jan Laukemann, Thomas Gruber, Georg Hager, Gerhard Wellein, and Tilo Wettig. 2021. Execution-Cache-Memory modeling and performance tuning of sparse matrix-vector multiplication and Lattice quantum chromodynamics on A64FX. Concurrency and Computation: Practice and Experience (2021), e6512."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC50251.2020.00029"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00106"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.50"},{"key":"e_1_3_2_1_5_1","volume-title":"The GAP benchmark suite. arXiv preprint arXiv:1508.03619","author":"Beamer Scott","year":"2015","unstructured":"Scott Beamer, Krste Asanovi\u0107, and David Patterson. 2015. The GAP benchmark suite. arXiv preprint arXiv:1508.03619 (2015)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2015.12"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1088\/1742-5468\/2008\/10\/P10008"},{"key":"e_1_3_2_1_8_1","volume-title":"Ookami: Deployment and Initial Experiences.","author":"Burford Andrew","year":"2021","unstructured":"Andrew Burford, Alan Calder, David Carlson, Barbara Chapman, Firat Coskun, Tony Curtis, Catherine Feldman, Robert Harrison, Yan Kang, Benjamin Michalowicz, 2021. Ookami: Deployment and Initial Experiences. (2021), 1\u20138."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00109"},{"key":"e_1_3_2_1_11_1","volume-title":"The LINPACK benchmark: past, present and future. Concurrency and Computation: practice and experience 15, 9","author":"Dongarra J","year":"2003","unstructured":"Jack\u00a0J Dongarra, Piotr Luszczek, and Antoine Petitet. 2003. The LINPACK benchmark: past, present and future. Concurrency and Computation: practice and experience 15, 9 (2003), 803\u2013820."},{"volume-title":"miniVite: A graph analytics benchmarking tool for massively parallel systems. In 2018 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)","author":"Ghosh Sayan","key":"e_1_3_2_1_12_1","unstructured":"Sayan Ghosh, Mahantesh Halappanavar, Antonino Tumeo, Ananth Kalyanaraman, and Assefaw\u00a0H Gebremedhin. 2018. miniVite: A graph analytics benchmarking tool for massively parallel systems. In 2018 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS). IEEE, 51\u201356."},{"key":"e_1_3_2_1_13_1","unstructured":"Georg Hager. [n. d.]. Write-allocate evasion has finally arrived at Intel \u2013 or has it?https:\/\/blogs.fau.de\/hager\/archives\/8997"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER49012.2020.00078"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368474.3368483"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.21105\/joss.01244"},{"key":"e_1_3_2_1_17_1","unstructured":"ARM Limited. 2020. Guide for ARMv8-A. https:\/\/developer.arm.com\/documentation\/den0024\/a."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2015.03.003"},{"key":"e_1_3_2_1_19_1","unstructured":"John McCalpin. 2018. Notes on \u201cnon-temporal\u201d(aka \u201cstreaming\u201d) stores. https:\/\/sites.utexas.edu\/jdm4372\/2018\/01\/01\/notes-on-non-temporal-aka-streaming-stores\/."},{"key":"e_1_3_2_1_20_1","volume-title":"Stream benchmark. Link: www. cs. virginia. edu\/stream\/ref. html# what 22","author":"McCalpin D","year":"1995","unstructured":"John\u00a0D McCalpin. 1995. Stream benchmark. Link: www. cs. virginia. edu\/stream\/ref. html# what 22 (1995), 7."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-85262-7_9"},{"key":"e_1_3_2_1_22_1","first-page":"45","article-title":"Introducing the graph 500","volume":"19","author":"Murphy C","year":"2010","unstructured":"Richard\u00a0C Murphy, Kyle\u00a0B Wheeler, Brian\u00a0W Barrett, and James\u00a0A Ang. 2010. Introducing the graph 500. Cray Users Group (CUG) 19 (2010), 45\u201374.","journal-title":"Cray Users Group (CUG)"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER49012.2020.00075"},{"key":"e_1_3_2_1_24_1","volume-title":"Presentation in AHUG ISC 21 Workshop.","author":"Poenaru Andrei","year":"2021","unstructured":"Andrei Poenaru, Tom Deakin, Simon McIntosh-Smith, Simon\u00a0D Hammond, and Andrew\u00a0J Younge. 2021. An Evaluation of the Fujitsu A64FX for HPC Applications. In Presentation in AHUG ISC 21 Workshop."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433763"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPPW.2010.38"},{"key":"e_1_3_2_1_27_1","volume-title":"LIKWID: Lightweight performance tools. In Competence in High Performance Computing","author":"Treibig Jan","year":"2011","unstructured":"Jan Treibig, Georg Hager, and Gerhard Wellein. 2011. LIKWID: Lightweight performance tools. In Competence in High Performance Computing 2010. Springer, 165\u2013175."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3266444.3266454"}],"event":{"name":"HPCAsiaWS 2024: International Conference on High Performance Computing in Asia-Pacific Region Workshops","acronym":"HPCAsiaWS 2024","location":"Nagoya Japan"},"container-title":["Proceedings of the International Conference on High Performance Computing in Asia-Pacific Region Workshops"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3636480.3637283","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3636480.3637283","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:08:27Z","timestamp":1755972507000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3636480.3637283"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,11]]},"references-count":28,"alternative-id":["10.1145\/3636480.3637283","10.1145\/3636480"],"URL":"https:\/\/doi.org\/10.1145\/3636480.3637283","relation":{},"subject":[],"published":{"date-parts":[[2024,1,11]]},"assertion":[{"value":"2024-01-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}