{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T11:44:11Z","timestamp":1767959051378,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,1,26]]},"DOI":"10.1145\/3773656.3773684","type":"proceedings-article","created":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T10:22:11Z","timestamp":1767954131000},"page":"247-257","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Cloud-Hardware Co-Design for Memory Bandwidth-Bound HPC Workloads: Performance and Characteristics of Azure HBv5 Virtual Machines"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2637-2465","authenticated-orcid":false,"given":"Amirreza","family":"Rastegari","sequence":"first","affiliation":[{"name":"Microsoft Corporation, Saint Paul, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9509-953X","authenticated-orcid":false,"given":"Sai","family":"Kovouri","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Austin, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4526-7275","authenticated-orcid":false,"given":"Michael","family":"Cui","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3622-2960","authenticated-orcid":false,"given":"Zehra","family":"Naz","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9567-8565","authenticated-orcid":false,"given":"Jay","family":"Fleischman","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, Inc., Fort Colins, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2942-1840","authenticated-orcid":false,"given":"Saurabh","family":"Gupta","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, Inc., Bengaluru, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6945-4792","authenticated-orcid":false,"given":"Anil","family":"Harwani","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, Inc., Austin, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4616-0144","authenticated-orcid":false,"given":"Gabriel H.","family":"Loh","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, Inc., Bellevue, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5552-6061","authenticated-orcid":false,"given":"Joe","family":"Greenseid","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Baltimore, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5645-5706","authenticated-orcid":false,"given":"Evan","family":"Burness","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3281-5186","authenticated-orcid":false,"given":"Prabhat","family":"Ram","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, San Francisco, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5119-0754","authenticated-orcid":false,"given":"Michael F.","family":"Ringenburg","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,1,25]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"2025. NAMD Benchmarking in WOC Benchmarking Repository. https:\/\/github.com\/arstgr\/woc-benchmarking\/tree\/main\/apps\/hpc\/namd. Accessed: 2025-03-11."},{"key":"e_1_3_3_1_3_2","unstructured":"2025. OpenFOAM Benchmarking in WOC Benchmarking Repository. https:\/\/github.com\/arstgr\/woc-benchmarking\/tree\/main\/apps\/hpc\/openfoam. Accessed: 2025-03-11."},{"key":"e_1_3_3_1_4_2","unstructured":"Amazon Web Services. [n. d.]. AWS Graviton Processors. https:\/\/aws.amazon.com\/ec2\/graviton\/. Accessed: 2025-03-11."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00062"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","unstructured":"John Backus. 1978. Can programming be liberated from the von Neumann style? a functional style and its algebra of programs. Commun. ACM 21 8 (Aug. 1978) 613\u2013641. 10.1145\/359576.359579","DOI":"10.1145\/359576.359579"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00110"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","unstructured":"Mark Bohr. 2009. A 30 year retrospective on Dennard\u2019s MOSFET scaling paper. IEEE Solid-State Circuits Society Newsletter 12 1 (2009) 11\u201313. 10.1109\/N-SSC.2007.4785534","DOI":"10.1109\/N-SSC.2007.4785534"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","unstructured":"Joo-Hyung Chae. 2024. High-Bandwidth and Energy-Efficient Memory Interfaces for the Data-Centric Era: Recent Advances Design Challenges and Future Prospects. IEEE Open Journal of the Solid-State Circuits Society 4 (2024) 252\u2013264. 10.1109\/OJSSCS.2024.3458900","DOI":"10.1109\/OJSSCS.2024.3458900"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","unstructured":"Jack Choquette. 2023. NVIDIA Hopper H100 GPU: Scaling Performance. IEEE Micro 43 3 (2023) 9\u201317. 10.1109\/MM.2023.3256796","DOI":"10.1109\/MM.2023.3256796"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2015.32"},{"key":"e_1_3_3_1_12_2","unstructured":"Intel Corporation. 2025. Intel Xeon CPU Max Series Product Brief. https:\/\/www.intel.com\/content\/www\/us\/en\/content-details\/765366\/intel-xeon-cpu-max-series-product-brief.html Accessed: 2025-01-25."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","unstructured":"Jack Dongarra Mathieu Faverge Hatem Ltaief and Piotr Luszczek. 2014. Achieving numerical accuracy and high performance using recursive tile LU factorization with partial pivoting. Concurrency and Computation: Practice and Experience 26 7 (2014) 1408\u20131431. 10.1002\/cpe.3110","DOI":"10.1002\/cpe.3110"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","unstructured":"Jack Dongarra Michael\u00a0A Heroux and Piotr Luszczek. 2016. High-performance conjugate-gradient benchmark: A new metric for ranking high-performance computing systems. The International Journal of High Performance Computing Applications 30 1 (2016) 3\u201310. 10.1177\/1094342015593158","DOI":"10.1177\/1094342015593158"},{"key":"e_1_3_3_1_15_2","unstructured":"Jack Dongarra Michael\u00a0A. Heroux and Piotr Luszczek. 2023. High Performance Conjugate Gradient (HPCG). https:\/\/www.hpcg-benchmark.org\/software\/view.html?id=262. Accessed: 2025-03-11."},{"key":"e_1_3_3_1_16_2","unstructured":"Jack Dongarra Petitet\u00a0Antoine Luszczek Piotr and A Cleary. 2018. High Performance Linpack (HPL). https:\/\/www.netlib.org\/benchmark\/hpl\/. Accessed: 2025-03-11."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","unstructured":"Jack\u00a0J Dongarra Piotr Luszczek and Antoine Petitet. 2003. The LINPACK benchmark: past present and future. Concurrency and Computation: practice and experience 15 9 (2003) 803\u2013820. 10.1002\/cpe.728","DOI":"10.1002\/cpe.728"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","unstructured":"Lieven Eeckhout. 2017. Is moore\u2019s law slowing down? what\u2019s next? IEEE Micro 37 04 (2017) 4\u20135. 10.1109\/MM.2017.3211123","DOI":"10.1109\/MM.2017.3211123"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-56026-2"},{"key":"e_1_3_3_1_20_2","unstructured":"OpenFOAM Foundation. 2023. Motorbike Benchmark for OpenFOAM. https:\/\/develop.openfoam.com\/Development\/openfoam\/-\/tree\/OpenFOAM-v2306\/tutorials\/incompressible\/simpleFoam\/motorBike. Accessed: 2025-03-18."},{"key":"e_1_3_3_1_21_2","unstructured":"Google. 2025. Multichase Benchmark. https:\/\/github.com\/google\/multichase. Accessed: 2025-03-11."},{"key":"e_1_3_3_1_22_2","unstructured":"Huda Ibeid Vikram Narayana Jeongnim Kim Anthony Nguyen Vitali Morozov and Ye Luo. 2025. Performance Analysis of HPC applications on the Aurora Supercomputer: Exploring the Impact of HBM-Enabled Intel Xeon Max CPUs. arxiv:https:\/\/arXiv.org\/abs\/2504.03632\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2504.03632"},{"key":"e_1_3_3_1_23_2","unstructured":"Advanced Micro\u00a0Devices Inc.2024. AMD AOCL Library. https:\/\/www.amd.com\/en\/developer\/aocl.html. Accessed: 2025-03-11."},{"key":"e_1_3_3_1_24_2","first-page":"1","volume-title":"International workshop on coupled methods in numerical dynamics","author":"Jasak Hrvoje","year":"2007","unstructured":"Hrvoje Jasak, Aleksandar Jemcov, Zeljko Tukovic, et\u00a0al. 2007. OpenFOAM: A C++ library for complex physics simulations. In International workshop on coupled methods in numerical dynamics , Vol.\u00a01000. Dubrovnik, Croatia), 1\u201320."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/IMW.2017.7939084"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/IMW59701.2024.10536972"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER49012.2020.00069"},{"key":"e_1_3_3_1_29_2","first-page":"27","volume-title":"Workshop on Performance Analysis and its Impact on Design held in conjunction with ISCA, Barcelona, Spain","author":"Kogge Peter\u00a0M","year":"1998","unstructured":"Peter\u00a0M Kogge, Jay\u00a0B Brockman, and Vincent Freeh. 1998. Processing-In-Memory Based Systems: Performance Evaluation Considerations. In Workshop on Performance Analysis and its Impact on Design held in conjunction with ISCA, Barcelona, Spain. 27\u201328."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.23919\/VLSICircuits52068.2021.9492415"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-40843-4_30"},{"key":"e_1_3_3_1_32_2","unstructured":"John\u00a0D McCalpin et\u00a0al. 1995. Memory bandwidth and machine balance in current high performance computers. IEEE computer society technical committee on computer architecture (TCCA) newsletter 2 19-25 (1995)."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304031"},{"key":"e_1_3_3_1_34_2","unstructured":"Microsoft Azure. [n. d.]. Microsoft Azure Delivers Purpose-Built Cloud Infrastructure in the Era of AI. https:\/\/azure.microsoft.com\/en-us\/blog\/microsoft-azure-delivers-purpose-built-cloud-infrastructure-in-the-era-of-ai\/. Accessed: 2025-03-11."},{"key":"e_1_3_3_1_35_2","unstructured":"Microsoft Azure. 2025. HBv4-series Azure Virtual Machines Overview. https:\/\/learn.microsoft.com\/en-us\/azure\/virtual-machines\/hbv4-series-overview Accessed: 2025-03-18."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42615.2023.10067540"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","unstructured":"James\u00a0C Phillips Rosemary Braun Wei Wang James Gumbart Emad Tajkhorshid Elizabeth Villa Christophe Chipot Robert\u00a0D Skeel Laxmikant Kale and Klaus Schulten. 2005. Scalable molecular dynamics with NAMD. Journal of computational chemistry 26 16 (2005) 1781\u20131802. 10.1002\/jcc.20289","DOI":"10.1002\/jcc.20289"},{"key":"e_1_3_3_1_38_2","first-page":"15","volume-title":"Presentation in AHUG ISC 21 Workshop","author":"Poenaru Andrei","year":"2021","unstructured":"Andrei Poenaru, Tom Deakin, Simon McIntosh-Smith, Simon\u00a0D Hammond, and Andrew\u00a0J Younge. 2021. An evaluation of the Fujitsu A64FX for HPC applications. In Presentation in AHUG ISC 21 Workshop. 15\u201325."},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","unstructured":"Daniel Reed Dennis Gannon and Jack Dongarra. 2023. HPC Forecast: Cloudy and Uncertain. Commun. ACM 66 2 (Jan. 2023) 82\u201390. 10.1145\/3552309","DOI":"10.1145\/3552309"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","unstructured":"Mitsuhisa Sato Yuetsu Kodama Miwako Tsuji and Tesuya Odajima. 2022. Co-Design and System for the Supercomputer \u201cFugaku\u201d. IEEE Micro 42 2 (March 2022) 26\u201334. 10.1109\/MM.2021.3136882","DOI":"10.1109\/MM.2021.3136882"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","unstructured":"Runbin Shi Kaan Kara Christoph Hagleitner Dionysios Diamantopoulos Dimitris Syrivelis and Gustavo Alonso. 2022. Exploiting HBM on FPGAs for data processing. ACM Transactions on Reconfigurable Technology and Systems 15 4 (2022) 1\u201327. 10.1145\/3491238","DOI":"10.1145\/3491238"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","unstructured":"Eva Siegmann Robert\u00a0J Harrison David Carlson Smeet Chheda Anthony Curtis Firat Coskun Raul Gonzalez Daniel Wood and Nikolay\u00a0A Simakov. 2024. First impressions of the sapphire rapids processor with hbm for scientific workloads. SN Computer Science 5 5 (2024) 623. 10.1007\/s42979-024-02958-3","DOI":"10.1007\/s42979-024-02958-3"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3636480.3637097"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/VLSITechnologyandCir46783.2024.10631545"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","unstructured":"Avinash Sodani Roger Gramunt Jesus Corbal Ho-Seop Kim Krishna Vinod Sundaram Chinthamani Steven Hutsell Rajat Agarwal and Yen-Chen Liu. 2016. Knights Landing: Second-Generation Intel Xeon Phi Product. IEEE Micro 36 2 (2016) 34\u201346. 10.1109\/MM.2016.25","DOI":"10.1109\/MM.2016.25"},{"key":"e_1_3_3_1_46_2","unstructured":"Theoretical and Computational\u00a0Biophysics Group. 2018. NAMD STMV Benchmark. https:\/\/www.ks.uiuc.edu\/Research\/namd\/utilities\/stmv\/ Accessed: 2025-03-18."},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","unstructured":"Samuel Williams Andrew Waterman and David Patterson. 2009. Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52 4 (April 2009) 65\u201376. 10.1145\/1498765.1498785","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","unstructured":"Wm.\u00a0A. Wulf and Sally\u00a0A. McKee. 1995. Hitting the memory wall: implications of the obvious. SIGARCH Comput. Archit. News 23 1 (March 1995) 20\u201324. 10.1145\/216585.216588","DOI":"10.1145\/216585.216588"}],"event":{"name":"SCA\/HPCAsia 2026: Supercomputing Asia and International Conference on High Performance Computing in Asia Pacific Region","location":"Osaka Japan","acronym":"SCA\/HPCAsia 2026"},"container-title":["Proceedings of the Supercomputing Asia and International Conference on High Performance Computing in Asia Pacific Region"],"original-title":[],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T10:22:45Z","timestamp":1767954165000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3773656.3773684"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,25]]},"references-count":47,"alternative-id":["10.1145\/3773656.3773684","10.1145\/3773656"],"URL":"https:\/\/doi.org\/10.1145\/3773656.3773684","relation":{},"subject":[],"published":{"date-parts":[[2026,1,25]]},"assertion":[{"value":"2026-01-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}