{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T07:42:35Z","timestamp":1774942955711,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T00:00:00Z","timestamp":1699660800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3581784.3607030","type":"proceedings-article","created":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T21:39:16Z","timestamp":1699997956000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["5 ExaFlop\/s HPL-MxP Benchmark with Linear Scalability on the 40-Million-Core Sunway Supercomputer"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0321-5405","authenticated-orcid":false,"given":"Rongfen","family":"Lin","sequence":"first","affiliation":[{"name":"National Research Center of Parallel Computer Engineering &amp; Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1375-6435","authenticated-orcid":false,"given":"Xinhui","family":"Yuan","sequence":"additional","affiliation":[{"name":"National Research Center of Parallel Computer Engineering &amp; Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9740-6581","authenticated-orcid":false,"given":"Wei","family":"Xue","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"},{"name":"Qinghai University, Xining, Qinghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3680-2681","authenticated-orcid":false,"given":"Wanwang","family":"Yin","sequence":"additional","affiliation":[{"name":"National Research Center of Parallel Computer Engineering &amp; Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6514-3119","authenticated-orcid":false,"given":"Jienan","family":"Yao","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6122-8868","authenticated-orcid":false,"given":"Junda","family":"Shi","sequence":"additional","affiliation":[{"name":"National Research Center of Parallel Computer Engineering &amp; Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7122-4986","authenticated-orcid":false,"given":"Qiang","family":"Sun","sequence":"additional","affiliation":[{"name":"National Research Center of Parallel Computer Engineering &amp; Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0988-2760","authenticated-orcid":false,"given":"Chaobo","family":"Song","sequence":"additional","affiliation":[{"name":"National Research Center of Parallel Computer Engineering &amp; Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5131-2520","authenticated-orcid":false,"given":"Fei","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2023,11,11]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"217","article-title":"A parallel implementation of matrix multiplication and LU factorization on the IBM 3090","volume":"2","author":"Agarwal Ramesh C","year":"1988","unstructured":"Ramesh C Agarwal and Fred G Gustavson . 1988 . A parallel implementation of matrix multiplication and LU factorization on the IBM 3090 . In Proceedings of the IFIP WG , Vol. 2. 217 -- 221 . Ramesh C Agarwal and Fred G Gustavson. 1988. A parallel implementation of matrix multiplication and LU factorization on the IBM 3090. In Proceedings of the IFIP WG, Vol. 2. 217--221.","journal-title":"Proceedings of the IFIP WG"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/AICCSA.2011.6126599"},{"key":"e_1_3_2_1_3_1","unstructured":"AMD. 2022. AMD cdna2 white-paper. https:\/\/www.amd.com\/system\/files\/documents\/amd-cdna2-white-paper.pdf  AMD. 2022. AMD cdna2 white-paper. https:\/\/www.amd.com\/system\/files\/documents\/amd-cdna2-white-paper.pdf"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00450-011-0161-5"},{"key":"e_1_3_2_1_5_1","volume-title":"Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, and Yi Zhang.","author":"Bubeck Sebastien","year":"2023","unstructured":"Sebastien Bubeck , Varun Chandrasekaran , Ronen Eldan , Johannes Gehrke , Eric Horvitz , Ece Kamar , Peter Lee , Yin Tat Lee , Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, and Yi Zhang. 2023 . Sparks of Artificial General Intelligence: Early experiments with GPT- 4. arXiv:2303.12712v2 Sebastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, and Yi Zhang. 2023. Sparks of Artificial General Intelligence: Early experiments with GPT-4. arXiv:2303.12712v2"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1137\/17M1122918"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00450-011-0169-x"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.3110"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1513895.1513901"},{"key":"e_1_3_2_1_10_1","unstructured":"Frontier. 2023. ORNL. https:\/\/www-olcf-ornl-gov-s.libyc.nudt.edu.cn\/olcf-resources\/compute-systems\/frontier\/  Frontier. 2023. ORNL. https:\/\/www-olcf-ornl-gov-s.libyc.nudt.edu.cn\/olcf-resources\/compute-systems\/frontier\/"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-016-5588-7"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2005.42"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00050"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2013.113"},{"key":"e_1_3_2_1_15_1","unstructured":"(Mike) Tsai Jack Dongarra Piotr LuszczekYaohung. 2023. HPL-MxP MIXED-PRECISION BENCHMARK. https:\/\/hpl-mxp.org\/  (Mike) Tsai Jack Dongarra Piotr LuszczekYaohung. 2023. HPL-MxP MIXED-PRECISION BENCHMARK. https:\/\/hpl-mxp.org\/"},{"key":"e_1_3_2_1_16_1","unstructured":"(Mike) Tsai Jack Dongarra Piotr LuszczekYaohung. 2023. Top 500 List of supercomputers. https:\/\/www.top500.org  (Mike) Tsai Jack Dongarra Piotr LuszczekYaohung. 2023. Top 500 List of supercomputers. https:\/\/www.top500.org"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2014.2321742"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3524059.3532370"},{"key":"e_1_3_2_1_19_1","volume-title":"2020 IEEE\/ACM 11th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA).","author":"Kudo S.","unstructured":"S. Kudo , K. Nitadori , T. Ina , and T. Imamura . 2020. Implementation and Numerical Techniques for One EFlop\/s HPL-AI Benchmark on Fugaku . In 2020 IEEE\/ACM 11th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA). S. Kudo, K. Nitadori, T. Ina, and T. Imamura. 2020. Implementation and Numerical Techniques for One EFlop\/s HPL-AI Benchmark on Fugaku. In 2020 IEEE\/ACM 11th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/1272430.1272431"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2012.242"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00083"},{"key":"e_1_3_2_1_23_1","unstructured":"University of Tennesse. 2022. High Performance Linpack Algorithm. http:\/\/www.netlib.org\/benchmark\/hpl\/algorithm.html  University of Tennesse. 2022. High Performance Linpack Algorithm. http:\/\/www.netlib.org\/benchmark\/hpl\/algorithm.html"},{"key":"e_1_3_2_1_24_1","unstructured":"Ornl. 2023. ORNL official Web. https:\/\/www.ornl.gov\/directorate\/ccsd  Ornl. 2023. ORNL official Web. https:\/\/www.ornl.gov\/directorate\/ccsd"},{"key":"e_1_3_2_1_25_1","unstructured":"OSTI. 2021. Fugaku report. https:\/\/science.osti.gov\/-\/media\/ascr\/ascac\/pdf\/meetings\/202107\/ASCAC_meeting_202107-Fugaku.pdf  OSTI. 2021. Fugaku report. https:\/\/science.osti.gov\/-\/media\/ascr\/ascac\/pdf\/meetings\/202107\/ASCAC_meeting_202107-Fugaku.pdf"},{"key":"e_1_3_2_1_26_1","unstructured":"Permutter. 2022. Permutter official Web. https:\/\/docs.nersc.gov\/systems\/perlmutter\/system_details\/  Permutter. 2022. Permutter official Web. https:\/\/docs.nersc.gov\/systems\/perlmutter\/system_details\/"},{"key":"e_1_3_2_1_27_1","unstructured":"Antoine Petitet. 2004. HPL-a portable implementation of the high-performance Linpack benchmark for distributed-memory computers. http:\/\/www.netlib.org\/benchmark\/hpl\/ (2004).  Antoine Petitet. 2004. HPL-a portable implementation of the high-performance Linpack benchmark for distributed-memory computers. http:\/\/www.netlib.org\/benchmark\/hpl\/ (2004)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1377612.1377615"},{"key":"e_1_3_2_1_29_1","unstructured":"RIKEN-RCCS. 2022. RIKEN-offical web. https:\/\/www.riken.jp\/  RIKEN-RCCS. 2022. RIKEN-offical web. https:\/\/www.riken.jp\/"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/PDP.2015.89"},{"key":"e_1_3_2_1_31_1","volume-title":"SC20: International Conference for High Performance Computing, Networking, Storage and Analysis.","author":"Sato M.","unstructured":"M. Sato , Y. Ishikawa , H. Tomita , Y. Kodama , and T. Shimizu . 2020. Co-Design for A64FX Manycore Processor and \"Fugaku \". In SC20: International Conference for High Performance Computing, Networking, Storage and Analysis. M. Sato, Y. Ishikawa, H. Tomita, Y. Kodama, and T. Shimizu. 2020. Co-Design for A64FX Manycore Processor and \"Fugaku\". In SC20: International Conference for High Performance Computing, Networking, Storage and Analysis."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2013.6702619"},{"key":"e_1_3_2_1_33_1","unstructured":"Summit. 2023. ORNL. https:\/\/www-olcf-ornl-gov-s.libyc.nudt.edu.cn\/olcf-resources\/compute-systems\/summit\/  Summit. 2023. ORNL. https:\/\/www-olcf-ornl-gov-s.libyc.nudt.edu.cn\/olcf-resources\/compute-systems\/summit\/"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3067731"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4330020102"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2008.5214359"}],"event":{"name":"SC '23: International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Denver CO USA","acronym":"SC '23","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","IEEE CS"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607030","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581784.3607030","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:22Z","timestamp":1750178182000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607030"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,11]]},"references-count":36,"alternative-id":["10.1145\/3581784.3607030","10.1145\/3581784"],"URL":"https:\/\/doi.org\/10.1145\/3581784.3607030","relation":{},"subject":[],"published":{"date-parts":[[2023,11,11]]},"assertion":[{"value":"2023-11-11","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}