{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T00:16:21Z","timestamp":1760141781138,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031304446"},{"type":"electronic","value":"9783031304453"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-30445-3_9","type":"book-chapter","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T09:02:52Z","timestamp":1682499772000},"page":"101-114","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Benchmarking a\u00a0High Performance Computing Heterogeneous Cluster"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8521-1645","authenticated-orcid":false,"given":"Luisa","family":"Carracciuolo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2767-3726","authenticated-orcid":false,"given":"Davide","family":"Bottalico","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3142-5346","authenticated-orcid":false,"given":"Davide","family":"Michelino","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9439-8771","authenticated-orcid":false,"given":"Gianluca","family":"Sabella","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1491-6151","authenticated-orcid":false,"given":"Bernardino","family":"Spisso","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,4,27]]},"reference":[{"key":"9_CR1","unstructured":"Petitet, A., et al.: A portable implementation of the high-performance linpack benchmark for distributed-memory computers. https:\/\/www.netlib.org\/benchmark\/hpl\/index.html"},{"key":"9_CR2","doi-asserted-by":"publisher","unstructured":"Barone, G.B., et al.: Designing and implementing a high-performance computing heterogeneous cluster. In: 2022 International Conference on Electrical, Computer and Energy Technologies (ICECET), pp. 1\u20136 (2022). https:\/\/doi.org\/10.1109\/ICECET55527.2022.9872709","DOI":"10.1109\/ICECET55527.2022.9872709"},{"key":"9_CR3","doi-asserted-by":"publisher","unstructured":"Bertero, M., et al.: MedIGrid: a medical imaging application for computational grids. In: Proceedings International Parallel and Distributed Processing Symposium, p. 8 (2003). https:\/\/doi.org\/10.1109\/IPDPS.2003.1213457","DOI":"10.1109\/IPDPS.2003.1213457"},{"key":"9_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"700","DOI":"10.1007\/978-3-642-31464-3_71","volume-title":"Parallel Processing and Applied Mathematics","author":"V Boccia","year":"2012","unstructured":"Boccia, V., Carracciuolo, L., Laccetti, G., Lapegna, M., Mele, V.: HADAB: enabling fault tolerance in parallel applications running in distributed environments. In: Wyrzykowski, R., Dongarra, J., Karczewski, K., Wa\u015bniewski, J. (eds.) PPAM 2011, Part I. LNCS, vol. 7203, pp. 700\u2013709. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-31464-3_71"},{"key":"9_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1007\/978-3-642-33518-1_16","volume-title":"Recent Advances in the Message Passing Interface","author":"D Bureddy","year":"2012","unstructured":"Bureddy, D., Wang, H., Venkatesh, A., Potluri, S., Panda, D.K.: OMB-GPU: a micro-benchmark suite for evaluating MPI libraries on GPU clusters. In: Tr\u00e4ff, J.L., Benkner, S., Dongarra, J.J. (eds.) EuroMPI 2012. LNCS, vol. 7490, pp. 110\u2013120. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33518-1_16"},{"key":"9_CR6","doi-asserted-by":"publisher","unstructured":"Carracciuolo, L., et. al: Implementation of a non-linear solver on heterogeneous architectures. Concurr. Comput. Pract. Exp. 30(24), e4903 (2018). https:\/\/doi.org\/10.1002\/cpe.4903","DOI":"10.1002\/cpe.4903"},{"issue":"4","key":"9_CR7","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.6008","volume":"33","author":"L Carracciuolo","year":"2021","unstructured":"Carracciuolo, L., et al.: About the granularity portability of block-based Krylov methods in heterogeneous computing environments. Concurr. Comput. Pract. Exp. 33(4), e6008 (2021). https:\/\/doi.org\/10.1002\/cpe.6008","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"9_CR8","unstructured":"CORAL procurement benchmarks. https:\/\/asc.llnl.gov\/sites\/asc\/files\/2020-06\/CORALBenchmarksProcedure-v26.pdf"},{"key":"9_CR9","series-title":"The Kluwer International Series in Engineering and Computer Science","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/978-1-4613-1401-1_5","volume-title":"Input\/Output in Parallel and Distributed Computer Systems","author":"P Corbett","year":"1996","unstructured":"Corbett, P., et al.: Overview of the MPI-IO parallel I\/O interface. In: Jain, R., Werth, J., Browne, J.C. (eds.) Input\/Output in Parallel and Distributed Computer Systems. SECS, vol. 362, pp. 127\u2013146. Springer, Boston (1996). https:\/\/doi.org\/10.1007\/978-1-4613-1401-1_5"},{"issue":"2","key":"9_CR10","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/s10915-022-01826-7","volume":"91","author":"L D\u2019Amore","year":"2022","unstructured":"D\u2019Amore, L., et al.: a scalable space-time domain decomposition approach for solving large scale nonlinear regularized inverse ill posed problems in 4D variational data assimilation. J. Sci. Comput. 91(2), 59 (2022). https:\/\/doi.org\/10.1007\/s10915-022-01826-7","journal-title":"J. Sci. Comput."},{"key":"9_CR11","doi-asserted-by":"publisher","unstructured":"Fatica, M.: Accelerating Linpack with CUDA on heterogenous clusters. In: 2nd Workshop on General Purpose Processing on Graphics Processing Units. GPGPU-2, pp. 46\u201351. Association for Computing Machinery, New York (2009). https:\/\/doi.org\/10.1145\/1513895.1513901","DOI":"10.1145\/1513895.1513901"},{"issue":"2","key":"9_CR12","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1109\/MM.2017.37","volume":"37","author":"D Foley","year":"2017","unstructured":"Foley, D., et al.: Ultra-performance pascal GPU and NVLink interconnect. IEEE Micro 37(2), 7\u201317 (2017). https:\/\/doi.org\/10.1109\/MM.2017.37","journal-title":"IEEE Micro"},{"key":"9_CR13","doi-asserted-by":"publisher","unstructured":"Gates, M., et al.: SLATE: design of a modern distributed and accelerated linear algebra library. In: International Conference for High Performance Computing, Networking, Storage and Analysis (2019). https:\/\/doi.org\/10.1145\/3295500.3356223","DOI":"10.1145\/3295500.3356223"},{"key":"9_CR14","unstructured":"GPUDirect RDMA - CUDA Toolkit DOC. https:\/\/docs.nvidia.com\/cuda\/gpudirect-rdma\/index.html"},{"key":"9_CR15","unstructured":"HPC Challenge Benchmark. https:\/\/hpcchallenge.org\/hpcc\/"},{"key":"9_CR16","unstructured":"HPL-AI Mixed-Precision Benchmark. https:\/\/hpl-mxp.org\/"},{"key":"9_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/978-3-030-94437-7_7","volume-title":"Performance Evaluation and Benchmarking","author":"N Ihde","year":"2022","unstructured":"Ihde, N., et al.: A survey of big data, high performance computing, and machine learning benchmarks. In: Nambiar, R., Poess, M. (eds.) TPCTC 2021. LNCS, vol. 13169, pp. 98\u2013118. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-030-94437-7_7"},{"key":"9_CR18","unstructured":"InfiniBand network standard. https:\/\/en.wikipedia.org\/wiki\/InfiniBand"},{"key":"9_CR19","unstructured":"Interprocess Communication - Programming Guide : CUDA Toolkit DOC. https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html#interprocess-communication"},{"key":"9_CR20","unstructured":"IOzone Filesystem Benchmark. https:\/\/www.iozone.org\/"},{"key":"9_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1007\/978-3-030-34356-9_28","volume-title":"High Performance Computing","author":"KS Khorassani","year":"2019","unstructured":"Khorassani, K.S., Chu, C.-H., Subramoni, H., Panda, D.K.: Performance evaluation of MPI libraries on GPU-enabled OpenPOWER architectures: early experiences. In: Weiland, M., Juckeland, G., Alam, S., Jagode, H. (eds.) ISC High Performance 2019. LNCS, vol. 11887, pp. 361\u2013378. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-34356-9_28"},{"key":"9_CR22","unstructured":"Kraus, J.: An introduction to CUDA-aware MPI. https:\/\/developer.nvidia.com\/blog\/introduction-cuda-aware-mpi\/"},{"key":"9_CR23","doi-asserted-by":"publisher","unstructured":"Li, A., et al.: Tartan: evaluating modern GPU interconnect via a multi-GPU benchmark suite. In: 2018 IEEE International Symposium on Workload Characterization (IISWC), pp. 191\u2013202 (2018). https:\/\/doi.org\/10.1109\/IISWC.2018.8573483","DOI":"10.1109\/IISWC.2018.8573483"},{"key":"9_CR24","unstructured":"NAS Parallel Benchmarks. https:\/\/www.nas.nasa.gov\/software\/npb.html"},{"issue":"2","key":"9_CR25","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1145\/1365490.1365500","volume":"6","author":"J Nickolls","year":"2008","unstructured":"Nickolls, J., et al.: Scalable parallel programming with CUDA. Queue 6(2), 40\u201353 (2008). https:\/\/doi.org\/10.1145\/1365490.1365500","journal-title":"Queue"},{"key":"9_CR26","unstructured":"NVIDIA Mellanox OFED DOC. https:\/\/docs.mellanox.com\/display\/MLNXOFEDv531001\/NVIDIA+MLNX_OFED+Documentation+Rev+5.3-1.0.0.1"},{"key":"9_CR27","unstructured":"Open MPI: Open Source High Performance Computing. https:\/\/www.open-mpi.org\/"},{"key":"9_CR28","unstructured":"Programma Operativo Nazionale Ricerca e Innovazione 2014\u20132020: Progetto IBiSCo. https:\/\/www.na.infn.it\/fondi-esterni\/pon"},{"key":"9_CR29","doi-asserted-by":"publisher","unstructured":"Shamis, P., et al.: UCX: an open source framework for HPC network APIs and beyond. In: IEEE 23rd Annual Symposium on High-Performance Interconnects, pp. 40\u201343 (2015). https:\/\/doi.org\/10.1109\/HOTI.2015.13","DOI":"10.1109\/HOTI.2015.13"},{"key":"9_CR30","doi-asserted-by":"publisher","unstructured":"Shi, R., et al.: Designing efficient small message transfer mechanism for inter-node MPI communication on InfiniBand GPU clusters. In: 21st International Conference on High Performance Computing (HiPC), pp. 1\u201310 (2014). https:\/\/doi.org\/10.1109\/HiPC.2014.7116873","DOI":"10.1109\/HiPC.2014.7116873"},{"key":"9_CR31","unstructured":"Standard Performance Evaluation Corporation. https:\/\/www.spec.org\/"},{"key":"9_CR32","unstructured":"The Exascale Computing Project Website. https:\/\/www.exascaleproject.org\/"},{"key":"9_CR33","unstructured":"The Lustre file system. https:\/\/www.lustre.org\/"},{"key":"9_CR34","unstructured":"The Top 500 list Website. https:\/\/www.top500.org\/"},{"key":"9_CR35","unstructured":"Sterling, T., et al.: BEOWULF: a parallel workstation for scientific computation. In: 24th International Conference on Parallel Processing, pp. 11\u201314. CRC Press (1995)"},{"key":"9_CR36","unstructured":"Wong, P., et al.: NAS parallel benchmarks I\/O version 2.4. NAS Technical report NAS-03-002 (2003)"}],"container-title":["Lecture Notes in Computer Science","Parallel Processing and Applied Mathematics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-30445-3_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T04:45:16Z","timestamp":1760071516000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-30445-3_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031304446","9783031304453"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-30445-3_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"27 April 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PPAM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Parallel Processing and Applied Mathematics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Gdansk","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ppam2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ppam.edu.pl\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}