{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T12:27:18Z","timestamp":1764937638697,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T00:00:00Z","timestamp":1699747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3624062.3624249","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T13:53:39Z","timestamp":1699624419000},"page":"1688-1696","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Advancing the distributed Multi-GPU ChASE library through algorithm optimization and NCCL library"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5716-3116","authenticated-orcid":false,"given":"Xinzhe","family":"Wu","sequence":"first","affiliation":[{"name":"J\u00fclich Supercomputing Centre, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5821-5897","authenticated-orcid":false,"given":"Edoardo","family":"Di Napoli","sequence":"additional","affiliation":[{"name":"J\u00fclich Supercomputing Centre, Germany"}]}],"member":"320","published-online":{"date-parts":[[2023,11,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2015.06.003"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01600502"},{"key":"e_1_3_2_2_3_1","volume-title":"The Eigenvalue Problem for Structural Systems with Statistical Properties.AIAA journal 7, 4","author":"Collins D.","year":"1969","unstructured":"J.\u00a0D. Collins and W.\u00a0T. Thomson. 1969. The Eigenvalue Problem for Structural Systems with Statistical Properties.AIAA journal 7, 4 (1969), 642\u2013648."},{"key":"e_1_3_2_2_4_1","unstructured":"J. Demmel L. Grigori M. Hoemmen and J. Langou. 2008. Communication-avoiding parallel and sequential QR factorizations. CoRR abs\/0806.2159 (2008)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"E. Di\u00a0Napoli S. Bl\u00fcgel and P. Bientinesi. 2012. Correlations in sequences of generalized eigenproblems arising in Density Functional Theory. Computer physics communications 183 8 (2012) 1674\u20131682.","DOI":"10.1016\/j.cpc.2012.03.006"},{"key":"e_1_3_2_2_6_1","unstructured":"ELPA. 2014. Eigenvalue Solvers for Petaflop-Applications (ELPA). https:\/\/elpa.mpcdf.mpg.de\/"},{"key":"e_1_3_2_2_7_1","unstructured":"FLEUR. 2023. FLEUR a feature-full freely available FLAPW (full-potential linearized augmented planewave) code based on density-functional theory.. https:\/\/www.flapw.de\/MaX-6.0"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1137\/18M1218212"},{"volume-title":"2014 5th workshop on latest advances in scalable algorithms for large-scale systems. IEEE, 31\u201338","author":"Fukaya T.","key":"e_1_3_2_2_9_1","unstructured":"T. Fukaya, Y. Nakatsukasa, Y. Yanagisawa, and Y. Yamamoto. 2014. CholeskyQR2: a simple and communication-avoiding algorithm for computing a tall-skinny QR factorization on a large-scale parallel system. In 2014 5th workshop on latest advances in scalable algorithms for large-scale systems. IEEE, 31\u201338."},{"key":"e_1_3_2_2_10_1","volume-title":"GPU Technology Conference (GTC), Vol.\u00a02.","author":"Jeaugey S.","year":"2017","unstructured":"S. Jeaugey. 2017. Nccl 2.0. In GPU Technology Conference (GTC), Vol.\u00a02."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1088\/0953-8984\/26\/21\/213201"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1377603.1377611"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF02219773"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1137\/S1064827598336951"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2764454"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"E. Wang Q. Zhang B. Shen G. Zhang X. Lu Q. Wu and Y. Wang. 2014. Intel Math Kernel Library. In High-Performance Computing on the Intel\u00ae Xeon Phi\u2122. Springer 167\u2013188.","DOI":"10.1007\/978-3-319-06486-4_7"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313828"},{"volume-title":"Proceedings of the Platform for Advanced Scientific Computing Conference. 1\u201312","author":"Wu X.","key":"e_1_3_2_2_18_1","unstructured":"X. Wu, D. Davidovi\u0107, S Achilles, and E. Di\u00a0Napoli. 2022. ChASE: a distributed hybrid CPU-GPU eigensolver for large-scale hermitian eigenvalue problems. In Proceedings of the Platform for Advanced Scientific Computing Conference. 1\u201312."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"V.\u00a0W. Yu J. Moussa P. K\u016fs A. Marek P. Messmer M. Yoon H. Lederer and V. Blum. 2021. GPU-acceleration of the ELPA2 Distributed Eigensolver for Dense Symmetric and Hermitian Eigenproblems. Computer Physics Communications 262 (5 2021) 107808.","DOI":"10.1016\/j.cpc.2020.107808"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2021.108081"},{"key":"e_1_3_2_2_21_1","unstructured":"X. Zhang Q. Wang and C. Zaheer. 2012. OpenBLAS. URL: http:\/\/xianyi. github. io\/OpenBLAS 88 (2012)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcp.2014.06.056"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.74.066704"}],"event":{"name":"SC-W 2023: Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis","acronym":"SC-W 2023","location":"Denver CO USA"},"container-title":["Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624249","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624249","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T03:02:28Z","timestamp":1755745348000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624249"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,12]]},"references-count":23,"alternative-id":["10.1145\/3624062.3624249","10.1145\/3624062"],"URL":"https:\/\/doi.org\/10.1145\/3624062.3624249","relation":{},"subject":[],"published":{"date-parts":[[2023,11,12]]},"assertion":[{"value":"2023-11-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}