{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T07:40:01Z","timestamp":1743752401855,"version":"3.40.3"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,9,23]],"date-time":"2024-09-23T00:00:00Z","timestamp":1727049600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,23]],"date-time":"2024-09-23T00:00:00Z","timestamp":1727049600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000993","name":"Battelle Memorial Institute","doi-asserted-by":"publisher","award":["DE-AC06-76RL01830"],"award-info":[{"award-number":["DE-AC06-76RL01830"]}],"id":[{"id":"10.13039\/100000993","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,23]]},"DOI":"10.1109\/hpec62836.2024.10938434","type":"proceedings-article","created":{"date-parts":[[2025,4,3]],"date-time":"2025-04-03T19:07:19Z","timestamp":1743707239000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["Distributed-Memory Sparse Deep Neural Network Inference Using Global Arrays"],"prefix":"10.1109","author":[{"given":"Bruce","family":"Palmer","sequence":"first","affiliation":[{"name":"Pacific Northwest National Laboratory,Richland,WA,USA"}]},{"given":"Sayan","family":"Ghosh","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory,Richland,WA,USA"}]},{"given":"Andr\u00e9s","family":"M\u00e1rquez","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory,Richland,WA,USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/superc.1990.129995"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3544559"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1504\/IJHPCN.2004.007569"},{"key":"ref4","article-title":"Introduction to upc and language specification. Technical report","volume-title":"Technical Report CCS-T R-99\u2013157, IDA Center for Computing Sciences","author":"Carlson","year":"1999"},{"key":"ref5","first-page":"249","volume-title":"Padua et al.[32]","author":"Chamberlain","year":"2005"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2020373.2020375"},{"issue":"10","key":"ref7","doi-asserted-by":"crossref","first-page":"519","DOI":"10.1145\/1103845.1094852","article-title":"X10: an object-oriented approach to non-uniform cluster computing","volume":"40","author":"Charles","year":"2005","journal-title":"Acm Sigplan Notices"},{"key":"ref8","first-page":"95","article-title":"ScaLAPACK: A portable linear algebra library for distributed memory computers - design issues and performance","volume-title":"Applied Parallel Computing Computations in Physics, Chemistry and Engineering Science","author":"Jaeyoung\u2019Choi","year":"1996"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3276493"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2014.7116712"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2211477"},{"key":"ref12","article-title":"Sparse networks from scratch: Faster training without losing performance","author":"Dettmers","year":"2019","journal-title":"arXiv preprint"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2012.72"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/567806.567810"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ExaMPI49596.2019.00009"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC43674.2020.9286206"},{"issue":"241","key":"ref17","first-page":"1","article-title":"Sparsity in deep learning: Pruning and growth for efficient inference and training in neural networks","volume":"22","author":"Hoefler","year":"2021","journal-title":"Journal of Machine Learning Research"},{"volume-title":"Metis: A software package for partitioning unstructured graphs, partitioning meshes, and computing fill-reducing orderings of sparse matrices","year":"1997","author":"Karypis","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2016.7761646"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2019.8916336"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICPPW.2010.62"},{"key":"ref22","article-title":"Exploring the regularity of sparse structure in convolutional neural networks","author":"Mao","year":"2017","journal-title":"ar Xiv preprint"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC43674.2020.9286189"},{"key":"ref24","article-title":"Armci: A portable aggregate remote memory copy interface","author":"Nieplocha","year":"2000","journal-title":"Citeseer. Citeseer"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/SUPERC.1994.344297"},{"issue":"2","key":"ref26","doi-asserted-by":"crossref","first-page":"169","DOI":"10.1007\/BF00130708","article-title":"Global Arrays: A nonuniform memory access programming model for high-performance computers","volume":"10","author":"Nieplocha","year":"1996","journal-title":"The Journal of Supercomputing"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/289918.289920","article-title":"Co-array fortran for parallel programming","volume":"17","author":"Numrich","year":"1998","journal-title":"ACM Sigplan Fortran Forum"},{"issue":"27","key":"ref28","first-page":"31","article-title":"CUDA Nvidia. Cublas library","volume":"15","year":"2008","journal-title":"NVIDIA Corporation, Santa Clara, California"},{"key":"ref29","article-title":"Megatron-lm: Training multi-billion parameter language models using model parallelism","author":"Shoeybi","year":"2019","journal-title":"arXiv preprint"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2015.48"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.35"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC55821.2022.9926300"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2010.04.018"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1002\/wcms.62"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC49654.2021.9622791"},{"key":"ref36","article-title":"Accelerate science on perlmutter with nersc","volume":"65","author":"Yang","year":"2020","journal-title":"Bulletin of the American Physical Society"}],"event":{"name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","start":{"date-parts":[[2024,9,23]]},"location":"Wakefield, MA, USA","end":{"date-parts":[[2024,9,27]]}},"container-title":["2024 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10938401\/10938415\/10938434.pdf?arnumber=10938434","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T07:04:00Z","timestamp":1743750240000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10938434\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,23]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/hpec62836.2024.10938434","relation":{},"subject":[],"published":{"date-parts":[[2024,9,23]]}}}