{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:01:07Z","timestamp":1768028467874,"version":"3.49.0"},"reference-count":39,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1109\/ipdps.2004.1303000","type":"proceedings-article","created":{"date-parts":[[2004,6,10]],"date-time":"2004-06-10T10:19:45Z","timestamp":1086862785000},"page":"70-79","source":"Crossref","is-referenced-by-count":30,"title":["SRUMMA: a matrix multiplication algorithm suitable for clusters and scalable shared memory systems"],"prefix":"10.1109","author":[{"given":"M.","family":"Krishnan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J.","family":"Nieplocha","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1096-9128(199704)9:4<255::AID-CPE250>3.0.CO;2-2"},{"key":"35","article-title":"Protocols and strategies for optimizing remote memory operations on clusters","author":"nieplocha","year":"2002","journal-title":"Proc CAC\/IPDPS'02"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4330060703"},{"key":"36","year":"0","journal-title":"Tom Dunigan's Evaluation of Early Systems Webpage"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1147\/rd.386.0673"},{"key":"33","year":"0","journal-title":"ARMCI Web Page"},{"key":"15","author":"ranka","year":"1990","journal-title":"Hypercube Algorithms for Image Processing and Pattern Recognition"},{"key":"34","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1023\/A:1022800521563","article-title":"One-sided communication on myrinet","volume":"6","author":"nieplocha","year":"2003","journal-title":"Cluster Computing"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4330060702"},{"key":"39","article-title":"Where's the overlap? Overlapping communication and computation in several popular MPI implementations","author":"white","year":"1999","journal-title":"Proc Third MPI Developer's and User's Conference"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1096-9128(199705)9:5<345::AID-CPE258>3.0.CO;2-7"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1137\/0210049"},{"key":"37","article-title":"Exploiting non-blocking remote memory access communication in scientific benchmarks","author":"tipparaju","year":"2003","journal-title":"Proc HiPC'2003"},{"key":"11","first-page":"447","article-title":"Matrix multiplication on hypercubes using full bandwidth and constant storage","author":"ho","year":"1991","journal-title":"Sixth Distributed Memory Computing Conference"},{"key":"38","article-title":"COMB: A portable benchmark suite for assessing MPI overlap","author":"lawry","year":"2002","journal-title":"IEEE Cluster"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1145\/181014.181434"},{"key":"21","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898719642"},{"key":"20","article-title":"A proposal for a set of parallel basic linear algebra subprograms","volume":"cs 95 292","author":"choi","year":"1995","journal-title":"University of Tennessee Technical Report"},{"key":"22","article-title":"A fast scalable universal matrix multiplication algorithm on distributed-memory concurrent computers","author":"choi","year":"1997","journal-title":"Proc of IPPS'97"},{"key":"23","article-title":"OpenMP issues arising in the development of parallel BLAS and LAPACK libraries","author":"addison","year":"2001","journal-title":"Proceedings EWOMP'01"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.588"},{"key":"25","doi-asserted-by":"crossref","DOI":"10.1023\/A:1015186928868","article-title":"Mixed mode matrix multiplication","author":"wu","year":"2002","journal-title":"IEEE Cluster'02"},{"key":"26","article-title":"Performance analysis of various parallelization methods for BLAS3 routines on cluster architectures","author":"betcke","year":"2000","journal-title":"John von Neumann-Instituts fu?r Computing Tech Rep"},{"key":"27","article-title":"The implementation of MPI-2 one-sided communication for the NEC SX-5","author":"tra?ff","year":"2000","journal-title":"Proceedings of Supercomputing"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1145\/782814.782855"},{"key":"29","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTR.2003.1253309"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1016\/0167-8191(87)90060-3"},{"key":"2","author":"cannon","year":"1969","journal-title":"A cellular computer to implement the Kalman filter algorithm"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1109\/SPLC.1993.365573"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.1998.10004"},{"key":"30","author":"grama","year":"2003","journal-title":"Introduction to Parallel Computing"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.1993.160"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1016\/0167-8191(89)90091-4"},{"key":"32","article-title":"ARMCI: A portable remote memory copy library for distributed array libraries and compiler run-time systems","author":"nieplocha","year":"1999","journal-title":"Proc RTSPP IPPS\/SDP"},{"key":"5","author":"golub","year":"1989","journal-title":"Matrix Computations"},{"key":"31","year":"0","journal-title":"Optimizing Applications on the Cray X1TM System"},{"key":"4","volume":"1","author":"fox","year":"1988","journal-title":"Solving Problems on Concurrent Processors"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1145\/315891.315965"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1109\/SHPCC.1992.232648"}],"event":{"name":"18th International Parallel and Distributed Processing Symposium, 2004.","location":"Santa Fe, NM, USA"},"container-title":["18th International Parallel and Distributed Processing Symposium, 2004. Proceedings."],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/9132\/28950\/01303000.pdf?arnumber=1303000","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,16]],"date-time":"2017-06-16T03:57:40Z","timestamp":1497585460000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/1303000\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/ipdps.2004.1303000","relation":{},"subject":[]}}