{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:39:50Z","timestamp":1750307990867,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2006,5,3]],"date-time":"2006-05-03T00:00:00Z","timestamp":1146614400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2006,5,3]]},"DOI":"10.1145\/1128022.1128054","type":"proceedings-article","created":{"date-parts":[[2006,5,8]],"date-time":"2006-05-08T21:40:43Z","timestamp":1147124443000},"page":"229-240","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Memory efficient parallel matrix multiplication operation for irregular problems"],"prefix":"10.1145","author":[{"given":"Manojkumar","family":"Krishnan","sequence":"first","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, WA"}]},{"given":"Jarek","family":"Nieplocha","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, WA"}]}],"member":"320","published-online":{"date-parts":[[2006,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2004.1303000"},{"key":"e_1_3_2_1_2_1","volume-title":"dissertation","author":"Cannon L. E.","year":"1969","unstructured":"L. E. Cannon , \" A cellular computer to implement the Kalman Filter Algorithm\", Ph. D. dissertation , Montana State University , 1969 .]] L. E. Cannon, \"A cellular computer to implement the Kalman Filter Algorithm\", Ph.D. dissertation, Montana State University, 1969.]]"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/0167-8191(87)90060-3"},{"key":"e_1_3_2_1_4_1","volume-title":"Solving Problems on Concurrent Processors","author":"Fox G. C.","year":"1988","unstructured":"G. C. Fox , M. Johnson , G. Lyzenga , S. Otto , J. Salmon , and D. Walker , Solving Problems on Concurrent Processors . vol. 1 , Prentice Hall , 1988 .]] G. C. Fox, M. Johnson, G. Lyzenga, S. Otto, J. Salmon, and D. Walker, Solving Problems on Concurrent Processors. vol. 1, Prentice Hall, 1988.]]"},{"key":"e_1_3_2_1_5_1","volume-title":"Matrix Computations","author":"Golub G.H.","year":"1989","unstructured":"G.H. Golub , C.H Van Loan . Matrix Computations . Johns Hopkins University Press , 1989 .]] G.H. Golub, C.H Van Loan. Matrix Computations. Johns Hopkins University Press, 1989.]]"},{"key":"e_1_3_2_1_6_1","volume-title":"Parallel Computing","volume":"12","author":"Berntsen J.","year":"1989","unstructured":"J. Berntsen , Communication efficient matrix multiplication on hypercubes , Parallel Computing , vol. 12 , 1989 .]] J. Berntsen, Communication efficient matrix multiplication on hypercubes, Parallel Computing, vol. 12, 1989.]]"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.1993.160"},{"volume-title":"SHPCC '92","author":"Lin C.","key":"e_1_3_2_1_8_1","unstructured":"C. Lin and L. Snyder , \"A matrix product algorithm and its comparative performance on hypercubes \", SHPCC '92 .]] C. Lin and L.Snyder, \"A matrix product algorithm and its comparative performance on hypercubes\", SHPCC '92.]]"},{"key":"e_1_3_2_1_9_1","unstructured":"Q. Luo and J. B. Drake A Scalable Parallel Strassen's Matrix Multiply Algorithm for Distributed Memory Computers http:\/\/citeseer.nj.nec.com\/517382.html]]  Q. Luo and J. B. Drake A Scalable Parallel Strassen's Matrix Multiply Algorithm for Distributed Memory Computers http:\/\/citeseer.nj.nec.com\/517382.html]]"},{"key":"e_1_3_2_1_10_1","volume-title":"Comparison of Scalable Parallel Matrix Multiplication Libraries,\" in Proceedings of the Scalable Parallel Libraries Conference","author":"Huss-Lederman S.","year":"1994","unstructured":"S. Huss-Lederman , E. M. Jacobson , and A. Tsao , \" Comparison of Scalable Parallel Matrix Multiplication Libraries,\" in Proceedings of the Scalable Parallel Libraries Conference , 1994 .]] S. Huss-Lederman, E. M. Jacobson, and A. Tsao, \"Comparison of Scalable Parallel Matrix Multiplication Libraries,\" in Proceedings of the Scalable Parallel Libraries Conference, 1994.]]"},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. 6 Distributed Memory Computing Conference.","author":"Ho C. T.","year":"1991","unstructured":"C. T. Ho , S. L. Johnsson , A. Edelman, Matrix multiplication on hypercubes using full bandwidth and constant storage , Proc. 6 Distributed Memory Computing Conference. 1991 .]] C. T. Ho, S. L. Johnsson, A. Edelman, Matrix multiplication on hypercubes using full bandwidth and constant storage, Proc. 6 Distributed Memory Computing Conference. 1991.]]"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/181014.181434"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1096-9128(199705)9:5<345::AID-CPE258>3.0.CO;2-7"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1137\/0210049"},{"key":"e_1_3_2_1_15_1","volume-title":"Hypercube Algorithms for Image Processing and Pattern Recognition","author":"Ranka S.","year":"1990","unstructured":"S. Ranka , S. Sahni . Hypercube Algorithms for Image Processing and Pattern Recognition . Springer-Verlag , 1990 .]] S. Ranka, S. Sahni. Hypercube Algorithms for Image Processing and Pattern Recognition. Springer-Verlag, 1990.]]"},{"volume-title":"PUMMA: Parallel Universal Matrix Multiplication Algorithms on distributed memory concurrent computers,\" Concurrency: Practice and Experience","author":"Choi J.","key":"e_1_3_2_1_16_1","unstructured":"J. Choi , J. Dongarra , and D. W. Walker , \" PUMMA: Parallel Universal Matrix Multiplication Algorithms on distributed memory concurrent computers,\" Concurrency: Practice and Experience , vol. 6(7), 1994 .]] J. Choi, J. Dongarra, and D. W. Walker, \"PUMMA: Parallel Universal Matrix Multiplication Algorithms on distributed memory concurrent computers,\" Concurrency: Practice and Experience, vol. 6(7), 1994.]]"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4330060703"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.386.0673"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/899248"},{"key":"e_1_3_2_1_21_1","volume-title":"ScaLAPACK Users' Guide","author":"L. S.","year":"1997","unstructured":"L. S. Blackford et. al. , ScaLAPACK Users' Guide , SIAM , 1997 , Philadelphia, PA.]] L. S. Blackford et. al., ScaLAPACK Users' Guide, SIAM, 1997, Philadelphia, PA.]]"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 11th International Parallel Processing Symposium (IPPS '97)","author":"Choi J.","year":"1997","unstructured":"J. Choi , \" A Fast Scalable Universal Matrix Multiplication Algorithm on Distributed-Memory Concurrent Computers\" , in Proceedings of the 11th International Parallel Processing Symposium (IPPS '97) , 1997 .]] J. Choi, \"A Fast Scalable Universal Matrix Multiplication Algorithm on Distributed-Memory Concurrent Computers\", in Proceedings of the 11th International Parallel Processing Symposium (IPPS '97), 1997.]]"},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings EWOMP'01","author":"Addison C.","year":"2001","unstructured":"C. Addison and Y. Ren , \" OpenMP Issues Arising in the Development of Parallel BLAS and LAPACK libraries \", in Proceedings EWOMP'01 . 2001 .]] C. Addison and Y. Ren, \"OpenMP Issues Arising in the Development of Parallel BLAS and LAPACK libraries\", in Proceedings EWOMP'01. 2001.]]"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.588"},{"volume-title":"Intl. Conf. Cluster Computing '02","author":"Wu M.","key":"e_1_3_2_1_25_1","unstructured":"M. Wu , S. Aluru , and R. A. Kendall , \" Mixed Mode Matrix Multiplication \", Intl. Conf. Cluster Computing '02 .]] M. Wu, S. Aluru, and R. A. Kendall, \"Mixed Mode Matrix Multiplication\", Intl. Conf. Cluster Computing '02.]]"},{"key":"e_1_3_2_1_26_1","unstructured":"T. Betcke \"Performance analysis of various parallelization methods for BLAS3 routines on cluster architectures\" John von Neumann-Instituts fur Computing Tech. Rep. FZJ-ZAM-IB-2000-15 2000.]]  T. Betcke \"Performance analysis of various parallelization methods for BLAS3 routines on cluster architectures\" John von Neumann-Instituts fur Computing Tech. Rep. FZJ-ZAM-IB-2000-15 2000.]]"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of Supercomputing","author":"Tr\u00e4ff J. L.","year":"2000","unstructured":"J. L. Tr\u00e4ff , H. Ritzdorf , R. Hempel \" The Implementation of MPI-2 One-Sided Communication for the NEC SX-5 \", in Proceedings of Supercomputing , 2000 .]] J. L. Tr\u00e4ff, H. Ritzdorf, R. Hempel \"The Implementation of MPI-2 One-Sided Communication for the NEC SX-5\", in Proceedings of Supercomputing, 2000.]]"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/782814.782855"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTR.2003.1253309"},{"key":"e_1_3_2_1_30_1","volume-title":"Introduction to Parallel Computing","author":"Grama A.","year":"2003","unstructured":"A. Grama , A. Gupta , G. Karypis , and V. Kumar , Introduction to Parallel Computing , Addison Wesley , 2003 .]] A. Grama, A. Gupta, G. Karypis, and V. Kumar, Introduction to Parallel Computing, Addison Wesley, 2003.]]"},{"key":"e_1_3_2_1_31_1","unstructured":"Cray Online documentation. Optimizing Applications on the Cray X1TM System. http:\/\/www.cray.com\/craydoc\/20\/manuals\/S-2315-50\/html-S-2315-50\/S-2315-50-toc.html]]  Cray Online documentation. Optimizing Applications on the Cray X1TM System. http:\/\/www.cray.com\/craydoc\/20\/manuals\/S-2315-50\/html-S-2315-50\/S-2315-50-toc.html]]"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.5555\/645611.662053"},{"key":"e_1_3_2_1_33_1","unstructured":"ARMCI Web page. http:\/\/www.emsl.pnl.gov\/docs\/parsoft\/armci\/]]  ARMCI Web page. http:\/\/www.emsl.pnl.gov\/docs\/parsoft\/armci\/]]"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022800521563"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/645610.661733"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS.2004.60"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1063\/1.456153"},{"key":"e_1_3_2_1_38_1","unstructured":"Y. Alexeev M. Valiev D. A. Dixon T. L. Windus \"Ab initio study of catalytic GTP hydrolysis\" J. of ACS '04.]]  Y. Alexeev M. Valiev D. A. Dixon T. L. Windus \"Ab initio study of catalytic GTP hydrolysis\" J. of ACS '04.]]"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1096-987X(19960115)17:1<109::AID-JCC9>3.0.CO;2-V"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1008167621154"},{"key":"e_1_3_2_1_41_1","volume-title":"TR-95-39, Department of Computer Sciences","author":"Edwards C.","year":"1995","unstructured":"C. Edwards , P. Geng , A. Patra , and R. Van De Geign , \" Parallel Matrix Distributions : Have we been doing it all wrong? \", TR-95-39, Department of Computer Sciences , University of Texas , Oct. 1995 .]] C. Edwards, P. Geng, A. Patra, and R.Van De Geign, \"Parallel Matrix Distributions: Have we been doing it all wrong?\", TR-95-39, Department of Computer Sciences, University of Texas, Oct. 1995.]]"},{"key":"e_1_3_2_1_42_1","volume-title":"IPDPS","author":"Lee Hyuk-Jae","year":"1995","unstructured":"Hyuk-Jae Lee , J.A. B. Fortes, Toward data distribution independent parallel matrix multiplication , IPDPS , 1995 .]] Hyuk-Jae Lee, J.A.B. Fortes, Toward data distribution independent parallel matrix multiplication, IPDPS, 1995.]]"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/181181.181563"},{"key":"e_1_3_2_1_44_1","volume-title":"HPC Symposium","author":"Lu Ioana","year":"1998","unstructured":"Banicescu, Ioana and R. Lu , Experiences with Fractiling in N-Body Simulations , HPC Symposium , 1998 .]] Banicescu, Ioana and R. Lu, Experiences with Fractiling in N-Body Simulations, HPC Symposium, 1998.]]"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0010-4655(00)00065-5"}],"event":{"name":"CF06: Computing Frontiers Conference","sponsor":["ACM Association for Computing Machinery","SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"],"location":"Ischia Italy","acronym":"CF06"},"container-title":["Proceedings of the 3rd conference on Computing frontiers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1128022.1128054","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1128022.1128054","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T15:06:16Z","timestamp":1750259176000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1128022.1128054"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006,5,3]]},"references-count":44,"alternative-id":["10.1145\/1128022.1128054","10.1145\/1128022"],"URL":"https:\/\/doi.org\/10.1145\/1128022.1128054","relation":{},"subject":[],"published":{"date-parts":[[2006,5,3]]},"assertion":[{"value":"2006-05-03","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}