{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T16:09:51Z","timestamp":1725466191742},"publisher-location":"Berlin, Heidelberg","reference-count":31,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642314636"},{"type":"electronic","value":"9783642314643"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-31464-3_13","type":"book-chapter","created":{"date-parts":[[2012,7,2]],"date-time":"2012-07-02T15:20:38Z","timestamp":1341242438000},"page":"122-132","source":"Crossref","is-referenced-by-count":2,"title":["Cache Blocking for Linear Algebra Algorithms"],"prefix":"10.1007","author":[{"given":"Fred G.","family":"Gustavson","sequence":"first","affiliation":[]}],"member":"297","reference":[{"issue":"2","key":"13_CR1","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1147\/rd.302.0126","volume":"30","author":"R.C. Agarwal","year":"1986","unstructured":"Agarwal, R.C., Cooley, J.W., Gustavson, F.G., Shearer, J.B., Slishman, G., Tuckerman, B.: New scalar and vector elementary functions for the IBM System\/370. IBM Journal of Research and Development\u00a030(2), 126\u2013144 (1986)","journal-title":"IBM Journal of Research and Development"},{"key":"13_CR2","unstructured":"Agarwal, R.C., Gustavson, F.G.: A Parallel Implementation of Matrix Multiplication and LU factorization on the IBM 3090. In: Wright, M. (ed.) Proceedings of the IFIP WG 2.5 on Aspects of Computation on Asynchronous Parallel Processors, Stanford CA, pp. 217\u2013221. North Holland (August 1988)"},{"issue":"5","key":"13_CR3","doi-asserted-by":"publisher","first-page":"563","DOI":"10.1147\/rd.385.0563","volume":"38","author":"R.C. Agarwal","year":"1994","unstructured":"Agarwal, R.C., Gustavson, F.G., Zubair, M.: Exploiting functional parallelism of POWER2 to design high-performance numerical algorithms. IBM Journal of Research and Development\u00a038(5), 563\u2013576 (1994)","journal-title":"IBM Journal of Research and Development"},{"issue":"6","key":"13_CR4","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1147\/rd.386.0673","volume":"38","author":"R.C. Agarwal","year":"1994","unstructured":"Agarwal, R.C., Gustavson, F.G., Zubair, M.: A high-performance matrix-multiplication algorithm on a distributed-memory parallel computer, using overlapped communication. IBM J. R. & D.\u00a038(6), 673\u2013681 (1994); See also IBM RC 18694 with dates 8\/5\/92 & 8\/10\/92 & 2\/8\/93","journal-title":"IBM J. R. & D."},{"issue":"2","key":"13_CR5","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1145\/1067967.1067969","volume":"31","author":"B.S. Andersen","year":"2005","unstructured":"Andersen, B.S., Gunnels, J.A., Gustavson, F.G., Reid, J.K., Wa\u015bniewski, J.: A Fully Portable High Performance Minimal Storage Hybrid Cholesky Algorithm. ACM TOMS\u00a031(2), 201\u2013227 (2005)","journal-title":"ACM TOMS"},{"key":"13_CR6","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898719604","volume-title":"LAPACK Users\u2019 Guide Release 3.0","author":"E. Anderson","year":"1999","unstructured":"Anderson, E., et al.: LAPACK Users\u2019 Guide Release 3.0. SIAM, Philadelphia (1999)"},{"key":"13_CR7","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898719642","volume-title":"ScaLAPACK Users\u2019 Guide","author":"L.S. Blackford","year":"1997","unstructured":"Blackford, L.S., et al.: ScaLAPACK Users\u2019 Guide. SIAM, Philadelphia (1997)"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Bilmes, J., Asanovic, K., Whye Chin, C., Demmel, J.: Optimizing Matrix Multiply Using PHiPAC: A Portable, High-Performance, ANSI C Coding Methodology. In: Proceedings of International Conference on Supercomputing, Vienna, Austria (1997)","DOI":"10.1145\/263580.263662"},{"issue":"1","key":"13_CR9","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1016\/j.parco.2008.10.002","volume":"35","author":"A. Buttari","year":"2009","unstructured":"Buttari, A., Langou, J., Kurzak, J., Dongarra, J.: A class of parallel tiled linear algorithms for MC architectures. Parallel Comput.\u00a035(1), 38\u201353 (2009)","journal-title":"Parallel Comput."},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"Chan, E., Quintana-orti, E.S., Quintana-orti, G., Van De Geijn, R.: Super-Matrix Out-of-Core Scheduling of Matrix Operations for SMP and Multi-Core Architectures. In: SPAA 2007, June 9-11, pp. 116\u2013125 (2007)","DOI":"10.1145\/1248377.1248397"},{"issue":"1","key":"13_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/77626.79170","volume":"16","author":"J.J. Dongarra","year":"1990","unstructured":"Dongarra, J.J., Du Croz, J., Hammarling, S., Duff, I.: A Set of Level 3 Basic Linear Algebra Subprograms. TOMS\u00a016(1), 1\u201317 (1990)","journal-title":"TOMS"},{"issue":"1","key":"13_CR12","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1137\/S0036144503428693","volume":"46","author":"E. Elmroth","year":"2004","unstructured":"Elmroth, E., Gustavson, F.G., Jonsson, I., K\u00e5gstr\u00f6m, B.: Recursive Blocked Algorithms and Hybrid Data Structures for Dense Matrix Library Software. SIAM Review\u00a046(1), 3\u201345 (2004)","journal-title":"SIAM Review"},{"issue":"1","key":"13_CR13","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1177\/109434208800200103","volume":"2","author":"K. Gallivan","year":"1988","unstructured":"Gallivan, K., Jalby, W., Meier, U., Sameh, A.: The Impact of Hierarchical Memory Systems on Linear Algebra Algorithm Design. International Journal of Supercomputer Applications\u00a02(1), 12\u201348 (1988)","journal-title":"International Journal of Supercomputer Applications"},{"key":"13_CR14","volume-title":"Matrix Computations","author":"G. Golub","year":"1996","unstructured":"Golub, G., VanLoan, C.: Matrix Computations, 3rd edn. John Hopkins Press, Baltimore and London (1996)","edition":"3"},{"issue":"6","key":"13_CR15","doi-asserted-by":"publisher","first-page":"737","DOI":"10.1147\/rd.416.0737","volume":"41","author":"F.G. Gustavson","year":"1997","unstructured":"Gustavson, F.G.: Recursion Leads to Automatic Variable Blocking for Dense Linear-Algebra Algorithms. IBM J. R. & D.\u00a041(6), 737\u2013755 (1997)","journal-title":"IBM J. R. & D."},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Gustavson, F.G.: New Generalized Data Structures for Matrices Lead to a Variety of High-Performance Algorithms. In: Boisvert, R.F., Tang, P.T.P. (eds.) Proceedings of the IFIP WG 2.5 Working Group on The Architecture of Scientific Software, Ottawa, Canada, October 2-4, pp. 211\u2013234. Kluwer Academic Publishers (2000)","DOI":"10.1007\/978-0-387-35407-1_13"},{"issue":"1","key":"13_CR17","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1147\/rd.471.0031","volume":"47","author":"F.G. Gustavson","year":"2003","unstructured":"Gustavson, F.G.: High Performance Linear Algebra Algs. using New Generalized Data Structures for Matrices. IBM J. R. & D.\u00a047(1), 31\u201355 (2003)","journal-title":"IBM J. R. & D."},{"key":"13_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1007\/11558958_2","volume-title":"Applied Parallel Computing. State of the Art in Scientific Computing","author":"F.G. Gustavson","year":"2006","unstructured":"Gustavson, F.G.: New Generalized Data Structures for Matrices Lead to a Variety of High Performance Dense Linear Algebra Algorithms. In: Dongarra, J., Madsen, K., Wa\u015bniewski, J. (eds.) PARA 2004. LNCS, vol.\u00a03732, pp. 11\u201320. Springer, Heidelberg (2006)"},{"key":"13_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"540","DOI":"10.1007\/978-3-540-75755-9_66","volume-title":"Applied Parallel Computing. State of the Art in Scientific Computing","author":"F.G. Gustavson","year":"2007","unstructured":"Gustavson, F.G., Gunnels, J., Sexton, J.: Minimal Data Copy For Dense Linear Algebra Factorization. In: K\u00e5gstr\u00f6m, B., Elmroth, E., Dongarra, J., Wa\u015bniewski, J. (eds.) PARA 2006. LNCS, vol.\u00a04699, pp. 540\u2013549. Springer, Heidelberg (2007)"},{"key":"13_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"560","DOI":"10.1007\/978-3-540-75755-9_68","volume-title":"Applied Parallel Computing. State of the Art in Scientific Computing","author":"F.G. Gustavson","year":"2007","unstructured":"Gustavson, F.G., Swirszcz, T.: In-Place Transposition of Rectangular Matrices. In: K\u00e5gstr\u00f6m, B., Elmroth, E., Dongarra, J., Wa\u015bniewski, J. (eds.) PARA 2006. LNCS, vol.\u00a04699, pp. 560\u2013569. Springer, Heidelberg (2007)"},{"key":"13_CR21","unstructured":"Gustavson, F.G.: The Relevance of New Data Structure Approaches for Dense Linear Algebra in the New Multicore\/Manycore Environments. IBM Research report RC24599, also, to appear in PARA 2008 Proceeding, 10 pages (2008)"},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"Gustavson, F.G., Karlsson, L., K\u00e5gstr\u00f6m, B.: Parallel and Cache-Efficient In-Place Matrix Storage Format Conversion. ACM TOMS, 34 pages (to appear, 2012)","DOI":"10.1145\/2168773.2168775"},{"key":"#cr-split#-13_CR23.1","unstructured":"IBM. IBM Engineering and Scientific Subroutine Library. IBM Pub. No. SA22-7272-00 (February 1986)"},{"key":"#cr-split#-13_CR23.2","unstructured":"Also, Release II, 1987 & AIX Version 3, Release 3"},{"key":"13_CR24","unstructured":"Karlsson, L.: Blocked in-place transposition with application to storage format conversion. Tech. Rep. UMINF 09.01. ISSN 0348-0542, Department of Computing Science, Ume\u00e5 University, Ume\u00e5, Sweden (January 2009)"},{"key":"13_CR25","unstructured":"Knuth, D.: The Art of Computer Programming, 3rd edn., vol.\u00a01, 2 & 3. Addison-Wesley (1998)"},{"issue":"9","key":"13_CR26","doi-asserted-by":"publisher","first-page":"1175","DOI":"10.1109\/TPDS.2007.70813","volume":"19","author":"J. Kurzak","year":"2008","unstructured":"Kurzak, J., Buttari, A., Dongarra, J.: Solving systems of Linear Equations on the Cell Processor using Cholesky Factorization. IEEE Trans. Parallel Distrib. Syst.\u00a019(9), 1175\u20131186 (2008)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"issue":"10","key":"13_CR27","doi-asserted-by":"publisher","first-page":"1371","DOI":"10.1002\/cpe.1164","volume":"19","author":"J. Kurzak","year":"2007","unstructured":"Kurzak, J., Dongarra, J.: Implementation of mixed precision in solving mixed precision of linear equations on the Cell processor: Research Articles. Concurr. Comput.: Pract. Exper.\u00a019(10), 1371\u20131385 (2007)","journal-title":"Concurr. Comput. : Pract. Exper."},{"issue":"7","key":"13_CR28","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1109\/TPDS.2003.1214317","volume":"14","author":"N. Park","year":"2003","unstructured":"Park, N., Hong, B., Prasanna, V.: Tiling, Block Data Layout, and Memory Hierarchy Performance. IEEE Trans. Parallel and Distributed Systems\u00a014(7), 640\u2013654 (2003)","journal-title":"IEEE Trans. Parallel and Distributed Systems"},{"key":"13_CR29","unstructured":"Tietze, H.: Three Dimensions\u2013Higher Dimensions. In: Famous Problems of Mathematics, pp. 106\u2013120. Graylock Press (1965)"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Whaley, R.C., Petitet, A., Dongarra, J.J.: Automated Empirical Optimization of Software and the ATLAS Project. Parallel Computing\u00a0(1-2), 3\u201335 (2001)","DOI":"10.1016\/S0167-8191(00)00087-9"}],"container-title":["Lecture Notes in Computer Science","Parallel Processing and Applied Mathematics"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-31464-3_13.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,4]],"date-time":"2021-05-04T12:01:53Z","timestamp":1620129713000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-31464-3_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642314636","9783642314643"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-31464-3_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}