{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T16:04:15Z","timestamp":1761581055959,"version":"3.28.0"},"reference-count":33,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012,9]]},"DOI":"10.1109\/hpec.2012.6408679","type":"proceedings-article","created":{"date-parts":[[2013,1,17]],"date-time":"2013-01-17T20:35:00Z","timestamp":1358454900000},"page":"1-6","source":"Crossref","is-referenced-by-count":11,"title":["Anatomy of a globally recursive embedded LINPACK benchmark"],"prefix":"10.1109","author":[{"given":"Jack","family":"Dongarra","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Piotr","family":"Luszczek","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"19","first-page":"116","article-title":"SuperMatrix out-of-order scheduling of matrix operations for SMP and multi-core architectures","author":"chan","year":"2007","journal-title":"ACM Symposium on Parallel Algorithms and Architectures"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-89740-8_13"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898719604"},{"key":"33","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2006.30"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1145\/1071690.1064233"},{"key":"16","article-title":"Automatic measurement of instruction cache capacity","author":"yotov","year":"2005","journal-title":"Proceedings of the 18th Workshop on Languages and Compilers for Parallel Computing (LCPC)"},{"journal-title":"Technical Reference Manual","year":"2007","key":"13"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/180\/1\/012037"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1145\/77626.79170"},{"journal-title":"Technical Reference Manual","year":"2006","key":"12"},{"journal-title":"Parallel Algorithms for Reducing the Generalized Hermitian-definite Eigenvalue Problem","year":"2011","author":"poulson","key":"21"},{"journal-title":"A class of parallel tiled linear algebra algorithms for multicore architectures","year":"2007","author":"buttari","key":"20"},{"journal-title":"Distibuted Dense Numerical Linear Algebra Algorithms on Massively Parallel Architectures Dplasma","year":"2010","author":"bosilca","key":"22"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.299"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479896297744"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1147\/rd.416.0737"},{"key":"26","article-title":"New serial and parallel recursive QR factorization algorithms for SMP systems","volume":"1998","author":"elmroth","year":"1998","journal-title":"Proc PARA"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1147\/rd.444.0605"},{"key":"28","article-title":"Communication-efficient parallel dense LU using a 3-dimensional approach","author":"irony","year":"2001","journal-title":"Proceedings of the 10th SIAM Conference on Parallel Processing for Scientific Computing"},{"key":"29","article-title":"Exploiting fine-grain parallelism in recursive lu factorization","author":"dongarra","year":"2011","journal-title":"ParCo 2011 - International Conference on Parallel Computing"},{"key":"3","first-page":"339","article-title":"Portable high performance GEMM-based Level 3 BLAS","author":"kagstr?om","year":"1993","journal-title":"Proceedings of the 6th SIAM Conference on Parallel Processing for Scientific Computing"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-8191(00)00087-9"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1145\/77626.79170"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.1998.10004"},{"key":"30","doi-asserted-by":"publisher","DOI":"10.1145\/1693453.1693484"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2008.12.010"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1201\/b10376-3"},{"key":"32","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTR.2008.4663765"},{"journal-title":"Synthetic Programming User-directed Run-time Code Synthesis for High Performance Computing","year":"2007","author":"mueller","key":"5"},{"key":"31","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654079"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1145\/1356052.1356053"},{"key":"9","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1002\/cpe.728","article-title":"The LINPACK benchmark: Past, present, and future","volume":"15","author":"dongarra","year":"2003","journal-title":"Concurrency and Computation Practice and Experience"},{"key":"8","first-page":"935","article-title":"Fast and small short vector simd matrix multiplication kernels for the synergistic processing element of the CELL processor","author":"alvaro","year":"2008","journal-title":"ICCS International conference on computational science"}],"event":{"name":"2012 IEEE Conference on High Performance Extreme Computing (HPEC)","start":{"date-parts":[[2012,9,10]]},"location":"Waltham, MA, USA","end":{"date-parts":[[2012,9,12]]}},"container-title":["2012 IEEE Conference on High Performance Extreme Computing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6393528\/6408656\/06408679.pdf?arnumber=6408679","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,21]],"date-time":"2017-06-21T05:51:14Z","timestamp":1498024274000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6408679\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,9]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/hpec.2012.6408679","relation":{},"subject":[],"published":{"date-parts":[[2012,9]]}}}