{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T12:14:25Z","timestamp":1763468065833},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642281440"},{"type":"electronic","value":"9783642281457"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-28145-7_25","type":"book-chapter","created":{"date-parts":[[2012,2,15]],"date-time":"2012-02-15T14:58:14Z","timestamp":1329317894000},"page":"248-257","source":"Crossref","is-referenced-by-count":8,"title":["An Implementation of the Tile QR Factorization for a GPU and Multiple CPUs"],"prefix":"10.1007","author":[{"given":"Jakub","family":"Kurzak","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rajib","family":"Nath","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peng","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jack","family":"Dongarra","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"25_CR1","unstructured":"ATLAS, \n                      \n                        http:\/\/math-atlas.sourceforge.net\/"},{"key":"25_CR2","unstructured":"MAGMA, \n                      \n                        http:\/\/icl.cs.utk.edu\/magma\/"},{"key":"25_CR3","unstructured":"PLASMA, \n                      \n                        http:\/\/icl.cs.utk.edu\/plasma\/"},{"key":"25_CR4","unstructured":"StarPU, \n                      \n                        http:\/\/runtime.bordeaux.inria.fr\/StarPU\/"},{"key":"25_CR5","unstructured":"The Jade Parallel Programming Language, \n                      \n                        http:\/\/suif.stanford.edu\/jade.html"},{"key":"25_CR6","doi-asserted-by":"crossref","unstructured":"Augonnet, C., Thibault, S., Namyst, R., Wacrenier, P.: StarPU: A unified platform for task scheduling on heterogeneous multicore architectures. Concurrency Computat. Pract. Exper. (2010) (to appear)","DOI":"10.1007\/978-3-642-03869-3_80"},{"issue":"13","key":"25_CR7","doi-asserted-by":"publisher","first-page":"1573","DOI":"10.1002\/cpe.1301","volume":"20","author":"A. Buttari","year":"2008","unstructured":"Buttari, A., Langou, J., Kurzak, J., Dongarra, J.J.: Parallel tiled QR factorization for multicore architectures. Concurrency Computat.: Pract. Exper.\u00a020(13), 1573\u20131590 (2008)","journal-title":"Concurrency Computat.: Pract. Exper."},{"key":"25_CR8","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1016\/j.parco.2008.10.002","volume":"35","author":"A. Buttari","year":"2009","unstructured":"Buttari, A., Langou, J., Kurzak, J., Dongarra, J.J.: A class of parallel tiled linear algebra algorithms for multicore architectures. Parallel Comput. Syst. Appl.\u00a035, 38\u201353 (2009)","journal-title":"Parallel Comput. Syst. Appl."},{"key":"25_CR9","doi-asserted-by":"crossref","unstructured":"Kurzak, J., Dongarra, J.J.: QR factorization for the CELL processor. Scientific Programming, 1\u201312 (2008)","DOI":"10.1155\/2009\/239720"},{"issue":"1","key":"25_CR10","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1002\/cpe.1467","volume":"21","author":"J. Kurzak","year":"2009","unstructured":"Kurzak, J., Ltaief, H., Dongarra, J.J., Badia, R.M.: Scheduling dense linear algebra operations on multicore processors. Concurrency Computat.: Pract. Exper.\u00a021(1), 15\u201344 (2009)","journal-title":"Concurrency Computat.: Pract. Exper."},{"key":"25_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"884","DOI":"10.1007\/978-3-642-01970-8_89","volume-title":"Computational Science \u2013 ICCS 2009","author":"Y. Li","year":"2009","unstructured":"Li, Y., Dongarra, J., Tomov, S.: A Note on Auto-Tuning GEMM for GPUs. In: Allen, G., Nabrzyski, J., Seidel, E., van Albada, G.D., Dongarra, J., Sloot, P.M.A. (eds.) ICCS 2009. LNCS, vol.\u00a05544, pp. 884\u2013892. Springer, Heidelberg (2009)"},{"key":"25_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/978-3-642-19328-6_10","volume-title":"High Performance Computing for Computational Science \u2013 VECPAR 2010","author":"R. Nath","year":"2011","unstructured":"Nath, R., Tomov, S., Dongarra, J.: Accelerating GPU Kernels for Dense Linear Algebra. In: Palma, J.M.L.M., Dayd\u00e9, M., Marques, O., Lopes, J.C. (eds.) VECPAR 2010. LNCS, vol.\u00a06449, pp. 83\u201392. Springer, Heidelberg (2011)"},{"issue":"3","key":"25_CR13","doi-asserted-by":"publisher","first-page":"284","DOI":"10.1177\/1094342009106195","volume":"23","author":"J. Planas","year":"2009","unstructured":"Planas, J., Badia, R.M., Ayguad, E., Labarta, J.: Hierarchical task-based programming with StarSs. Int. J. High Perf. Comput. Applic.\u00a023(3), 284\u2013299 (2009)","journal-title":"Int. J. High Perf. Comput. Applic."},{"issue":"3","key":"25_CR14","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1145\/291889.291893","volume":"20","author":"M.C. Rinard","year":"1998","unstructured":"Rinard, M.C., Lam, M.S.: The design, implementation, and evaluation of Jade. ACM Trans. Programming Lang. Syst.\u00a020(3), 483\u2013545 (1998)","journal-title":"ACM Trans. Programming Lang. Syst."},{"issue":"5-6","key":"25_CR15","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1016\/j.parco.2009.12.005","volume":"36","author":"S. Tomov","year":"2010","unstructured":"Tomov, S., Dongarra, J., Baboulin, M.: Towards dense linear algebra for hybrid gpu accelerated manycore systems. Parellel Comput. Syst. Appl.\u00a036(5-6), 232\u2013240 (2010)","journal-title":"Parellel Comput. Syst. Appl."},{"key":"25_CR16","first-page":"1","volume-title":"Proceedings of the 2010 IEEE International Parallel & Distributed Processing Symposium, IPDPS 2010","author":"S. Tomov","year":"2010","unstructured":"Tomov, S., Nath, R., Ltaief, H., Dongarra, J.: Dense linear algebra solvers for multicore with GPU accelerators. In: Proceedings of the 2010 IEEE International Parallel & Distributed Processing Symposium, IPDPS 2010, April 19-23, pp. 1\u20138. IEEE Computer Society, Atlanta (2010)"},{"issue":"1-2","key":"25_CR17","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/S0167-8191(00)00087-9","volume":"27","author":"R.C. Whaley","year":"2001","unstructured":"Whaley, R.C., Petitet, A., Dongarra, J.: Automated empirical optimizations of software and the ATLAS project. Parellel Comput. Syst. Appl.\u00a027(1-2), 3\u201335 (2001)","journal-title":"Parellel Comput. Syst. Appl."}],"container-title":["Lecture Notes in Computer Science","Applied Parallel and Scientific Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-28145-7_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,4,27]],"date-time":"2019-04-27T17:16:47Z","timestamp":1556385407000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-28145-7_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642281440","9783642281457"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-28145-7_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}