{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T10:35:21Z","timestamp":1725791721140},"publisher-location":"Berlin, Heidelberg","reference-count":23,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642544194"},{"type":"electronic","value":"9783642544200"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-642-54420-0_64","type":"book-chapter","created":{"date-parts":[[2014,4,9]],"date-time":"2014-04-09T17:40:41Z","timestamp":1397065241000},"page":"657-667","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Implementing a Systolic Algorithm for QR Factorization on Multicore Clusters with PaRSEC"],"prefix":"10.1007","author":[{"given":"Guillaume","family":"Aupy","sequence":"first","affiliation":[]},{"given":"Mathieu","family":"Faverge","sequence":"additional","affiliation":[]},{"given":"Yves","family":"Robert","sequence":"additional","affiliation":[]},{"given":"Jakub","family":"Kurzak","sequence":"additional","affiliation":[]},{"given":"Piotr","family":"Luszczek","sequence":"additional","affiliation":[]},{"given":"Jack","family":"Dongarra","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"64_CR1","unstructured":"Adiga, N.R., Alm\u00e1si, G., Almasi, G.S., Aridor, Y., Barik, R., Beece, D., Bellofatto, R., et al.: An overview of the BlueGene\/L supercomputer. In: Supercomputing Conference (2002)"},{"key":"64_CR2","unstructured":"The National Institute for Computational Sciences: Kraken machine size, \n                    \n                      http:\/\/www.nics.tennessee.edu\/computing-resources\/machine_size"},{"key":"64_CR3","doi-asserted-by":"crossref","unstructured":"Bhatele, A., Kale, L.V.: Application-specific topology-aware mapping for three dimensional topologies. In: IPDPS (2008)","DOI":"10.1109\/IPDPS.2008.4536348"},{"issue":"1","key":"64_CR4","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.parco.2011.10.003","volume":"38","author":"G. Bosilca","year":"2012","unstructured":"Bosilca, G., Bouteiller, A., Danalis, A., Herault, T., Lemarinier, P., Dongarra, J.: DAGuE: A generic distributed DAG engine for high performance computing. Parallel Computing\u00a038(1), 37\u201351 (2012)","journal-title":"Parallel Computing"},{"key":"64_CR5","doi-asserted-by":"crossref","unstructured":"Dongarra, J., Faverge, M., Herault, T., Jacquelin, M., Langou, J., Robert, Y.: Hierarchical QR factorization algorithms for multi-core clusters. Parallel Computing (2013)","DOI":"10.1109\/IPDPS.2012.62"},{"issue":"13","key":"64_CR6","doi-asserted-by":"publisher","first-page":"1573","DOI":"10.1002\/cpe.1301","volume":"20","author":"A. Buttari","year":"2008","unstructured":"Buttari, A., Langou, J., Kurzak, J., Dongarra, J.: Parallel tiled QR factorization for multicore architectures. Concurrency: Practice and Experience\u00a020(13), 1573\u20131590 (2008)","journal-title":"Concurrency: Practice and Experience"},{"key":"64_CR7","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1016\/j.parco.2008.10.002","volume":"35","author":"A. Buttari","year":"2009","unstructured":"Buttari, A., Langou, J., Kurzak, J., Dongarra, J.: A class of parallel tiled linear algebra algorithms for multicore architectures. Parallel Computing\u00a035, 38\u201353 (2009)","journal-title":"Parallel Computing"},{"key":"64_CR8","doi-asserted-by":"crossref","unstructured":"Quintana-Ort\u00ed, G., Quintana-Ort\u00ed, E.S., van de Geijn, R.A., Zee, F.G.V., Chan, E.: Programming matrix algorithms-by-blocks for thread-level parallelism. ACM Transactions on Mathematical Software 36(3) (2009)","DOI":"10.1145\/1527286.1527288"},{"key":"64_CR9","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1145\/322047.322054","volume":"25","author":"A. Sameh","year":"1978","unstructured":"Sameh, A., Kuck, D.: On stable parallel linear systems solvers. J. ACM\u00a025, 81\u201391 (1978)","journal-title":"J. ACM"},{"key":"64_CR10","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/BF01389639","volume":"43","author":"J. Modi","year":"1984","unstructured":"Modi, J., Clarke, M.: An alternative Givens ordering. Numerische Mathematik\u00a043, 83\u201390 (1984)","journal-title":"Numerische Mathematik"},{"issue":"16","key":"64_CR11","doi-asserted-by":"publisher","first-page":"1113","DOI":"10.1137\/0910067","volume":"10","author":"A. Pothen","year":"1989","unstructured":"Pothen, A., Raghavan, P.: Distributed orthogonal factorization: Givens and Householder algorithms. SIAM J. Scientific Computing\u00a010(16), 1113\u20131134 (1989)","journal-title":"SIAM J. Scientific Computing"},{"key":"64_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"677","DOI":"10.1007\/3-540-45706-2_94","volume-title":"Euro-Par 2002. Parallel Processing","author":"R.D. Cunha da","year":"2002","unstructured":"da Cunha, R.D., Becker, D., Patterson, J.C.: New parallel (rank-revealing) QR factorization algorithms. In: Monien, B., Feldmann, R.L. (eds.) Euro-Par 2002. LNCS, vol.\u00a02400, pp. 677\u2013686. Springer, Heidelberg (2002)"},{"key":"64_CR13","unstructured":"Demmel, J.W., Grigori, L., Hoemmen, M., Langou, J.: Communication-avoiding parallel and sequential QR and LU factorizations: theory and practice. Technical Report 204, LAPACK Working Note (2008)"},{"key":"64_CR14","unstructured":"Langou, J.: Computing the R of the QR factorization of tall and skinny matrices using MPI_Reduce. Technical Report 1002.4250, arXiv (2010)"},{"key":"64_CR15","doi-asserted-by":"crossref","unstructured":"Hadri, B., Ltaief, H., Agullo, E., Dongarra, J.: Tile QR factorization with parallel panel processing for multicore architectures. In: IPDPS (2010)","DOI":"10.1109\/IPDPS.2010.5470443"},{"key":"64_CR16","doi-asserted-by":"crossref","unstructured":"Bouwmeester, H., Jacquelin, M., Langou, J., Robert, Y.: Tiled QR factorization algorithms. In: SC. ACM\/ IEEE Computer Society Press (2011)","DOI":"10.1145\/2063384.2063393"},{"issue":"4","key":"64_CR17","first-page":"712","volume":"33","author":"M. Cosnard","year":"1986","unstructured":"Cosnard, M., Robert, Y.: Complexity of parallel QR factorization. Journal of the A.C.M.\u00a033(4), 712\u2013723 (1986)","journal-title":"Journal of the A.C.M."},{"key":"64_CR18","doi-asserted-by":"crossref","unstructured":"Agullo, E., Coti, C., Dongarra, J., Herault, T., Langou, J.: QR factorization of tall and skinny matrices in a grid computing environment. In: IPDPS (2010)","DOI":"10.1109\/IPDPS.2010.5470475"},{"key":"64_CR19","doi-asserted-by":"crossref","unstructured":"Song, F., Ltaief, H., Hadri, B., Dongarra, J.: Scalable tile communication-avoiding QR factorization on multicore cluster systems. In: SC. ACM\/IEEE Computer Society Press (2010)","DOI":"10.1109\/SC.2010.48"},{"key":"64_CR20","doi-asserted-by":"crossref","unstructured":"Bosilca, G., Bouteiller, A., Danalis, A., Herault, T., Lemarinier, P., Dongarra, J.: DAGuE: A generic distributed DAG engine for high performance computing. In: HIPS (2011)","DOI":"10.1109\/IPDPS.2011.281"},{"key":"64_CR21","doi-asserted-by":"crossref","unstructured":"Bosilca, G., Bouteiller, A., Danalis, A., Faverge, M., Haidar, A., Herault, T., Kurzak, J., Langou, J., Lemarinier, P., Ltaief, H., et al.: Flexible development of dense linear algebra algorithms on massively parallel architectures with DPLASMA. In: PDSEC (2011)","DOI":"10.1109\/IPDPS.2011.299"},{"key":"64_CR22","doi-asserted-by":"crossref","unstructured":"Kurzak, J., Luszczek, P., Gates, M., Yamazaki, I., Dongarra, J.: Virtual systolic array for QR decomposition. In: IPDPS. IEEE Computer Society Press (2013)","DOI":"10.1109\/IPDPS.2013.119"},{"key":"64_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1002\/cpe.728","volume":"15","author":"J.J. Dongarra","year":"2003","unstructured":"Dongarra, J.J., Luszczek, P., Petitet, A.: The LINPACK benchmark: Past, present, and future. Concurrency and Computation: Practice and Experience\u00a015, 1\u201318 (2003)","journal-title":"Concurrency and Computation: Practice and Experience"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2013: Parallel Processing Workshops"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-54420-0_64","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,19]],"date-time":"2020-04-19T20:14:55Z","timestamp":1587327295000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-54420-0_64"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783642544194","9783642544200"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-54420-0_64","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}