{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T02:46:01Z","timestamp":1777603561368,"version":"3.51.4"},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,12,13]],"date-time":"2022-12-13T00:00:00Z","timestamp":1670889600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,12,13]],"date-time":"2022-12-13T00:00:00Z","timestamp":1670889600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100006168","name":"National Nuclear Security Administration","doi-asserted-by":"publisher","award":["DE-NA-0003525"],"award-info":[{"award-number":["DE-NA-0003525"]}],"id":[{"id":"10.13039\/100006168","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006168","name":"National Nuclear Security Administration","doi-asserted-by":"publisher","award":["17-SC-20-SC"],"award-info":[{"award-number":["17-SC-20-SC"]}],"id":[{"id":"10.13039\/100006168","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Numer Algor"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s11075-022-01439-z","type":"journal-article","created":{"date-parts":[[2022,12,13]],"date-time":"2022-12-13T13:03:04Z","timestamp":1670936584000},"page":"119-147","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Optimal size of the block in block GMRES on GPUs: computational model and experiments"],"prefix":"10.1007","volume":"92","author":[{"given":"Erik G.","family":"Boman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrew J.","family":"Higgins","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8010-0391","authenticated-orcid":false,"given":"Daniel B.","family":"Szyld","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,12,13]]},"reference":[{"issue":"1","key":"1439_CR1","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1109\/LCA.2019.2904497","volume":"18","author":"Y Arafa","year":"2019","unstructured":"Arafa, Y., Badawy, A.A., Chennupati, G., Santhi, N., Eidenbenz, S.: PPT-GPU: scalable GPU performance modeling. IEEE Comput. Archit. Lett. 18(1), 55\u201358 (2019)","journal-title":"IEEE Comput. Archit. Lett."},{"key":"1439_CR2","unstructured":"The Belos Project Team: The Belos Project Website. https:\/\/docs.trilinos.org\/dev\/packages\/belos\/doc\/html\/classBelos_1_1PseudoBlockGmresSolMgr.html. Accessed 2021 Dec 01"},{"issue":"2","key":"1439_CR3","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1145\/567806.567807","volume":"28","author":"L Susan Blackford","year":"2002","unstructured":"Susan Blackford, L., Demmel, J., Dongarra, J.J., Duff, I.S., Hammarling, S., Henry, G., Heroux, M., Kaufman, L., Lumsdaine, A., Petitet, A., Pozo, R., Remington, K., Clint Whaley, R.: An updated set of basic linear algebra subprograms (BLAS). ACM Trans. Math. Softw. 28(2), 135\u2013151 (2002)","journal-title":"ACM Trans. Math. Softw."},{"issue":"3","key":"1439_CR4","doi-asserted-by":"publisher","first-page":"1365","DOI":"10.1137\/21M1394424","volume":"42","author":"E Carson","year":"2021","unstructured":"Carson, E., Lund, K., Rozloznik, M.: The stability of block variants of classical Gram-Schmidt. SIAM J. Matrix Anal. Appl. 42(3), 1365\u20131380 (2021)","journal-title":"SIAM J. Matrix Anal. Appl."},{"issue":"1","key":"1439_CR5","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1109\/99.660313","volume":"5","author":"L Dagum","year":"1998","unstructured":"Dagum, L., Menon, R.: OpenMP : an industry standard API for shared-memory programming. IEEE Comput. Sci. Eng. 5(1), 46\u201355 (1998)","journal-title":"IEEE Comput. Sci. Eng."},{"key":"1439_CR6","doi-asserted-by":"crossref","unstructured":"Davis, T.A., Hu, Y.: The university of Florida sparse matrix collection. ACM Trans. Math. Softw. 38(1) (2011)","DOI":"10.1145\/2049662.2049663"},{"issue":"1","key":"1439_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/77626.79170","volume":"16","author":"JJ Dongarra","year":"1990","unstructured":"Dongarra, J.J., Croz, J.D., Hammarling, S., Duff, I.S.: A set of level 3 basic linear algebra subprograms. ACM Trans. Math. Softw. 16(1), 1\u201317 (1990)","journal-title":"ACM Trans. Math. Softw."},{"issue":"12","key":"1439_CR8","doi-asserted-by":"publisher","first-page":"3202","DOI":"10.1016\/j.jpdc.2014.07.003","volume":"74","author":"EH Carter","year":"2014","unstructured":"Carter, E.H., Trott, C.R., Sunderland, D.: Kokkos: enabling manycore performance portability through polymorphic memory access patterns. J. Parallel Distrib. Comput. 74(12), 3202\u20133216 (2014). Domain-Specific Languages and High-Level Frameworks for High-Performance Computing","journal-title":"J. Parallel Distrib. Comput."},{"key":"1439_CR9","unstructured":"Gutknecht, M.H.: Block Krylov subspace methods for linear systems with multiple right-hand sides: an introduction. In: Siddiqi, A.H., Duff, I.S., Ole, C. (eds.) Modern Mathematical Models, Methods and Algorithms for Real World Systems, Chapter 10, pp. 420\u2013447. Anamaya Publishers, New Dehli (2006)"},{"issue":"6","key":"1439_CR10","doi-asserted-by":"publisher","first-page":"871","DOI":"10.1016\/j.apnum.2007.04.010","volume":"58","author":"MH Gutknecht","year":"2008","unstructured":"Gutknecht, M.H., Schmelzer, T.: Updating the QR decomposition of block tridiagonal and block Hessenberg matrices. Appl. Numer. Math. 58(6), 871\u2013883 (2008)","journal-title":"Appl. Numer. Math."},{"key":"1439_CR11","unstructured":"Hoemmen, M.: Communication-avoiding Krylov subspace methods. PhD thesis, EECS Department, University of California, Berkeley (2010)"},{"key":"1439_CR12","unstructured":"Intel. Intel math kernel library. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/tools\/oneapi\/onemkl.html. Accessed 2022 Apr 24"},{"key":"1439_CR13","unstructured":"Langou, J.: Solving large linear systems with multiple right-hand sides. PhD thesis, INSA de Toulouse (2003)"},{"key":"1439_CR14","volume-title":"Krylov subspace methods. Principles and analysis. Numerical Mathematics and Scientific Computation, 1st edn.","author":"J Liesen","year":"2013","unstructured":"Liesen, J., Strako\u0161, Z.: Krylov subspace methods. Principles and analysis. Numerical Mathematics and Scientific Computation, 1st edn. Oxford University Press, Oxford (2013)"},{"key":"1439_CR15","unstructured":"NVIDIA. cuBLAS documentation. https:\/\/docs.nvidia.com\/cuda\/cublas\/index.html. Accessed 2021 Dec 08"},{"key":"1439_CR16","unstructured":"NVIDIA. cuSPARSE documentation. https:\/\/docs.nvidia.com\/cuda\/cusparse\/index.html. Accessed 2021 Dec 08"},{"key":"1439_CR17","unstructured":"NVIDIA. NVIDIA V100 Tensor Core GPU. https:\/\/images.nvidia.com\/content\/technologies\/volta\/pdf\/volta-v100-datasheet-update-us-1165301-r5.pdf Accessed 2021 Apr 07"},{"key":"1439_CR18","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1016\/0024-3795(80)90247-5","volume":"29","author":"DP O\u2019Leary","year":"1980","unstructured":"O\u2019Leary, D.P.: The block conjugate gradient algorithm and related methods. Linear Algebra Appl. 29, 293\u2013322 (1980). Special Volume Dedicated to Alson S. Householder.","journal-title":"Linear Algebra Appl."},{"key":"1439_CR19","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1016\/j.cam.2015.11.040","volume":"300","author":"S Rashedi","year":"2016","unstructured":"Rashedi, S., Ebadi, G., Birk, S., Frommer, A.: On short recurrence Krylov type methods for linear systems with many right-hand sides. J. Comput. Appl. Math. 300, 18\u201329 (2016)","journal-title":"J. Comput. Appl. Math."},{"key":"1439_CR20","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718003","volume-title":"Iterative Methods for Sparse Linear Systems, 2nd edn.","author":"Y Saad","year":"2003","unstructured":"Saad, Y.: Iterative Methods for Sparse Linear Systems, 2nd edn. Society for Industrial and Applied Mathematics, Philadelphia (2003)"},{"issue":"3","key":"1439_CR21","doi-asserted-by":"publisher","first-page":"856","DOI":"10.1137\/0907058","volume":"7","author":"S Yousef","year":"1986","unstructured":"Yousef, S., Schultz, M.H.: GMRES: a generalized minimal residual algorithm for solving nonsymmetric linear systems. SIAM J. Sci. Stat. Comput. 7 (3), 856\u2013869 (1986)","journal-title":"SIAM J. Sci. Stat. Comput."},{"key":"1439_CR22","doi-asserted-by":"publisher","first-page":"917","DOI":"10.1137\/0916053","volume":"16","author":"V Simoncini","year":"1995","unstructured":"Simoncini, V., Gallopoulos, E.: An iterative method for nonsymmetric systems with multiple right-hand sides. SIAM J. Sci. Comput. 16, 917\u2013933 (1995)","journal-title":"SIAM J. Sci. Comput."},{"key":"1439_CR23","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1016\/0024-3795(95)00093-3","volume":"247","author":"V Simoncini","year":"1996","unstructured":"Simoncini, V., Gallopoulos, E.: Convergence properties of block GMRES and matrix polynomials. Linear Algebra Appl. 247, 97\u2013119 (1996)","journal-title":"Linear Algebra Appl."},{"key":"1439_CR24","doi-asserted-by":"publisher","first-page":"A302","DOI":"10.1137\/140998214","volume":"38","author":"K Soodhalter","year":"2016","unstructured":"Soodhalter, K.: Block Krylov subspace recycling for shifted systems with unrelated right-hand sides. SIAM J. Sci. Comput. 38, A302\u2013A324 (2016)","journal-title":"SIAM J. Sci. Comput."},{"key":"1439_CR25","unstructured":"Vital, B.: Etude de quelques m\u00e9thodes de r\u00e9solution de probl\u00e9mes lin\u00e9aires de grande taille sur multiprocesseur. PhD thesis, Universit\u00e9 de Rennes I (1990)"},{"key":"1439_CR26","doi-asserted-by":"crossref","unstructured":"Yamazaki, I., Hoemmen, M., Luszczek, P., Dongarra, J.J.: Improving performance of GMRES by reducing communication and pipelining global collectives. In: 2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 1118\u20131127 (2017)","DOI":"10.1109\/IPDPSW.2017.65"}],"container-title":["Numerical Algorithms"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11075-022-01439-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11075-022-01439-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11075-022-01439-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T08:31:44Z","timestamp":1673253104000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11075-022-01439-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,13]]},"references-count":26,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["1439"],"URL":"https:\/\/doi.org\/10.1007\/s11075-022-01439-z","relation":{},"ISSN":["1017-1398","1572-9265"],"issn-type":[{"value":"1017-1398","type":"print"},{"value":"1572-9265","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,13]]},"assertion":[{"value":"2 May 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 October 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 December 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of interest"}}]}}