{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T00:14:03Z","timestamp":1742948043707,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319971353"},{"type":"electronic","value":"9783319971360"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-97136-0_1","type":"book-chapter","created":{"date-parts":[[2018,7,16]],"date-time":"2018-07-16T09:33:12Z","timestamp":1531733592000},"page":"1-18","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A High Arithmetic Intensity Krylov Subspace Method Based on Stencil Compiler Programs"],"prefix":"10.1007","author":[{"given":"Simplice","family":"Donfack","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Patrick","family":"Sanan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Olaf","family":"Schenk","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bram","family":"Reps","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wim","family":"Vanroose","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,7,17]]},"reference":[{"issue":"10","key":"1_CR1","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/1562764.1562783","volume":"52","author":"K Asanovic","year":"2009","unstructured":"Asanovic, K., Bodik, R., Demmel, J., Keaveny, T., Keutzer, K., Kubiatowicz, J., Morgan, N., Patterson, D., Sen, K., Wawrzynek, J., Wessel, D., Yelick, K.: A view of the parallel computing landscape. Commun. ACM 52(10), 56\u201367 (2009)","journal-title":"Commun. ACM"},{"issue":"1","key":"1_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1137\/0913001","volume":"13","author":"SF Ashby","year":"1992","unstructured":"Ashby, S.F., Manteuffel, T.A., Otto, J.S.: A comparison of adaptive Chebyshev and least squares polynomial preconditioning for Hermitian positive definite linear systems. SIAM J. Sci. Stat. Comput. 13(1), 1\u201329 (1992)","journal-title":"SIAM J. Sci. Stat. Comput."},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Balay, S., Abhyankar, S., Adams, M.F., Brown, J., Brune, P., Buschelman, K., Dalcin, L., Eijkhout, V., Gropp, W.D., Kaushik, D., Knepley, M.G., McInnes, L.C., Rupp, K., Smith, B.F., Zampini, S., Zhang, H.: PETSc users manual. Technical report ANL-95\/11 - Revision 3.6, Argonne National Laboratory (2015)","DOI":"10.2172\/1178109"},{"key":"1_CR4","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1007\/978-1-4612-1986-6_8","volume-title":"Modern Software Tools in Scientific Computing","author":"S Balay","year":"1997","unstructured":"Balay, S., Gropp, W.D., McInnes, L.C., Smith, B.F.: Efficient management of parallelism in object oriented numerical software libraries. In: Arge, E., Bruaset, A.M., Langtangen, H.P. (eds.) Modern Software Tools in Scientific Computing, pp. 163\u2013202. Birkh\u00e4user Press, Boston (1997). \n                    https:\/\/doi.org\/10.1007\/978-1-4612-1986-6_8"},{"key":"1_CR5","unstructured":"Bianco, M., Varetto, U.: A generic library for stencil computations. arXiv preprint \n                    arXiv:1207.1746\n                    \n                   (2012)"},{"key":"1_CR6","unstructured":"Bondhugula, U., Hartono, A., Ramanujam, J., Sadayappan, P.: Pluto: a practical and fully automatic polyhedral program optimization system. In: Proceedings of the ACM SIGPLAN 2008 Conference on Programming Language Design and Implementation (PLDI 2008), June 2008. Citeseer, Tucson (2008)"},{"issue":"6","key":"1_CR7","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1145\/1379022.1375595","volume":"43","author":"U Bondhugula","year":"2008","unstructured":"Bondhugula, U., Hartono, A., Ramanujam, J., Sadayappan, P.: A practical automatic polyhedral parallelizer and locality optimizer. ACM SIGPLAN Not. 43(6), 101\u2013113 (2008)","journal-title":"ACM SIGPLAN Not."},{"key":"1_CR8","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898719505","volume-title":"A Multigrid Tutorial","author":"WL Briggs","year":"2000","unstructured":"Briggs, W.L., Henson, V.E., McCormick, S.F.: A Multigrid Tutorial, 2nd edn. SIAM, University City (2000)","edition":"2"},{"key":"1_CR9","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1137\/1.9780898718133.ch10","volume-title":"Parallel Processing for Scientific Computing","author":"Edmond Chow","year":"2006","unstructured":"Chow, E., Falgout, R.D., Hu, J.J., Tuminaro, R.S., Yang, U.M.: A survey of parallelization techniques for multigrid solvers. In: Parallel Processing for Scientific Computing, vol. 20, pp. 179\u2013201 (2006)"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Christen, M., Schenk, O., Burkhart, H.: Automatic code generation and tuning for stencil kernels on modern microarchitectures. In: Proceedings of International Supercomputing Conference (ISC 2011), vol. 26, pp. 205\u2013210 (2011)","DOI":"10.1007\/s00450-011-0160-6"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Christen, M., Schenk, O., Burkhart, H.: PATUS: a code generation and autotuning framework for parallel iterative stencil computations on modern microarchitectures. In: 2011 IEEE International Conference on Parallel and Distributed Processing Symposium (IPDPS), pp. 676\u2013687. IEEE (2011)","DOI":"10.1109\/IPDPS.2011.70"},{"key":"1_CR12","doi-asserted-by":"crossref","unstructured":"Christen, M., Schenk, O., Cui, Y.: PATUS for convenient high-performance stencils: evaluation in earthquake simulations. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, SC 2012, pp. 11:1\u201311:10. IEEE Computer Society Press, Los Alamitos (2012)","DOI":"10.1109\/SC.2012.95"},{"issue":"5","key":"1_CR13","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1016\/0167-8191(96)00022-1","volume":"22","author":"AT Chronopoulos","year":"1996","unstructured":"Chronopoulos, A.T., Swanson, C.D.: Parallel iterative s-step methods for unsymmetric linear systems. Parallel Comput. 22(5), 623\u2013641 (1996)","journal-title":"Parallel Comput."},{"issue":"2","key":"1_CR14","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1016\/0377-0427(89)90045-9","volume":"25","author":"AT Chronopoulos","year":"1989","unstructured":"Chronopoulos, A.T., Gear, C.W.: s-Step iterative methods for symmetric linear systems. J. Comput. Appl. Math. 25(2), 153\u2013168 (1989)","journal-title":"J. Comput. Appl. Math."},{"issue":"3","key":"1_CR15","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1007\/BF02243566","volume":"22","author":"PF Dubois","year":"1979","unstructured":"Dubois, P.F., Greenbaum, A., Rodrigue, G.H.: Approximating the inverse of a matrix for use in iterative algorithms on vector processors. Computing 22(3), 257\u2013268 (1979)","journal-title":"Computing"},{"issue":"3","key":"1_CR16","doi-asserted-by":"publisher","first-page":"1572","DOI":"10.1137\/070684550","volume":"30","author":"YA Erlangga","year":"2008","unstructured":"Erlangga, Y.A., Nabben, R.: Multilevel projection-based nested Krylov iteration for boundary value problems. SIAM J. Sci. Comput. 30(3), 1572\u20131595 (2008)","journal-title":"SIAM J. Sci. Comput."},{"key":"1_CR17","unstructured":"Feautrier, P., Lengauer, C.: The polyhedron model. In: Encyclopedia of Parallel Computing, pp. 1581\u20131592. Springer, Heidelberg (2011)"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Fujita, K., Ichimura, T., Koyama, K., Inoue, H., Hori, M., Maddegedara, L.: Fast and scalable low-order implicit unstructured finite-element solver for earth\u2019s crust deformation problem. In: Proceedings of the Platform for Advanced Scientific Computing Conference, PASC 2017, pp. 11:1\u201311:10. ACM, New York (2017)","DOI":"10.1145\/3093172.3093236"},{"issue":"7","key":"1_CR19","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1016\/j.parco.2013.06.001","volume":"40","author":"P Ghysels","year":"2014","unstructured":"Ghysels, P., Vanroose, W.: Hiding global synchronization latency in the preconditioned conjugate gradient algorithm. Parallel Comput. 40(7), 224\u2013238 (2014)","journal-title":"Parallel Comput."},{"issue":"1","key":"1_CR20","doi-asserted-by":"publisher","first-page":"C48","DOI":"10.1137\/12086563X","volume":"35","author":"P Ghysels","year":"2013","unstructured":"Ghysels, P., Ashby, T.J., Meerbergen, K., Vanroose, W.: Hiding global communication latency in the GMRES algorithm on massively parallel machines. SIAM J. Sci. Comput. 35(1), C48\u2013C71 (2013)","journal-title":"SIAM J. Sci. Comput."},{"issue":"2","key":"1_CR21","doi-asserted-by":"publisher","first-page":"C217","DOI":"10.1137\/130930376","volume":"37","author":"L Grigori","year":"2015","unstructured":"Grigori, L., Moufawad, S.: Communication avoiding ILU0 preconditioner. SIAM J. Sci. Comput. 37(2), C217\u2013C246 (2015)","journal-title":"SIAM J. Sci. Comput."},{"issue":"4","key":"1_CR22","doi-asserted-by":"publisher","first-page":"1250010","DOI":"10.1142\/S0129626412500107","volume":"22","author":"T Grosser","year":"2012","unstructured":"Grosser, T., Gr\u00f6\u00dflinger, A., Lengauer, C.: Polly - performing polyhedral optimizations on a low-level intermediate representation. Parallel Process. Lett. 22(4), 1250010 (2012)","journal-title":"Parallel Process. Lett."},{"key":"1_CR23","doi-asserted-by":"crossref","unstructured":"Gysi, T., Grosser, T., Hoefler, T.: MODESTO: data-centric analytic optimization of complex stencil programs on heterogeneous architectures. In: Proceedings of the 29th ACM on International Conference on Supercomputing, ICS 2015, pp. 177\u2013186. ACM, New York (2015)","DOI":"10.1145\/2751205.2751223"},{"key":"1_CR24","doi-asserted-by":"crossref","unstructured":"King, J., Kirby, R.M.: A scalable, efficient scheme for evaluation of stencil computations over unstructured meshes. In: 2013 International Conference for High Performance Computing, Networking, Storage and Analysis (SC), pp. 1\u201312, November 2013","DOI":"10.1145\/2503210.2503214"},{"issue":"4","key":"1_CR25","doi-asserted-by":"publisher","first-page":"C439","DOI":"10.1137\/140991133","volume":"37","author":"T Malas","year":"2015","unstructured":"Malas, T., Hager, G., Ltaief, H., Stengel, H., Wellein, G., Keyes, D.: Multicore-optimized wavefront diamond blocking for optimizing stencil updates. SIAM J. Sci. Comput. 37(4), C439\u2013C464 (2015)","journal-title":"SIAM J. Sci. Comput."},{"key":"1_CR26","doi-asserted-by":"crossref","unstructured":"Mohiyuddin, M., Hoemmen, M., Demmel, J., Yelick, K.: Minimizing communication in sparse matrix solvers. In: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis, p. 36. ACM (2009)","DOI":"10.1145\/1654059.1654096"},{"key":"1_CR27","doi-asserted-by":"crossref","unstructured":"Stiefel, E., Hestenes, M.R.: Methods of conjugate gradients for solving linear systems. Journal of Research of the National Bureau of Standards 49(6) (1952)","DOI":"10.6028\/jres.049.044"},{"key":"1_CR28","unstructured":"Rupp, K.: CPU, GPU, and MIC hardware characteristics over time. \n                    https:\/\/www.karlrupp.net\/2013\/06\/cpu-gpu-and-mic-hardware-characteristics-over-time\/"},{"key":"1_CR29","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1007\/978-3-0348-7224-9_2","volume-title":"Refined Iterative Methods for Computation of the Solution and the Eigenvalues of Self-adjoint Boundary Value Problems","author":"H Rutishauser","year":"1959","unstructured":"Rutishauser, H.: Theory of gradient methods. In: Engeli, M., Ginsburg, T., Rutishauser, H., Stiefel, E. (eds.) Refined Iterative Methods for Computation of the Solution and the Eigenvalues of Self-adjoint Boundary Value Problems, pp. 24\u201349. Springer, Heidelberg (1959). \n                    https:\/\/doi.org\/10.1007\/978-3-0348-7224-9_2"},{"issue":"6","key":"1_CR30","doi-asserted-by":"publisher","first-page":"1200","DOI":"10.1137\/0910073","volume":"10","author":"Y Saad","year":"1989","unstructured":"Saad, Y.: Krylov subspace methods on supercomputers. SIAM J. Sci. Stat. Comput. 10(6), 1200\u20131232 (1989)","journal-title":"SIAM J. Sci. Stat. Comput."},{"key":"1_CR31","doi-asserted-by":"crossref","unstructured":"Tang, Y., Chowdhury, R.A., Kuszmaul, B.C., Luk, C.-K., Leiserson, C.E.: The Pochoir stencil compiler. In: Proceedings of 23rd ACM Symposium on Parallelism in Algorithms and Architectures (SPAA 2011), pp. 117\u2013128. ACM (2011)","DOI":"10.1145\/1989493.1989508"},{"key":"1_CR32","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1007\/978-3-642-24025-6_14","volume-title":"Competence in High Performance Computing 2010","author":"J Treibig","year":"2012","unstructured":"Treibig, J., Hager, G., Wellein, G.: LIKWID: lightweight performance tools. In: Bischof, C., Hegering, H.G., Nagel, W., Wittum, G. (eds.) Competence in High Performance Computing 2010, pp. 165\u2013175. Springer, Heidelberg (2012). \n                    https:\/\/doi.org\/10.1007\/978-3-642-24025-6_14"},{"key":"1_CR33","unstructured":"U.S. Department of Energy, Office of Advanced Scientific Computing Research. Report on the workshop on Extreme-Scale Solvers: Transition to future Architectures, March 2012. \n                    http:\/\/science.energy.gov\/~\/media\/ascr\/pdf\/program-documents\/docs\/reportExtremeScaleSolvers2012.pdf\n                    \n                  . Accessed Mar 2013"},{"key":"1_CR34","doi-asserted-by":"crossref","unstructured":"Bondhugula, U., Bandishti, V., Pananilath, I.: Tiling stencil computations to maximize parallelism. In: Proceedings of ACM\/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis (SC 2012), pp. 1\u201311 (2012)","DOI":"10.1109\/SC.2012.107"},{"key":"1_CR35","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511615115","volume-title":"Iterative Krylov Methods for Large Linear Systems","author":"HA Vorst Van der","year":"2003","unstructured":"Van der Vorst, H.A.: Iterative Krylov Methods for Large Linear Systems, vol. 13. Cambridge University Press, Cambridge (2003)"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing in Science and Engineering"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-97136-0_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T04:17:40Z","timestamp":1558325860000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-97136-0_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319971353","9783319971360"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-97136-0_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"17 July 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HPCSE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on High Performance Computing in Science and Engineering","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Karolinka","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 May 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 May 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"hpcse2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/industry.it4i.cz\/HPCSE17\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}