{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:19:24Z","timestamp":1768029564088,"version":"3.49.0"},"publisher-location":"Berlin, Heidelberg","reference-count":21,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783662480953","type":"print"},{"value":"9783662480960","type":"electronic"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-662-48096-0_46","type":"book-chapter","created":{"date-parts":[[2015,7,24]],"date-time":"2015-07-24T06:16:03Z","timestamp":1437718563000},"page":"601-612","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["High Performance Multi-GPU SpMV for Multi-component PDE-Based Applications"],"prefix":"10.1007","author":[{"given":"Ahmad","family":"Abdelfattah","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hatem","family":"Ltaief","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Keyes","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,7,25]]},"reference":[{"key":"46_CR1","unstructured":"KAUST BLAS. \n                      http:\/\/ecrc.kaust.edu.sa\/Pages\/Res-kblas.aspx"},{"key":"46_CR2","unstructured":"Abdelfattah, A., Keyes, D., Ltaief, H.: KBLAS: an optimized library for dense matrix-vector multiplication on GPU accelerators. ACM Trans. Math. Softw. (accepted subject to revision) (2014). \n                      http:\/\/arxiv.org\/abs\/1410.1726"},{"key":"46_CR3","unstructured":"Antz, H., Tomov, S., Dongarra, J.: Implementing a Sparse Matrix Vector Product for the SELL-C\/SELL-C-\n                      \n                        \n                      \n                      $$\\sigma $$\n                      \n                        \n                          \u03c3\n                        \n                      \n                     formats on NVIDIA GPUs. Technical report (2014). \n                      http:\/\/www.icl.utk.edu\/sites\/icl\/files\/publications\/2014\/icl-utk-772-2014.pdf"},{"key":"46_CR4","doi-asserted-by":"crossref","unstructured":"Ashari, A., Sedaghati, N., Eisenlohr, J., Parthasarathy, S., Sadayappan, P.: Fast sparse matrix-vector multiplication on GPUs for graph applications. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2014, pp. 781\u2013792 (2014). \n                      http:\/\/dx.doi.org\/10.1109\/SC.2014.69","DOI":"10.1109\/SC.2014.69"},{"key":"46_CR5","unstructured":"Balay, S., Abhyankar, S., Adams, M.F., Brown, J., Brune, P., Buschelman, K., Eijkhout, V., Gropp, W.D., Kaushik, D., Knepley, M.G., McInnes, L.C., Rupp, K., Smith, B.F., Zhang, H.: PETSc Web page (2014). \n                      http:\/\/www.mcs.anl.gov\/petsc"},{"key":"46_CR6","unstructured":"Bell, N., Garland, M.: CUSP: Generic Parallel Algorithms for Sparse Matrix and Graph Computations. \n                      http:\/\/cusplibrary.github.io\/"},{"key":"46_CR7","unstructured":"Bell, N., Garland, M.: Implementing sparse matrix-vector multiplication on throughput-oriented processors. In: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis, SC 2009, pp. 18:1\u201318:11. ACM, New York (2009). \n                      http:\/\/doi.acm.org\/10.1145\/1654059.1654078"},{"key":"46_CR8","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718942","volume-title":"Computational Methods for Multiphase Flows in Porous Media","author":"Z Chen","year":"2006","unstructured":"Chen, Z., Huan, G., Ma, Y.: Computational Methods for Multiphase Flows in Porous Media. Society for Industrial and Applied Mathematics, Philadelphia (2006)"},{"key":"46_CR9","unstructured":"Choi, J.W., Singh, A., Vuduc, R.W.: Model-driven autotuning of sparse matrix-vector multiply on GPUs. In: Proceedings of the 15th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2010, pp. 115\u2013126. ACM, New York (2010). \n                      http:\/\/doi.acm.org\/10.1145\/1693453.1693471"},{"key":"46_CR10","unstructured":"Godwin, J., Holewinski, J., Sadayappan, P.: High-performance sparse matrix-vector multiplication on GPUs for structured grid computations. In: Proceedings of the 5th Annual Workshop on General Purpose Processing with Graphics Processing Units, GPGPU-5, pp. 47\u201356. ACM, New York (2012). \n                      http:\/\/doi.acm.org\/10.1145\/2159430.2159436"},{"key":"46_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/3-540-45545-0_22","volume-title":"Computational Science - ICCS 2001","author":"E-J Im","year":"2001","unstructured":"Im, E.-J., Yelick, K.A.: Optimizing sparse matrix computations for register reuse in SPARSITY. In: Alexandrov, V.N., Dongarra, J., Juliano, B.A., Renner, R.S., Tan, C.J.K. (eds.) ICCS-ComputSci 2001. LNCS, vol. 2073, pp. 127\u2013136. Springer, Heidelberg (2001). \n                      http:\/\/dx.doi.org\/10.1007\/3-540-45545-0_22"},{"issue":"1","key":"46_CR12","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1177\/1094342004041296","volume":"18","author":"EJ Im","year":"2004","unstructured":"Im, E.J., Yelick, K., Vuduc, R.: Sparsity: optimization framework for sparse matrix kernels. Int. J. High Perform. Comput. Appl. 18(1), 135\u2013158 (2004). \n                      http:\/\/dx.doi.org\/10.1177\/1094342004041296","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"46_CR13","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1177\/1094342012468181","volume":"27","author":"DE Keyes","year":"2013","unstructured":"Keyes, D.E., McInnes, L.C., Woodward, C., Gropp, W.D., Myra, E., Pernice, M., Bell, J., Brown, J., Clo, A., Connors, J., Constantinescu, E., Estep, D., Evans, K., Farhat, C., Hakim, A., Hammond, G., Hansen, G., Hill, J., Isaac, T., Jiao, X., Jordan, K., Kaushik, D., Kaxiras, E., Koniges, A., Lee, K., Lott, A., Lu, Q., Magerlein, J., Maxwell, R., McCourt, M., Mehl, M., Pawlowski, R., Peters, A., Reynolds, D., Riviere, B., R\u00fcde, U., Scheibe, T., Shadid, J., Sheehan, B., Shephard, M., Siegel, A., Smith, B., Tang, X., Wilson, C., Wohlmuth, B.: Multiphysics simulations: challenges and opportunities. Int. J. High Perform. Comput. Appl. 27, 4\u201383 (2013)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"46_CR14","unstructured":"Kincaid, D., Oppe, T., Young, D.: ITPACKV 2D User\u2019s Guide (1989). \n                      http:\/\/www.ma.utexas.edu\/CNA\/ITPACK\/manuals\/userv2d\/"},{"issue":"5","key":"46_CR15","doi-asserted-by":"publisher","first-page":"C401","DOI":"10.1137\/130930352","volume":"36","author":"M Kreutzer","year":"2014","unstructured":"Kreutzer, M., Hager, G., Wellein, G., Fehske, H., Bishop, A.: A unified sparse matrix data format for efficient general sparse matrix-vector multiplication on modern processors with wide SIMD units. SIAM J. Sci. Comput. 36(5), C401\u2013C423 (2014). \n                      http:\/\/dx.doi.org\/10.1137\/130930352","journal-title":"SIAM J. Sci. Comput."},{"key":"46_CR16","doi-asserted-by":"crossref","unstructured":"Kreutzer, M., Hager, G., Wellein, G., Fehske, H., Basermann, A., Bishop, A.R.: Sparse matrix-vector multiplication on GPGPU clusters: a new storage format and a scalable implementation. In: Proceedings of the 2012 IEEE 26th International Parallel and Distributed Processing Symposium Workshops, IPDPSW 2012, pp. 1696\u20131702. IEEE Computer Society, Washington, DC (2012). \n                      http:\/\/dx.doi.org\/10.1109\/IPDPSW.2012.211","DOI":"10.1109\/IPDPSW.2012.211"},{"key":"46_CR17","unstructured":"Minden, V., Smith, B., Knepley, M.: Preliminary implementation of petsc using gpus. In: Proceedings of the 2010 International Workshop of GPU Solutions to Multiscale Problems in Science and Engineering (2010)"},{"key":"46_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1007\/978-3-642-11515-8_10","volume-title":"High Performance Embedded Architectures and Compilers","author":"A Monakov","year":"2010","unstructured":"Monakov, A., Lokhmotov, A., Avetisyan, A.: Automatically tuning sparse matrix-vector multiplication for GPU architectures. In: Patt, Y.N., Foglia, P., Duesterwald, E., Faraboschi, P., Martorell, X. (eds.) HiPEAC 2010. LNCS, vol. 5952, pp. 111\u2013125. Springer, Heidelberg (2010). \n                      http:\/\/dx.doi.org\/10.1007\/978-3-642-11515-8_10"},{"issue":"3","key":"46_CR19","doi-asserted-by":"publisher","first-page":"856","DOI":"10.1137\/0907058","volume":"7","author":"Y Saad","year":"1986","unstructured":"Saad, Y., Schultz, M.: GMRES: a generalized minimal residual algorithm for solving nonsymmetric linear systems. SIAM J. Sci. Stat. Comput. 7(3), 856\u2013869 (1986). \n                      http:\/\/dx.doi.org\/10.1137\/0907058","journal-title":"SIAM J. Sci. Stat. Comput."},{"issue":"8","key":"46_CR20","doi-asserted-by":"publisher","first-page":"815","DOI":"10.1002\/cpe.1658","volume":"23","author":"F V\u00e1zquez","year":"2011","unstructured":"V\u00e1zquez, F., Fern\u00e1ndez, J.J., Garz\u00f3n, E.M.: A new approach for sparse matrix vector product on NVIDIA GPUs. Concurrency Comput. Pract. Experience 23(8), 815\u2013826 (2011). \n                      http:\/\/dx.doi.org\/10.1002\/cpe.1658","journal-title":"Concurrency Comput. Pract. Experience"},{"key":"46_CR21","volume-title":"Combustion Theory","author":"FA Williams","year":"1985","unstructured":"Williams, F.A.: Combustion Theory. Benjamin\/Cummings, Menlo Park (1985)"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2015: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-48096-0_46","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T00:35:52Z","timestamp":1587342952000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-662-48096-0_46"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783662480953","9783662480960"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-48096-0_46","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"25 July 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}