{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T19:42:03Z","timestamp":1772912523713,"version":"3.50.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,10,11]],"date-time":"2016-10-11T00:00:00Z","timestamp":1476144000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["51539002"],"award-info":[{"award-number":["51539002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["51509020"],"award-info":[{"award-number":["51509020"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"the Fundamental Research Funds for Central Public Welfare Research Institutes","award":["CKSF2015033\/CL"],"award-info":[{"award-number":["CKSF2015033\/CL"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2017,1]]},"DOI":"10.1007\/s11227-016-1887-4","type":"journal-article","created":{"date-parts":[[2016,10,11]],"date-time":"2016-10-11T04:43:09Z","timestamp":1476160989000},"page":"433-454","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["A Jacobi_PCG solver for sparse linear systems on multi-GPU cluster"],"prefix":"10.1007","volume":"73","author":[{"given":"Shaozhong","family":"Lin","sequence":"first","affiliation":[]},{"given":"Zhiqiang","family":"Xie","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,10,11]]},"reference":[{"key":"1887_CR1","unstructured":"Nvidia (2007) NVIDIA CUDA Compute unified device architecture programming guide. NVIDIA Corporation. http:\/\/developer.download.nvidia.com\/compute\/cuda\/1.0\/NVIDIA_CUDA_Programming_Guide_1.0.pdf"},{"key":"1887_CR2","doi-asserted-by":"crossref","DOI":"10.1137\/1.9780898718003","volume-title":"Iterative methods for sparse linear systems","author":"Y Saad","year":"2003","unstructured":"Saad Y (2003) Iterative methods for sparse linear systems. SIAM, Philadelpha"},{"issue":"6","key":"1887_CR3","doi-asserted-by":"crossref","first-page":"1394","DOI":"10.1137\/0724090","volume":"24","author":"EL Poole","year":"1987","unstructured":"Poole EL, Ortega JM (1987) Multicolor ICCG methods for vector computers. SIAM J Numer Anal 24(6):1394\u20131418","journal-title":"SIAM J Numer Anal"},{"issue":"8","key":"1887_CR4","doi-asserted-by":"crossref","first-page":"675","DOI":"10.1016\/S1383-7621(99)00036-3","volume":"46","author":"P Gonz\u00e1lez","year":"2000","unstructured":"Gonz\u00e1lez P, Cabaleiro JC, Pena TF (2000) On parallel solvers for sparse triangular systems. J Syst Archit 46(8):675\u2013685","journal-title":"J Syst Archit"},{"issue":"6","key":"1887_CR5","first-page":"79","volume":"22","author":"SZ Lin","year":"2013","unstructured":"Lin SZ, Xu HW, Xie ZQ (2013) Hybrid programming implementation of MPI + OpenMP on multicolor SSOR-PCG. Comput Aided Eng 22(6):79\u201383 (in Chinese)","journal-title":"Comput Aided Eng"},{"issue":"2","key":"1887_CR6","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1007\/s11227-012-0825-3","volume":"63","author":"RP Li","year":"2013","unstructured":"Li RP, Saad Y (2013) GPU-accelerated preconditioned iterative linear solvers. J Supercomput 63(2):443\u2013466","journal-title":"J Supercomput"},{"issue":"4","key":"1887_CR7","first-page":"843","volume":"52","author":"Y Chen","year":"2015","unstructured":"Chen Y, Zhao YH, Zhao W, Zhao L (2015) GPU-accelerated incomplete Cholesky factorization preconditioned conjugate gradient method. J Comput Res Dev 52(4):843\u2013850 (in Chinese)","journal-title":"J Comput Res Dev"},{"issue":"3","key":"1887_CR8","doi-asserted-by":"crossref","first-page":"917","DOI":"10.1145\/882262.882364","volume":"22","author":"J Bolz","year":"2003","unstructured":"Bolz J, Farmer I, Grinspun E (2003) Sparse matrix solvers on the GPU: conjugate gradients and multigrid. Acm Trans Graph 22(3):917\u2013924","journal-title":"Acm Trans Graph"},{"issue":"1\u20132","key":"1887_CR9","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1007\/s00450-010-0112-6","volume":"25","author":"A Cevahir","year":"2010","unstructured":"Cevahir A, Nukada A, Matsuoka S (2010) High performance conjugate gradient solver on multi-GPU clusters using hypergraph partitioning. Comput Sci Res Dev 25(1\u20132):83\u201391","journal-title":"Comput Sci Res Dev"},{"key":"1887_CR10","volume-title":"Concurrent number cruncher: an efficient sparse linear solver on the GPU. High performance computing and communications","author":"L Buatois","year":"2007","unstructured":"Buatois L, Caumon G, L\u00e9vy B (2007) Concurrent number cruncher: an efficient sparse linear solver on the GPU. High performance computing and communications. Springer, Berlin"},{"key":"1887_CR11","unstructured":"Georgescu S, Okuda H (2007) Conjugate gradients on graphic hardware: performance & feasibility. http:\/\/citeseerx.ist.psu.edu\/viewdoc\/download?doi=10.1.1.163.8861&rep=rep1&type=pdf"},{"key":"1887_CR12","unstructured":"Agullo E, Giraud L, Guermouche A et al (2012) Task-based conjugate-gradient for multi-GPUs platforms. RR-8192, INRIA"},{"issue":"3","key":"1887_CR13","first-page":"825","volume":"33","author":"JF Zhang","year":"2013","unstructured":"Zhang JF, Shen DF (2013) GPU-based preconditioned conjugate gradient method for solving sparse linear systems. J Comput Appl 33(3):825\u2013829 (in Chinese)","journal-title":"J Comput Appl"},{"issue":"1","key":"1887_CR14","doi-asserted-by":"crossref","first-page":"200","DOI":"10.1007\/s11227-014-1143-8","volume":"69","author":"LZ Khodja","year":"2014","unstructured":"Khodja LZ, Couturier R, Giersch A et al (2014) Parallel sparse linear solver with GMRES method using minimization techniques of communications for GPU clusters. J Supercomput 69(1):200\u2013224","journal-title":"J Supercomput"},{"issue":"2","key":"1887_CR15","doi-asserted-by":"crossref","first-page":"327","DOI":"10.1007\/s10586-013-0279-2","volume":"17","author":"C Chen","year":"2014","unstructured":"Chen C, Taha TM (2014) A communication reduction approach to iteratively solve large sparse linear systems on a GPGPU cluster. Clust Comput 17(2):327\u2013337","journal-title":"Clust Comput"},{"key":"1887_CR16","unstructured":"Wolfe M (2014) CUDA Fortran programming guide and reference. The Portland Group, Release. http:\/\/www.pgroup.com\/support\/"},{"key":"1887_CR17","unstructured":"Forum MPI (2012) MPI: a message-passing interface standard. Version 3.0, September. http:\/\/www.mpi-forum.org\/docs\/mpi-3.0\/mpi30-report.pdf"},{"key":"1887_CR18","volume-title":"CUDA Fortran for scientists and engineers: best practices for efficient CUDA Fortran programming","author":"G Ruetsch","year":"2013","unstructured":"Ruetsch G, Fatica M (2013) CUDA Fortran for scientists and engineers: best practices for efficient CUDA Fortran programming. Elsevier, Amsterdam"},{"key":"1887_CR19","unstructured":"Harris M (2007) Optimizing parallel reduction in CUDA. Nvidia developer technology"},{"key":"1887_CR20","unstructured":"Bell N, Garland M (2008) Efficient sparse matrix\u2013vector multiplication on CUDA. Nvidia Technical Report NVR-2008-004, Nvidia Corporation"},{"key":"1887_CR21","unstructured":"Vazquez F, Garzon EM, Martinez JA et al (2009) The sparse matrix vector product on GPUs. In: Proceedings of the 2009 International Conference on Computational and Mathematical Methods in Science and Engineering, vol 2, pp 1081\u20131092"},{"key":"1887_CR22","unstructured":"Wafai M (2009) Sparse matrix vector multiplications on graphics processors. University of Stuttgart"},{"key":"1887_CR23","volume-title":"Computer solution of large sparse positive definite systems","author":"A George","year":"1981","unstructured":"George A, Liu JW (1981) Computer solution of large sparse positive definite systems. Prentice-Hall, Englewood Cliffs, New Jersey"},{"key":"1887_CR24","doi-asserted-by":"crossref","unstructured":"Cuthill E, McKee J (1969) Reducing the bandwidth of sparse symmetric matrices. In: Proceedings of the 24th National Conference. ACM, pp 157\u2013172","DOI":"10.1145\/800195.805928"},{"key":"1887_CR25","volume-title":"The art of computer programming: sorting and searching","author":"DE Knuth","year":"1998","unstructured":"Knuth DE (1998) The art of computer programming: sorting and searching. Addison-Wesley, Boston, New York"},{"key":"1887_CR26","unstructured":"https:\/\/en.wikipedia.org\/wiki\/Counting_sort"},{"key":"1887_CR27","unstructured":"Mathew C (2011) Multi-GPU programming with CUDA Fortran, MPI, and GPUDirect-Part 1. http:\/\/www.pgroup.com\/lit\/articles\/insider\/v3n3a2.htm"},{"key":"1887_CR28","unstructured":"Micikevicius P Multi-GPU programming. http:\/\/on-demand.gputechconf.com\/gtc\/2012\/presentations\/S0515-GTC2012-Multi-GPU-Programming.pdf"},{"key":"1887_CR29","doi-asserted-by":"crossref","unstructured":"Jacobsen DA, Thibault JC, Senocak I (2010) An MPI-CUDA implementation for massively parallel incompressible flow computations on Multi-GPU clusters. In: 48th AIAA Aerospace Sciences Meeting and Exhibit, Orlando, FL., Jan 2010","DOI":"10.2514\/6.2010-522"},{"issue":"1","key":"1887_CR30","doi-asserted-by":"crossref","first-page":"1093","DOI":"10.1016\/j.procs.2010.04.121","volume":"1","author":"P Macio\u0142","year":"2010","unstructured":"Macio\u0142 P, P\u0142aszewski P, Bana\u015b K (2010) 3D finite element numerical integration on GPUs. Procedia Comput Sci 1(1):1093\u20131100","journal-title":"Procedia Comput Sci"},{"key":"1887_CR31","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1016\/j.cam.2013.09.001","volume":"257","author":"ZS Fu","year":"2014","unstructured":"Fu ZS, Lewis TJ, Kirby RM et al (2014) Architecting the finite element method pipeline for the GPU. J Comput Appl Math 257:195\u2013211","journal-title":"J Comput Appl Math"},{"key":"1887_CR32","unstructured":"DeConinck A (2014) Tools and tips for managing a gpu cluster. In: GPU Technology Conference http:\/\/on-demand.gputechconf.com\/gtc\/2014\/presentations\/S4253-tools-tips-for-managing-a-gpu-cluster.pdf"},{"key":"1887_CR33","doi-asserted-by":"crossref","unstructured":"Thapliyal H, Arabnia HR, Vinod AP (2006) Combined integer and floating point multiplication architecture (CIFM) for FPGAs and its reversible logic implementation. In: Proceedings of 49th IEEE International Midwest Symposium on Circuits and Systems. IEEE, pp 148\u2013154","DOI":"10.1109\/MWSCAS.2006.382306"},{"key":"1887_CR34","unstructured":"Thapliyal H, Arabnia HR, Bajpai R et al (2007) Combined integer and variable precision (CIVP) floating point multiplication architecture for FPGAs. In: Proceedings of 2007 International Conference on Parallel & Distributed Processing Techniques & Applications (PDPTA\u201907), pp 449\u2013450"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-016-1887-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-016-1887-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-016-1887-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,24]],"date-time":"2017-06-24T20:49:01Z","timestamp":1498337341000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-016-1887-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,10,11]]},"references-count":34,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,1]]}},"alternative-id":["1887"],"URL":"https:\/\/doi.org\/10.1007\/s11227-016-1887-4","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,10,11]]}}}