{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,7,16]],"date-time":"2023-07-16T17:40:13Z","timestamp":1689529213142},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2014,9,20]],"date-time":"2014-09-20T00:00:00Z","timestamp":1411171200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2015,2]]},"DOI":"10.1007\/s11227-014-1299-2","type":"journal-article","created":{"date-parts":[[2014,9,19]],"date-time":"2014-09-19T11:28:52Z","timestamp":1411126132000},"page":"369-390","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Efficiently solving tri-diagonal system by chunked cyclic reduction and single-GPU shared memory"],"prefix":"10.1007","volume":"71","author":[{"given":"Di","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Jinhang","family":"Yu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,9,20]]},"reference":[{"key":"1299_CR1","volume-title":"Matrix computations","author":"GH Golub","year":"1996","unstructured":"Golub GH, Van Loan CF (1996) Matrix computations. Johns Hopkins University Press, Baltimore"},{"key":"1299_CR2","doi-asserted-by":"crossref","unstructured":"Niemeyer K, Sung C-J (2014) Recent progress and challenges in exploiting graphics processors in computational fluid dynamics. J Supercomput 67(2):528\u2013564","DOI":"10.1007\/s11227-013-1015-7"},{"key":"1299_CR3","doi-asserted-by":"crossref","first-page":"439","DOI":"10.1016\/j.procs.2013.05.207","volume":"18","author":"Y Wang","year":"2013","unstructured":"Wang Y et al (2013) A parallel solver for incompressible fluid flows. Procedia Comput Sci 18:439\u2013448","journal-title":"Procedia Comput Sci"},{"key":"1299_CR4","doi-asserted-by":"crossref","first-page":"389","DOI":"10.1016\/j.procs.2013.05.202","volume":"18","author":"Z Wei","year":"2013","unstructured":"Wei Z et al (2013) Parallelizing alternating direction implicit solver on GPUs. Procedia Comput Sci 18:389\u2013398","journal-title":"Procedia Comput Sci"},{"key":"1299_CR5","doi-asserted-by":"crossref","DOI":"10.1007\/978-94-011-1112-6","volume-title":"Computational methods in solid mechanics","author":"A Curnier","year":"1994","unstructured":"Curnier A (1994) Computational methods in solid mechanics. Kluwer Academic, Dordrecht"},{"key":"1299_CR6","doi-asserted-by":"crossref","DOI":"10.1142\/4134","volume-title":"Classical and computational solid mechanics","author":"Y Fung","year":"2001","unstructured":"Fung Y, Tong P (2001) Classical and computational solid mechanics. World Scientific, Singapore"},{"key":"1299_CR7","volume-title":"Computational fluid and solid mechanics","author":"KJ Bathe","year":"2001","unstructured":"Bathe KJ (2001) Computational fluid and solid mechanics. Elsevier, Amsterdam"},{"key":"1299_CR8","volume-title":"Computational electromagnetics","author":"T Rylander","year":"2012","unstructured":"Rylander T, Bondeson A, Ingelstr\u00f6m P (2012) Computational electromagnetics. Springer, Berlin"},{"key":"1299_CR9","doi-asserted-by":"crossref","DOI":"10.1002\/9780470829646","volume-title":"Essentials of computational electromagnetics","author":"XQ Sheng","year":"2012","unstructured":"Sheng XQ, Song W (2012) Essentials of computational electromagnetics. Wiley, New York"},{"key":"1299_CR10","volume-title":"Computational finance: numerical methods for pricing financial instruments","author":"G Levy","year":"2004","unstructured":"Levy G (2004) Computational finance: numerical methods for pricing financial instruments. Elsevier Butterworth-Heinemann, Oxford"},{"key":"1299_CR11","volume-title":"Computational finance: a scientific perspective","author":"CA Los","year":"2001","unstructured":"Los CA (2001) Computational finance: a scientific perspective. World Scientific, Singapore"},{"key":"1299_CR12","volume-title":"Handbook of computational finance","author":"JC Duan","year":"2011","unstructured":"Duan JC, H\u00e4rdle W, Gentle JE (2011) Handbook of computational finance. Springer, Berlin"},{"key":"1299_CR13","volume-title":"Computational finance using C and C#","author":"G Levy","year":"2008","unstructured":"Levy G (2008) Computational finance using C and C#. Elsevier, Amsterdam"},{"key":"1299_CR14","volume-title":"Financial engineering and computation: principles, mathematics, algorithms","author":"YD Lyuu","year":"2002","unstructured":"Lyuu YD (2002) Financial engineering and computation: principles, mathematics, algorithms. Cambridge University Press, Cambridge"},{"key":"1299_CR15","volume-title":"GPU Gems 3","author":"H Nguyuen","year":"2008","unstructured":"Nguyuen H, Corporation N (2008) GPU Gems 3. Addison Wesley Professional, Reading"},{"key":"1299_CR16","volume-title":"GPU Gems 2: programming techniques for high-performance graphics and general-purpose computation","author":"M Pharr","year":"2005","unstructured":"Pharr M, Fernando R (2005) GPU Gems 2: programming techniques for high-performance graphics and general-purpose computation. Pearson Addison Wesley Professional, Reading"},{"key":"1299_CR17","volume-title":"Parallel computers 2: architecture, programming, and algorithms","author":"RW Hockney","year":"1988","unstructured":"Hockney RW, Jesshope CR (1988) Parallel computers 2: architecture, programming, and algorithms. A. Hilger, London"},{"issue":"4","key":"1299_CR18","doi-asserted-by":"crossref","first-page":"761","DOI":"10.1137\/0909050","volume":"9","author":"R Sweet","year":"1988","unstructured":"Sweet R (1988) A parallel and vector variant of the cyclic reduction algorithm. SIAM J Sci Stat Comput 9(4):761\u2013765","journal-title":"SIAM J Sci Stat Comput"},{"issue":"11","key":"1299_CR19","doi-asserted-by":"crossref","first-page":"1273","DOI":"10.1016\/0167-8191(93)90031-F","volume":"19","author":"P Amodio","year":"1993","unstructured":"Amodio P, Mastronardi N (1993) A parallel version of the cyclic reduction algorithm on a hypercube. Parallel Comput 19(11):1273\u20131281","journal-title":"Parallel Comput"},{"issue":"11","key":"1299_CR20","doi-asserted-by":"crossref","first-page":"1769","DOI":"10.1016\/0167-8191(95)00033-0","volume":"21","author":"N Mattor","year":"1995","unstructured":"Mattor N, Williams TJ, Hewett DW (1995) Algorithm for solving tri-diagonal matrix problems in parallel. Parallel Comput 21(11):1769\u20131782","journal-title":"Parallel Comput"},{"issue":"4","key":"1299_CR21","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1145\/355656.355657","volume":"1","author":"HS Stone","year":"1975","unstructured":"Stone HS (1975) Parallel tri-diagonal equation solvers. ACM Trans Math Softw 1(4):289\u2013307","journal-title":"ACM Trans Math Softw"},{"issue":"3","key":"1299_CR22","doi-asserted-by":"crossref","first-page":"661","DOI":"10.1137\/0726039","volume":"26","author":"H Schwandt","year":"1989","unstructured":"Schwandt H (1989) Cyclic reduction for tri-diagonal systems of equations with interval coefficients on vector computers. SIAM J Numer Anal 26(3):661\u2013680","journal-title":"SIAM J Numer Anal"},{"key":"1299_CR23","doi-asserted-by":"crossref","unstructured":"Allmann S, Rauber T, Runger G (2001) Cyclic reduction on distributed shared memory machines. In: Proceedings of ninth Euromicro workshop on parallel and distributed processing, 2001","DOI":"10.1109\/EMPDP.2001.905055"},{"issue":"5","key":"1299_CR24","doi-asserted-by":"crossref","first-page":"545","DOI":"10.1016\/0167-8191(93)90005-6","volume":"19","author":"MP Bekakos","year":"1993","unstructured":"Bekakos MP, Evans DJ (1993) Parallel cyclic odd\u2013even reduction algorithms for solving Toeplitz tri-diagonal equations on MIMD computers. Parallel Comput 19(5):545\u2013561","journal-title":"Parallel Comput"},{"issue":"2","key":"1299_CR25","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1016\/0167-8191(89)90014-8","volume":"10","author":"E Gallopoulos","year":"1989","unstructured":"Gallopoulos E, Saad Y (1989) A parallel block cyclic reduction algorithm for the fast solution of elliptic equations. Parallel Comput 10(2):143\u2013159","journal-title":"Parallel Comput"},{"issue":"4","key":"1299_CR26","doi-asserted-by":"crossref","first-page":"706","DOI":"10.1137\/0714048","volume":"14","author":"R Sweet","year":"1977","unstructured":"Sweet R (1977) A cyclic reduction algorithm for solving block tri-diagonal systems of arbitrary dimension. SIAM J Numer Anal 14(4):706\u2013720","journal-title":"SIAM J Numer Anal"},{"issue":"2","key":"1299_CR27","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1016\/j.jpdc.2012.10.003","volume":"73","author":"SK Seal","year":"2013","unstructured":"Seal SK, Perumalla KS, Hirshman SP (2013) Revisiting parallel cyclic reduction and parallel prefix-based algorithms for block tri-diagonal systems of equations. J Parallel Distrib Comput 73(2):273\u2013280","journal-title":"J Parallel Distrib Comput"},{"issue":"2","key":"1299_CR28","doi-asserted-by":"crossref","first-page":"170","DOI":"10.1145\/355945.355947","volume":"7","author":"HH Wang","year":"1981","unstructured":"Wang HH (1981) A parallel method for tri-diagonal equations. ACM Trans Math Softw 7(2):170\u2013183","journal-title":"ACM Trans Math Softw"},{"issue":"1","key":"1299_CR29","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1145\/321738.321741","volume":"20","author":"HS Stone","year":"1973","unstructured":"Stone HS (1973) An efficient parallel algorithm for the solution of a tri-diagonal linear system of equations. J ACM 20(1):27\u201338","journal-title":"J ACM"},{"issue":"1","key":"1299_CR30","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1137\/S0895479891220533","volume":"15","author":"S Bondeli","year":"1994","unstructured":"Bondeli S, Gander W (1994) Cyclic reduction for special tri-diagonal systems. SIAM J Matrix Anal Appl 15(1):321\u2013330","journal-title":"SIAM J Matrix Anal Appl"},{"issue":"3","key":"1299_CR31","doi-asserted-by":"crossref","first-page":"286","DOI":"10.1109\/12.127441","volume":"41","author":"S Xian-he","year":"1992","unstructured":"Xian-he S, Zhang H, Ni LM (1992) Efficient tri-diagonal solvers on multicomputers. IEEE Trans Comput 41(3):286\u2013296","journal-title":"IEEE Trans Comput"},{"issue":"6\u20137","key":"1299_CR32","doi-asserted-by":"crossref","first-page":"277","DOI":"10.1016\/j.parco.2012.03.003","volume":"38","author":"F Arg\u00fcello","year":"2012","unstructured":"Arg\u00fcello F et al (2012) The split-and-merge method in general purpose computation on GPUs. Parallel Comput 38(6\u20137):277\u2013288","journal-title":"Parallel Comput"},{"issue":"5","key":"1299_CR33","doi-asserted-by":"crossref","first-page":"879","DOI":"10.1109\/JPROC.2008.917757","volume":"96","author":"JD Owens","year":"2008","unstructured":"Owens JD et al (2008) GPU computing. Proc IEEE 96(5):879\u2013899","journal-title":"Proc IEEE"},{"key":"1299_CR34","doi-asserted-by":"crossref","unstructured":"Volkov V, Demmel JW (2008) Benchmarking GPUs to tune dense linear algebra. In: Proceedings of the 2008 ACM\/IEEE conference on supercomputing. IEEE Press, Austin, pp 1\u201311","DOI":"10.1109\/SC.2008.5214359"},{"key":"1299_CR35","doi-asserted-by":"crossref","unstructured":"Zhang Y, Cohen J, Owens JD (2010) Fast tri-diagonal solvers on the GPU. In: Proceedings of the 15th ACM SIGPLAN symposium on principles and practice of parallel programming. ACM, Bangalore, pp 127\u2013136","DOI":"10.1145\/1693453.1693472"},{"key":"1299_CR36","unstructured":"Zhang Y, Cohen J, Davidson AA, Owens JD (2011) A hybrid method for solving tri-diagonal systems on the GPU. In: W-mW Hwu (ed) GPU computing gems, vol 2, chap 11. Morgan Kaufmann, Los Altos, pp 117\u2013132"},{"key":"1299_CR37","doi-asserted-by":"crossref","unstructured":"Zhang Y (2009) Fast tridiagonal solvers on GPU. In: GPU technology conference. San Jose, California","DOI":"10.1145\/1693453.1693472"},{"key":"1299_CR38","unstructured":"Davidson A, Yao Z, Owens JD (2011) An auto-tuned method for solving large tri-diagonal systems on the GPU. In: IEEE international symposium on parallel and distributed processing (IPDPS), 2011"},{"key":"1299_CR39","doi-asserted-by":"crossref","unstructured":"Davidson A, Owens JD (2011) Register packing for cyclic reduction: a case study. In: Proceedings of the fourth workshop on general purpose processing on graphics processing units. ACM, Newport Beach, pp 1\u20136","DOI":"10.1145\/1964179.1964185"},{"key":"1299_CR40","unstructured":"Chang L-W et al (2012) A scalable, numerically stable, high-performance tri-diagonal solver using GPUs. In: Proceedings of the international conference on high performance computing, networking, storage and analysis. IEEE Computer Society Press, Salt Lake City, pp 1\u201311"},{"key":"1299_CR41","unstructured":"Hee-Seok K et al. (2011) A scalable tri-diagonal solver for GPUs. In: International conference on parallel processing (ICPP), 2011"},{"key":"1299_CR42","unstructured":"Cuda C Programming, Version Guide, 5.5. (2013) Nvidia, Santa Clara"},{"key":"1299_CR43","volume-title":"CUDA by example: an introduction to general-purpose GPU programming","author":"J Sanders","year":"2010","unstructured":"Sanders J, Kandrot E (2010) CUDA by example: an introduction to general-purpose GPU programming. Pearson Education, Boston"},{"key":"1299_CR44","volume-title":"CUDA programming: a developer\u2019s guide to parallel computing with GPUs","author":"S Cook","year":"2013","unstructured":"Cook S (2013) CUDA programming: a developer\u2019s guide to parallel computing with GPUs. Morgan Kaufmann, Los Altos"},{"key":"1299_CR45","volume-title":"CUDA application design and development","author":"R Farber","year":"2011","unstructured":"Farber R (2011) CUDA application design and development. Morgan Kaufmann, Los Altos"},{"key":"1299_CR46","volume-title":"The CUDA handbook: a comprehensive guide to GPU programming","author":"N Wilt","year":"2013","unstructured":"Wilt N (2013) The CUDA handbook: a comprehensive guide to GPU programming. Pearson Education, Boston"},{"issue":"1","key":"1299_CR47","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1109\/TPDS.2010.61","volume":"22","author":"D Goeddeke","year":"2011","unstructured":"Goeddeke D, Strzodka R (2011) Cyclic reduction tri-diagonal solvers on GPUs applied to mixed-precision multigrid. IEEE Trans Parallel Distrib Syst 22(1):22\u201332","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"1299_CR48","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511812583","volume-title":"Parallel scientific computing in c++ and mpi: a seamless approach to parallel algorithms and their implementation","author":"GE Karniadakis","year":"2003","unstructured":"Karniadakis GE, Kirby RM (2003) Parallel scientific computing in c++ and mpi: a seamless approach to parallel algorithms and their implementation. Cambridge University Press, Cambridge"},{"issue":"145","key":"1299_CR49","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1090\/S0025-5718-1979-0514818-5","volume":"33","author":"PN Swarztrauber","year":"1979","unstructured":"Swarztrauber PN (1979) A parallel algorithm for solving general tri-diagonal equations. Math Comput 33(145):185\u2013199","journal-title":"Math Comput"},{"issue":"7","key":"1299_CR50","doi-asserted-by":"crossref","first-page":"925","DOI":"10.1016\/S0167-8191(01)00075-8","volume":"27","author":"HX Lin","year":"2001","unstructured":"Lin HX (2001) A unifying graph model for designing parallel algorithms for tri-diagonal systems. Parallel Comput 27(7):925\u2013939","journal-title":"Parallel Comput"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1299-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-014-1299-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1299-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,16]],"date-time":"2023-07-16T17:18:53Z","timestamp":1689527933000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-014-1299-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,9,20]]},"references-count":50,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2015,2]]}},"alternative-id":["1299"],"URL":"https:\/\/doi.org\/10.1007\/s11227-014-1299-2","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,9,20]]}}}