{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,31]],"date-time":"2025-08-31T10:01:55Z","timestamp":1756634515854,"version":"3.41.0"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319586663"},{"type":"electronic","value":"9783319586670"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-58667-0_9","type":"book-chapter","created":{"date-parts":[[2017,5,11]],"date-time":"2017-05-11T15:27:38Z","timestamp":1494516458000},"page":"158-178","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["A Framework for Out of Memory SVD Algorithms"],"prefix":"10.1007","author":[{"given":"Khairul","family":"Kabir","sequence":"first","affiliation":[]},{"given":"Azzam","family":"Haidar","sequence":"additional","affiliation":[]},{"given":"Stanimire","family":"Tomov","sequence":"additional","affiliation":[]},{"given":"Aurelien","family":"Bouteiller","sequence":"additional","affiliation":[]},{"given":"Jack","family":"Dongarra","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,5,12]]},"reference":[{"key":"9_CR1","unstructured":"Anderson, E., Bai, Z., Bischof, C., Blackford, L.S., Demmel, J.W., Dongarra, J.J., Du Croz, J., Greenbaum, A., Hammarling, S., McKenney, A., Sorensen, D.: LAPACK Users\u2019 Guide. SIAM, Philadelphia, (1992). http:\/\/www.netlib.org\/lapack\/lug\/"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Bischof, C., Lang, B., Sun, X.: Parallel tridiagonalization through two-step band reduction. In: Proceedings of the Scalable High-Performance Computing Conference, pp. 23\u201327. IEEE Computer Society Press (1994)","DOI":"10.1109\/SHPCC.1994.296622"},{"issue":"4","key":"9_CR3","doi-asserted-by":"publisher","first-page":"602","DOI":"10.1145\/365723.365736","volume":"26","author":"CH Bischof","year":"2000","unstructured":"Bischof, C.H., Lang, B., Sun, X.: Algorithm 807: the SBR toolbox\u2013software for successive band reduction. ACM TOMS 26(4), 602\u2013616 (2000)","journal-title":"ACM TOMS"},{"issue":"15","key":"9_CR4","doi-asserted-by":"publisher","first-page":"1481","DOI":"10.1002\/1096-9128(20001225)12:15<1481::AID-CPE540>3.0.CO;2-V","volume":"12","author":"EF D\u2019Azevedo","year":"2000","unstructured":"D\u2019Azevedo, E.F., Dongarra, J.: The design and implementation of the parallel out-of-core ScaLAPACK LU, QR, and Cholesky factorization routines. Concurr. - Pract. Exp. 12(15), 1481\u20131493 (2000)","journal-title":"Concurr. - Pract. Exp."},{"issue":"1\u20132","key":"9_CR5","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1016\/0377-0427(89)90367-1","volume":"27","author":"JJ Dongarra","year":"1989","unstructured":"Dongarra, J.J., Sorensen, D.C., Hammarling, S.J.: Block reduction of matrices to condensed forms for eigenvalue computations. J. Comput. Appl. Math. 27(1\u20132), 215\u2013227 (1989)","journal-title":"J. Comput. Appl. Math."},{"issue":"7","key":"9_CR6","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/S0898-1221(98)00029-7","volume":"35","author":"JJ Dongarra","year":"1998","unstructured":"Dongarra, J.J., Hammarling, S., Walker, D.W.: Key concepts for parallel out-of-core LU factorization. Comput. Math. Appl. 35(7), 13\u201331 (1998)","journal-title":"Comput. Math. Appl."},{"key":"9_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/10703040_3","volume-title":"Vector and Parallel Processing \u2013 VECPAR\u201998","author":"WN Gansterer","year":"1999","unstructured":"Gansterer, W.N., Kvasnicka, D.F., Ueberhuber, C.W.: Multi-sweep algorithms for the symmetric eigenproblem. In: Hern\u00e1ndez, V., Palma, J.M.L.M., Dongarra, J.J. (eds.) VECPAR 1998. LNCS, vol. 1573, pp. 20\u201328. Springer, Heidelberg (1999). doi:10.1007\/10703040_3"},{"key":"9_CR8","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1016\/0021-9991(87)90178-1","volume":"69","author":"R Grimes","year":"1987","unstructured":"Grimes, R., Krakauer, H., Lewis, J., Simon, H., Wei, S.-H.: The solution of large dense generalized eigenvalue problems on the cray X-MP\/24 with SSD. J. Comput. Phys. 69, 471\u2013481 (1987)","journal-title":"J. Comput. Phys."},{"key":"9_CR9","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1145\/44128.44130","volume":"14","author":"RG Grimes","year":"1988","unstructured":"Grimes, R.G., Simon, H.D.: Solution of large, dense symmetric generalized eigenvalue problems using secondary storage. ACM Trans. Math. Softw. 14, 241\u2013256 (1988)","journal-title":"ACM Trans. Math. Softw."},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Haidar, A., Tomov, S., Dongarra, J., Solca, R., Schulthess, T.: A novel hybrid CPU-GPU generalized eigensolver for electronic structure calculations based on fine grained memory aware tasks. Int. J. High Perform. Comput. Appl. (2012, accepted)","DOI":"10.1177\/1094342013502097"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Haidar, A., Kurzak, J., Luszczek, P.: An improved parallel singular value algorithm and its implementation for multicore hardware. In: SC 2012: The International Conference for High Performance Computing, Networking, Storage and Analysis (2013)","DOI":"10.1145\/2503210.2503292"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Haidar, A., Ltaief, H., Dongarra, J.: Parallel reduction to condensed forms for symmetric eigenvalue problems using aggregated fine-grained and memory-aware kernels. In: Proceedings of SC 2011, pp. 8:1\u20138:11. ACM, New York (2011)","DOI":"10.1145\/2063384.2063394"},{"key":"9_CR13","doi-asserted-by":"crossref","unstructured":"Haidar, A., Ltaief, H., Luszczek, P., Dongarra, J.: A comprehensive study of task coalescing for selecting parallelism granularity in a two-stage bidiagonal reduction. In: Proceedings of the IEEE International Parallel and Distributed Processing Symposium, Shanghai, China, 21\u201325 May 2012. ISBN 978-1-4673-0975-2","DOI":"10.1109\/IPDPS.2012.13"},{"key":"9_CR14","doi-asserted-by":"publisher","first-page":"1320","DOI":"10.1137\/0914078","volume":"14","author":"B Lang","year":"1993","unstructured":"Lang, B.: A parallel algorithm for reducing symmetric banded matrices to tridiagonal form. SIAM J. Sci. Comput. 14, 1320\u20131338 (1993)","journal-title":"SIAM J. Sci. Comput."},{"key":"9_CR15","doi-asserted-by":"crossref","unstructured":"Ltaief, H., Luszczek, P., Dongarra, J.: High performance bidiagonal reduction using tile algorithms on homogeneous multicore architectures. ACM TOMS, 39(3) (2013, in publication)","DOI":"10.1145\/2450153.2450154"},{"key":"9_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"661","DOI":"10.1007\/978-3-642-31464-3_67","volume-title":"Parallel Processing and Applied Mathematics","author":"H Ltaief","year":"2012","unstructured":"Ltaief, H., Luszczek, P., Dongarra, J.: Enhancing parallelism of tile bidiagonal transformation on multicore architectures using tree reduction. In: Wyrzykowski, R., Dongarra, J., Karczewski, K., Wa\u015bniewski, J. (eds.) PPAM 2011. LNCS, vol. 7203, pp. 661\u2013670. Springer, Heidelberg (2012). doi:10.1007\/978-3-642-31464-3_67"},{"key":"9_CR17","unstructured":"Rabani, E., Toledo, S.: Out-of-core SVD and QR decompositions. In: PPSC (2001)"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Toledo, S., Gustavson, F.G.: The design and implementation of SOLAR, a portable library for scalable out-of-core linear algebra computations. In: Proceedings of the Fourth Workshop on I\/O in Parallel and Distributed Systems: Part of the Federated Computing Research Conference, IOPADS 1996, pp. 28\u201340. ACM, New York (1996)","DOI":"10.1145\/236017.236029"},{"key":"9_CR19","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.procs.2012.04.005","volume":"9","author":"I Yamazaki","year":"2012","unstructured":"Yamazaki, I., Tomov, S., Dongarra, J.: One-sided dense matrix factorizations on a multicore with multiple GPU accelerators*. Procedia Comput. Sci. 9, 37\u201346 (2012)","journal-title":"Procedia Comput. Sci."},{"key":"9_CR20","doi-asserted-by":"crossref","unstructured":"Yamazaki, I., Tomov, S., Dongarra, J.: Non-GPU-resident dense symmetric indefinite factorization. Concurr. Comput.: Pract. Exp. (2016)","DOI":"10.1002\/cpe.4012"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-58667-0_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T12:54:20Z","timestamp":1750251260000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-58667-0_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319586663","9783319586670"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-58667-0_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"12 May 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}