{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T20:33:37Z","timestamp":1771706017875,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2011,11,12]],"date-time":"2011-11-12T00:00:00Z","timestamp":1321056000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2011,11,12]]},"DOI":"10.1145\/2063384.2063392","type":"proceedings-article","created":{"date-parts":[[2011,11,8]],"date-time":"2011-11-08T13:32:09Z","timestamp":1320759129000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":36,"title":["Optimizing symmetric dense matrix-vector multiplication on GPUs"],"prefix":"10.1145","author":[{"given":"Rajib","family":"Nath","sequence":"first","affiliation":[{"name":"University of California, San Diego"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stanimire","family":"Tomov","sequence":"additional","affiliation":[{"name":"University of Tennessee, Knoxville"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tingxing \"Tim\"","family":"Dong","sequence":"additional","affiliation":[{"name":"University of Tennessee, Knoxville"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jack","family":"Dongarra","sequence":"additional","affiliation":[{"name":"University of Tennessee, Knoxville"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2011,11,12]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"CUDA CUBLAS Library. http:\/\/developer.download.nvidia.com.  CUDA CUBLAS Library. http:\/\/developer.download.nvidia.com."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.5555\/1413370.1413402"},{"key":"e_1_3_2_1_3_1","volume-title":"Multicores and Hybrid Architectures: an. Autotuned and Algorithmic Approach. Master Thesis","author":"Dense Linear R. Nath Accelerating","unstructured":"R. Nath Accelerating Dense Linear Algebra for GP Us , Multicores and Hybrid Architectures: an. Autotuned and Algorithmic Approach. Master Thesis , University of Tennessee , Knoxville, USA. R. Nath Accelerating Dense Linear Algebra for GPUs, Multicores and Hybrid Architectures: an. Autotuned and Algorithmic Approach. Master Thesis, University of Tennessee, Knoxville, USA."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-01970-8_89"},{"key":"e_1_3_2_1_5_1","unstructured":"S. Tomov R. Nath P. Du and J. Dongarra. MAGMA version 0.2 Users' Guide. http:\/\/icl.cs.utk.edu\/magma 11\/2009.  S. Tomov R. Nath P. Du and J. Dongarra. MAGMA version 0.2 Users' Guide. http:\/\/icl.cs.utk.edu\/magma 11\/2009."},{"key":"e_1_3_2_1_6_1","volume-title":"BLAS for GPUs","author":"Nath R.","year":"2010","unstructured":"R. Nath , S. Tomov , and J. Dongarra . BLAS for GPUs . Chapter 4, In Scientific Computing with Multicore and Accelerators, Computational Science Series, Chapman and Hall\/CRC , 2010 . R. Nath, S. Tomov, and J. Dongarra. BLAS for GPUs. Chapter 4, In Scientific Computing with Multicore and Accelerators, Computational Science Series, Chapman and Hall\/CRC, 2010."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342010385729"},{"key":"e_1_3_2_1_8_1","volume-title":"http:\/\/icl.cs.utk.edu\/magma","author":"MAGMA","year":"2010","unstructured":"MAGMA version 1.0RC2. http:\/\/icl.cs.utk.edu\/magma , 2010 . MAGMA version 1.0RC2. http:\/\/icl.cs.utk.edu\/magma, 2010."},{"key":"e_1_3_2_1_9_1","volume-title":"http:\/\/developer.download.nvidia.com","author":"Compute Unified Device NVIDIA CUDA","year":"2007","unstructured":"NVIDIA CUDA Compute Unified Device Architecture - Programming Guide . http:\/\/developer.download.nvidia.com , 2007 . NVIDIA CUDA Compute Unified Device Architecture - Programming Guide. http:\/\/developer.download.nvidia.com, 2007."},{"key":"e_1_3_2_1_10_1","volume-title":"Accelerating GPU Kernels for Dense Linear Algebra. Vecpar","author":"Nath R.","year":"2010","unstructured":"R. Nath , S. Tomov ., and J. Dongarra . Accelerating GPU Kernels for Dense Linear Algebra. Vecpar 2010 , Berkeley, CA , USA R. Nath, S. Tomov., and J. Dongarra. Accelerating GPU Kernels for Dense Linear Algebra. Vecpar 2010, Berkeley, CA, USA"},{"key":"e_1_3_2_1_11_1","volume-title":"LAPACK Users' Guide","author":"Anderson E.","year":"1992","unstructured":"E. Anderson , Z. Bai , C. Bischof , L. S. Blackford , J. W. Demmel , J. J. Dongarra , J. Du Croz , A. Greenbaum , S. Hammarling , A. McKenney , and D. Sorensen . LAPACK Users' Guide . SIAM , Philadelphia, PA , 1992 . E. Anderson, Z. Bai, C. Bischof, L. S. Blackford, J. W. Demmel, J. J. Dongarra, J. Du Croz, A. Greenbaum, S. Hammarling, A. McKenney, and D. Sorensen. LAPACK Users' Guide. SIAM, Philadelphia, PA, 1992."},{"key":"e_1_3_2_1_14_1","volume-title":"Dongarra Accelerating the reduction to upper Hessenberg, tridiagonal, and bidiagonal forms through hybrid GPU-based computing PARCO","author":"Tomov S.","year":"2010","unstructured":"S. Tomov , R. Nath , and J. Dongarra Accelerating the reduction to upper Hessenberg, tridiagonal, and bidiagonal forms through hybrid GPU-based computing PARCO 2010 . S. Tomov, R. Nath, and J. Dongarra Accelerating the reduction to upper Hessenberg, tridiagonal, and bidiagonal forms through hybrid GPU-based computing PARCO 2010."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5555\/1087537"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840848"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/263580.263662"},{"key":"e_1_3_2_1_18_1","first-page":"1381","volume-title":"Proc. 1998 IEEE Intl. Conf. Acoustics Speech and Signal Processing","volume":"3","author":"Matteo","year":"1998","unstructured":"Matteo Frigo and Steven G. Johnson. FFTW: An Adaptive Software Architecture for the FFT . Proc. 1998 IEEE Intl. Conf. Acoustics Speech and Signal Processing , vol. 3 , IEEE, 1998 , pp. 1381 -- 1384 . Matteo Frigo and Steven G. Johnson. FFTW: An Adaptive Software Architecture for the FFT. Proc. 1998 IEEE Intl. Conf. Acoustics Speech and Signal Processing, vol. 3, IEEE, 1998, pp. 1381--1384."},{"key":"e_1_3_2_1_19_1","volume-title":"Tomov Fully Empirical Autotuned Dense QR Factorization For Multicore Architectures. Research Report INRIA, to appear in europar","author":"Agullo E.","year":"2011","unstructured":"E. Agullo , J. Dongarra , R. Nath , S. Tomov Fully Empirical Autotuned Dense QR Factorization For Multicore Architectures. Research Report INRIA, to appear in europar 2011 . E. Agullo, J. Dongarra, R. Nath, S. Tomov Fully Empirical Autotuned Dense QR Factorization For Multicore Architectures. Research Report INRIA, to appear in europar 2011."},{"key":"e_1_3_2_1_20_1","unstructured":"Parallel Linear Algebra for Scalable Multi-core Architectures (PLASMA) http:\/\/icl.cs.utk.edu\/plasma\/  Parallel Linear Algebra for Scalable Multi-core Architectures (PLASMA) http:\/\/icl.cs.utk.edu\/plasma\/"}],"event":{"name":"SC '11: International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Seattle Washington","acronym":"SC '11","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IEEE-CS Computer Society"]},"container-title":["Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2063384.2063392","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2063384.2063392","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T10:06:07Z","timestamp":1750241167000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2063384.2063392"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,11,12]]},"references-count":18,"alternative-id":["10.1145\/2063384.2063392","10.1145\/2063384"],"URL":"https:\/\/doi.org\/10.1145\/2063384.2063392","relation":{},"subject":[],"published":{"date-parts":[[2011,11,12]]},"assertion":[{"value":"2011-11-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}