{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:25:03Z","timestamp":1768029903115,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,11,12]],"date-time":"2017-11-12T00:00:00Z","timestamp":1510444800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,11,12]]},"DOI":"10.1145\/3126908.3126941","type":"proceedings-article","created":{"date-parts":[[2017,11,8]],"date-time":"2017-11-08T21:02:30Z","timestamp":1510174950000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":23,"title":["Designing vector-friendly compact BLAS and LAPACK kernels"],"prefix":"10.1145","author":[{"given":"Kyungjoo","family":"Kim","sequence":"first","affiliation":[{"name":"Sandia National Laboratories"}]},{"given":"Timothy B.","family":"Costa","sequence":"additional","affiliation":[{"name":"Intel Corporation"}]},{"given":"Mehmet","family":"Deveci","sequence":"additional","affiliation":[{"name":"Sandia National Laboratories"}]},{"given":"Andrew M.","family":"Bradley","sequence":"additional","affiliation":[{"name":"Sandia National Laboratories"}]},{"given":"Simon D.","family":"Hammond","sequence":"additional","affiliation":[{"name":"Sandia National Laboratories"}]},{"given":"Murat E.","family":"Guney","sequence":"additional","affiliation":[{"name":"Intel Corporation"}]},{"given":"Sarah","family":"Knepper","sequence":"additional","affiliation":[{"name":"Intel Corporation"}]},{"given":"Shane","family":"Story","sequence":"additional","affiliation":[{"name":"Intel Corporation"}]},{"given":"Sivasankaran","family":"Rajamanickam","sequence":"additional","affiliation":[{"name":"Sandia National Laboratories"}]}],"member":"320","published-online":{"date-parts":[[2017,11,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.05.302"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-41321-1_2"},{"key":"e_1_3_2_1_3_1","volume-title":"Anne Greenbaum, Sven Hammarling, Alan McKenney, and others.","author":"Anderson Edward","year":"1999","unstructured":"Edward Anderson , Zhaojun Bai , Christian Bischof , L Susan Blackford , James Demmel , Jack Dongarra , Jeremy Du Croz , Anne Greenbaum, Sven Hammarling, Alan McKenney, and others. 1999 . LAPACK Users' guide. SIAM. Edward Anderson, Zhaojun Bai, Christian Bischof, L Susan Blackford, James Demmel, Jack Dongarra, Jeremy Du Croz, Anne Greenbaum, Sven Hammarling, Alan McKenney, and others. 1999. LAPACK Users' guide. SIAM."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.2514\/6.2015-1893"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2014.07.003"},{"key":"e_1_3_2_1_6_1","volume-title":"http:\/\/docs.nvidia.com\/cuda\/cublas\/index.html, (last accessed","author":"Toolkit Documentation CUDA","year":"2017","unstructured":"CUDA Toolkit Documentation . 2017. http:\/\/docs.nvidia.com\/cuda\/cublas\/index.html, (last accessed Mar 2017 ). (2017). CUDA Toolkit Documentation. 2017. http:\/\/docs.nvidia.com\/cuda\/cublas\/index.html, (last accessed Mar 2017). (2017)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1137\/15M1017946"},{"key":"e_1_3_2_1_8_1","unstructured":"Jack Dongarra Iain Duff Mark Gates Azzam Haidar Sven Hammarling Nicholas J Higham Jonathon Hogg Pedro Valero-Lara Samuel D Relton Stanimire Tomov and others. 2016. A proposed API for batched basic linear algebra subprograms. (2016).  Jack Dongarra Iain Duff Mark Gates Azzam Haidar Sven Hammarling Nicholas J Higham Jonathon Hogg Pedro Valero-Lara Samuel D Relton Stanimire Tomov and others. 2016. A proposed API for batched basic linear algebra subprograms. (2016)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/77626.79170"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1356052.1356053"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1377603.1377607"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.2172\/1221578"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/3014904.3015017"},{"key":"e_1_3_2_1_14_1","volume-title":"TR-16-13","author":"Huang Jianyu","unstructured":"Jianyu Huang and Robert A Van de Geijn . 2016. BLISlab: A Sandbox for Optimizing GEMM. FLAME Working Note #80 , TR-16-13 . The University of Texas at Austin. Jianyu Huang and Robert A Van de Geijn. 2016. BLISlab: A Sandbox for Optimizing GEMM. FLAME Working Note #80, TR-16-13. The University of Texas at Austin."},{"key":"e_1_3_2_1_15_1","volume-title":"https:\/\/software.intel.com\/en-us\/intel-mkl, (last accessed","author":"Kernel Library Intel Math","year":"2017","unstructured":"Intel Math Kernel Library . 2017. https:\/\/software.intel.com\/en-us\/intel-mkl, (last accessed Mar 2017 ). (2017). Intel Math Kernel Library. 2017. https:\/\/software.intel.com\/en-us\/intel-mkl, (last accessed Mar 2017). (2017)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/355841.355847"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1064978.1065034"},{"key":"e_1_3_2_1_18_1","unstructured":"Samuel Relton and Mawussi Zounon. 2017. Batched BLAS API and Memory Layouts. (2017). http:\/\/www.netlib.org\/utk\/people\/JackDongarra\/WEB-PAGES\/Batched-BLAS-2017\/talk02-relton.pdf last accessed Mar 2017.  Samuel Relton and Mawussi Zounon. 2017. Batched BLAS API and Memory Layouts. (2017). http:\/\/www.netlib.org\/utk\/people\/JackDongarra\/WEB-PAGES\/Batched-BLAS-2017\/talk02-relton.pdf last accessed Mar 2017."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2012.10.003"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1115\/1.4005896"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-8191(97)00055-0"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1137\/1019071"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1137\/15M1040839"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2764454"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/2743134.2743258"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_27_1","volume-title":"Reproducible and Reduced Precision BLAS","author":"Batched Workshop","year":"2017","unstructured":"Workshop on Batched , Reproducible and Reduced Precision BLAS . 2017 . bit.ly\/ Batch-BLAS- 2017, (last accessed Mar 2017). (2017). Workshop on Batched, Reproducible and Reduced Precision BLAS. 2017. bit.ly\/ Batch-BLAS-2017, (last accessed Mar 2017). (2017)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.2514\/2.586"},{"key":"e_1_3_2_1_29_1","volume-title":"Introducing Batch GEMM Operations. (2017). https:\/\/software.intel.com\/en-us\/articles\/introducing-batch-gemm-operations last accessed","author":"Zhang Zhang","year":"2017","unstructured":"Zhang Zhang . 2017. Introducing Batch GEMM Operations. (2017). https:\/\/software.intel.com\/en-us\/articles\/introducing-batch-gemm-operations last accessed Mar 2017 . Zhang Zhang. 2017. Introducing Batch GEMM Operations. (2017). https:\/\/software.intel.com\/en-us\/articles\/introducing-batch-gemm-operations last accessed Mar 2017."}],"event":{"name":"SC '17: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Denver Colorado","acronym":"SC '17","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","IEEE CS"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3126908.3126941","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3126908.3126941","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:11:08Z","timestamp":1750212668000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3126908.3126941"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,11,12]]},"references-count":29,"alternative-id":["10.1145\/3126908.3126941","10.1145\/3126908"],"URL":"https:\/\/doi.org\/10.1145\/3126908.3126941","relation":{},"subject":[],"published":{"date-parts":[[2017,11,12]]},"assertion":[{"value":"2017-11-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}