{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T05:07:26Z","timestamp":1769922446704,"version":"3.49.0"},"reference-count":14,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2018,6,1]],"date-time":"2018-06-01T00:00:00Z","timestamp":1527811200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"National Research Foundation (NRF) grant funded by the Korea government","award":["NRF-2015M3C4A7075662"],"award-info":[{"award-number":["NRF-2015M3C4A7075662"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cluster Comput"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1007\/s10586-018-2810-y","type":"journal-article","created":{"date-parts":[[2018,6,1]],"date-time":"2018-06-01T04:29:15Z","timestamp":1527827355000},"page":"1785-1795","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":28,"title":["An implementation of matrix\u2013matrix multiplication on the Intel KNL processor with AVX-512"],"prefix":"10.1007","volume":"21","author":[{"given":"Roktaek","family":"Lim","sequence":"first","affiliation":[]},{"given":"Yeongha","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Raehyun","family":"Kim","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7321-9682","authenticated-orcid":false,"given":"Jaeyoung","family":"Choi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,6,1]]},"reference":[{"key":"2810_CR1","doi-asserted-by":"crossref","unstructured":"Jeffers, J., Reinders, J., Sodani, A.: Intel Xeon Phi Processor High Performance Programming: Knights Landing Edition. Morgan Kaufmann (2016)","DOI":"10.1016\/B978-0-12-809194-4.00002-8"},{"key":"2810_CR2","doi-asserted-by":"crossref","unstructured":"Bilmes, J., Asanovic, K., Chin, C.W., Demmel, J.: Optimizing matrix multiply using PHiPAC: a portable, high-performance, ANSI C coding methodology. In: ACM International Conference on Supercomputing 25th Anniversary Volume, pp. 253\u2013260. ACM (2014)","DOI":"10.1145\/2591635.2667174"},{"issue":"3","key":"2810_CR3","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1145\/1356052.1356053","volume":"34","author":"K Goto","year":"2008","unstructured":"Goto, K., van de Geijn, R.A.: Anatomy of high-performance matrix multiplication. ACM Trans. Math. Softw. (TOMS) 34(3), 12 (2008)","journal-title":"ACM Trans. Math. Softw. (TOMS)"},{"key":"2810_CR4","doi-asserted-by":"crossref","unstructured":"Heinecke, A., Vaidyanathan, K., Smelyanskiy, M., Kobotov, A., Dubtsov, R., Henry, G., Shet, A.G., Chrysos, G., Dubey, P.: Design and implementation of the linpack benchmark for single and multi-node systems based on Intel\u00ae Xeon Phi Coprocessor. In: 2013 IEEE 27th International Symposium on Parallel & Distributed Processing (IPDPS), pp. 126\u2013137. IEEE (2013)","DOI":"10.1109\/IPDPS.2013.113"},{"key":"2810_CR5","unstructured":"Peyton, J.L.: Programming dense linear algebra kernels on vectorized architectures. Master\u2019s thesis, The University of Tennessee, Knoxville (2013)"},{"issue":"3","key":"2810_CR6","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1145\/2764454","volume":"41","author":"FG Zee Van","year":"2015","unstructured":"Van Zee, F.G., Van De Geijn, R.A.: BLIS: a framework for rapidly instantiating BLAS functionality. ACM Transactions on Mathematical Software (TOMS) 41(3), 14 (2015)","journal-title":"ACM Transactions on Mathematical Software (TOMS)"},{"key":"2810_CR7","doi-asserted-by":"crossref","unstructured":"Whaley, R.C., Dongarra, J.J.: Automatically tuned linear algebra software. In: Proceedings of the 1998 ACM\/IEEE conference on Supercomputing, pp. 1\u201327. IEEE Computer Society (1998)","DOI":"10.1109\/SC.1998.10004"},{"issue":"2","key":"2810_CR8","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1145\/2925987","volume":"43","author":"TM Low","year":"2016","unstructured":"Low, T.M., Igual, F.D., Smith, T.M., Quintana-Orti, E.S.: Analytical modeling is enough for high-performance blis. ACM Trans. Math. Softw. (TOMS) 43(2), 12 (2016)","journal-title":"ACM Trans. Math. Softw. (TOMS)"},{"issue":"1","key":"2810_CR9","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/S0167-8191(00)00087-9","volume":"27","author":"RC Whaley","year":"2001","unstructured":"Whaley, R.C., Petitet, A., Dongarra, J.J.: Automated empirical optimizations of software and the atlas project. Parallel Comput. 27(1), 3\u201335 (2001)","journal-title":"Parallel Comput."},{"key":"2810_CR10","doi-asserted-by":"crossref","unstructured":"Gunnels, J.A., Henry, G.M., Van De\u00a0Geijn, R.A.: A family of high-performance matrix multiplication algorithms. In: International Conference on Computational Science, pp. 51\u201360. Springer (2001)","DOI":"10.1007\/3-540-45545-0_15"},{"issue":"2","key":"2810_CR11","first-page":"101","volume":"35","author":"R. Clint Whaley","year":"2005","unstructured":"Whaley, R.C., Petitet, A.: Minimizing development and maintenance costs in supporting persistently optimized BLAS. Softw. Pract. Exp. 35(2), 101\u2013121 (2005)","journal-title":"Software: Practice and Experience"},{"issue":"1","key":"2810_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2133382.2133384","volume":"9","author":"Jaekyu Lee","year":"2012","unstructured":"Lee, J., Kim, H., Vuduc, R.: When prefetching works, when it doesn\u2019t, and why. Architecture and Code Optimization (TACO), vol. 9(2) (2012)","journal-title":"ACM Transactions on Architecture and Code Optimization"},{"key":"2810_CR13","doi-asserted-by":"crossref","unstructured":"Smith, T.M., Van De\u00a0Geijn, R.A., Smelyanskiy, M., Hammond, J.R., Van\u00a0Zee, F.G.: Anatomy of high-performance many-threaded matrix multiplication. In: 2014 IEEE 28th International Parallel and Distributed Processing Symposium, pp. 1049\u20131059. IEEE (2014)","DOI":"10.1109\/IPDPS.2014.110"},{"key":"2810_CR14","doi-asserted-by":"crossref","unstructured":"Marker, B., Van\u00a0Zee, F.G., Goto, K., Quintana-Ort\u00ed, G., Van De\u00a0Geijn, R.A.: Toward scalable matrix multiply on multithreaded architectures. In: European Conference on Parallel Processing, pp. 748\u2013757. Springer (2007)","DOI":"10.1007\/978-3-540-74466-5_79"}],"container-title":["Cluster Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10586-018-2810-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-018-2810-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-018-2810-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T02:53:45Z","timestamp":1559357625000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10586-018-2810-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,6,1]]},"references-count":14,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["2810"],"URL":"https:\/\/doi.org\/10.1007\/s10586-018-2810-y","relation":{},"ISSN":["1386-7857","1573-7543"],"issn-type":[{"value":"1386-7857","type":"print"},{"value":"1573-7543","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,6,1]]},"assertion":[{"value":"18 October 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 February 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 May 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 June 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}