{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:40:02Z","timestamp":1740123602819,"version":"3.37.3"},"reference-count":24,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T00:00:00Z","timestamp":1710288000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T00:00:00Z","timestamp":1710288000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100014188","name":"Ministry of Science and ICT, South Korea","doi-asserted-by":"publisher","award":["RS-2023-00321688","RS-2023-00321688","RS-2023-00321688","RS-2023-00321688"],"award-info":[{"award-number":["RS-2023-00321688","RS-2023-00321688","RS-2023-00321688","RS-2023-00321688"]}],"id":[{"id":"10.13039\/501100014188","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1007\/s11227-024-06002-2","type":"journal-article","created":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T20:05:59Z","timestamp":1710360359000},"page":"13813-13836","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Revisiting the performance optimization of QR factorization on Intel KNL and SKL multiprocessors"],"prefix":"10.1007","volume":"80","author":[{"given":"Muhammad","family":"Rizwan","sequence":"first","affiliation":[]},{"given":"Enoch","family":"Jung","sequence":"additional","affiliation":[]},{"given":"Jongsun","family":"Choi","sequence":"additional","affiliation":[]},{"given":"Jaeyoung","family":"Choi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,13]]},"reference":[{"key":"6002_CR1","doi-asserted-by":"publisher","DOI":"10.1155\/1996\/483083","author":"J Choi","year":"1996","unstructured":"Choi J, Dongarra JJ, Ostrouchoy LS, Petitet AP, Whaley RC, Walker DW (1996) Design and Implementation of the ScaLAPACK LU, QR, and Cholesky factorization routines. Sci Program. https:\/\/doi.org\/10.1155\/1996\/483083","journal-title":"Sci Program"},{"key":"6002_CR2","doi-asserted-by":"publisher","unstructured":"Choi J, Dongarra JJ, Pozo R, Walker DW (1992) ScaLAPACK: ascalable linear algebra library for distributed memory concurrent computers. In: The Fourth Symposium on the Frontiers of Massively Parallel Computation. IEEE Computer Society. pp 120\u2013121. https:\/\/doi.org\/10.1109\/fmpc.1992.234898","DOI":"10.1109\/fmpc.1992.234898"},{"key":"6002_CR3","doi-asserted-by":"publisher","unstructured":"Nassif N, Erhel J, Philippe B (2015) Basic linear algebra subprograms\u2014BLAS. Introduction to computational linear algebra. https:\/\/doi.org\/10.1201\/b18662-7","DOI":"10.1201\/b18662-7"},{"key":"6002_CR4","doi-asserted-by":"publisher","unstructured":"Rizwan M, Jung E, Park Y, Choi J, Kim Y (2022) Optimization of matrix\u2013matrix multiplication algorithm for matrix\u2013panel multiplication on Intel KNL. In: 2022 IEEE\/ACS 19th International Conference on Computer Systems and Applications (AICCSA). IEEE. pp 1\u20137. https:\/\/doi.org\/10.1109\/AICCSA56895.2022.10017947","DOI":"10.1109\/AICCSA56895.2022.10017947"},{"key":"6002_CR5","doi-asserted-by":"publisher","unstructured":"Gunnels JA, Henry GM, van\u00a0de Geijn RA (2001) A family of high-performance matrix multiplication algorithms. In: Computational Science\u2014ICCS 2001. 2073. pp 51\u201360. https:\/\/doi.org\/10.1007\/3-540-45545-0_15","DOI":"10.1007\/3-540-45545-0_15"},{"issue":"1145\/1356052","key":"6002_CR6","first-page":"1356053","volume":"10","author":"K Goto","year":"2008","unstructured":"Goto K, Geijn RAVD (2008) Anatomy of high-performance matrix multiplication. ACM Trans Math Softw 10(1145\/1356052):1356053","journal-title":"ACM Trans Math Softw"},{"issue":"1145\/1377603","key":"6002_CR7","first-page":"1377607","volume":"10","author":"K Goto","year":"2008","unstructured":"Goto K, Geijn RVD (2008) High-performance implementation of the level-3 BLAS. ACM Trans Math Softw 10(1145\/1377603):1377607","journal-title":"ACM Trans Math Softw"},{"key":"6002_CR8","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-018-2810-y","author":"R Lim","year":"2018","unstructured":"Lim R, Lee Y, Kim R, Choi J (2018) An implementation of matrix\u2013matrix multiplication on the Intel KNL processor with AVX-512. Cluster Comput. https:\/\/doi.org\/10.1007\/s10586-018-2810-y","journal-title":"Cluster Comput"},{"key":"6002_CR9","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-018-2702-1","author":"R Lim","year":"2019","unstructured":"Lim R, Lee Y, Kim R, Choi J, Lee M (2019) Auto-tuning GEMM kernels on the Intel KNL and Intel Skylake-SP processors. J Supercomput. https:\/\/doi.org\/10.1007\/s11227-018-2702-1","journal-title":"J Supercomput"},{"key":"6002_CR10","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-021-03274-8","author":"Y Park","year":"2021","unstructured":"Park Y, Kim R, Nguyen TMT, Choi J (2021) Improving blocked matrix\u2013matrix multiplication routine by utilizing AVX-512 instructions on intel knights landing and Xeon scalable processors. Cluster Comput. https:\/\/doi.org\/10.1007\/s10586-021-03274-8","journal-title":"Cluster Comput"},{"key":"6002_CR11","unstructured":"Thi N, Tuyen M (2020). Thesis for the Degree of Master. Improving Performance of LU Factorization Routine on Intel KNL and Xeon Scalable Processors. Dissertation, Soongsil University, Korea"},{"key":"6002_CR12","unstructured":"Intel (2021) Intel oneAPI Math Kernel Library (oneMKL) Overview. https:\/\/www.intel.com\/content\/www\/us\/en\/docs\/onemkl\/developer-reference-c\/2023-0\/overview.html. Accessed 21 Dec 2022"},{"key":"6002_CR13","unstructured":"Xianyi Z, Qian W, Saar W (2023) OpenBLAS: an optimized BLAS library. https:\/\/www.openblas.net. Accessed 15 Apr 2023"},{"key":"6002_CR14","doi-asserted-by":"publisher","DOI":"10.1145\/2764454","author":"FGV Zee","year":"2015","unstructured":"Zee FGV, van de Geijn RA (2015) BLIS: a framework for rapidly instantiating BLAS functionality. ACM Trans Math Softw. https:\/\/doi.org\/10.1145\/2764454","journal-title":"ACM Trans Math Softw"},{"key":"6002_CR15","doi-asserted-by":"publisher","unstructured":"Anderson E, Bai Z, Dongarra J, Greenbaum A, McKenney A, Croz J\u00a0D, Hammarling S, Demmel J, Bischof C, Sorensen D (1990) LAPACK: A portable linear algebra library for high-performance computers. In: Supercomputing \u201990:Proceedings of the 1990 ACM\/IEEE Conference on Supercomputing. pp 2\u201311. https:\/\/doi.org\/10.1109\/superc.1990.129995","DOI":"10.1109\/superc.1990.129995"},{"key":"6002_CR16","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4330030610","author":"J Demmel","year":"1991","unstructured":"Demmel J (1991) LAPACK: a portable linear algebra library for high-performance computers. Concurr: Pract Exp. https:\/\/doi.org\/10.1002\/cpe.4330030610","journal-title":"Concurr: Pract Exp"},{"key":"6002_CR17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-0348-8534-8_21","volume-title":"The MPI message passing interface standard","author":"L Clarke","year":"1994","unstructured":"Clarke L, Glendinning I, Hempel R (1994) The MPI message passing interface standard. Birkh, Basel. https:\/\/doi.org\/10.1007\/978-3-0348-8534-8_21"},{"key":"6002_CR18","doi-asserted-by":"publisher","unstructured":"Anderson E, Dongarra J, Ostrouchov S, Benzoni A, Moulton S, Tourancheau B, Geijn RVD (1991) Basic linear Algebra communication subprograms. In: The Sixth Distributed Memory Computing Conference, 1991. Proceedings. pp 287\u2013290. https:\/\/doi.org\/10.1109\/DMCC.1991.633146","DOI":"10.1109\/DMCC.1991.633146"},{"key":"6002_CR19","doi-asserted-by":"publisher","unstructured":"Choi J, Dongarra J, Ostrouchov S, Petitet A, Walker D, Whaley RC (1996) A proposal for a set of parallel basic linear algebra subprograms. In: Applied Parallel Computing Computations in Physics, Chemistry and Engineering Science. Springer, Berlin. vol 1041, pp 107\u2013114. https:\/\/doi.org\/10.1007\/3-540-60902-4_13","DOI":"10.1007\/3-540-60902-4_13"},{"key":"6002_CR20","doi-asserted-by":"publisher","unstructured":"Smith TM, Geijn RVD, Smelyanskiy M, Hammond JR, Zee FG (2014) Anatomy of high-performance many-threaded matrix multiplication. In: 2014 IEEE 28th International Parallel and Distributed Processing Symposium, pp 1049\u20131059. https:\/\/doi.org\/10.1109\/IPDPS.2014.110","DOI":"10.1109\/IPDPS.2014.110"},{"key":"6002_CR21","unstructured":"KISTI (2018) National Supercomputing Center. https:\/\/www.ksc.re.kr\/eng\/resources\/nurion. Accessed 5 Aug 1 2023"},{"key":"6002_CR22","unstructured":"Cantalupo C, Venkatesan V, Hammond J, Czurlyo K, Hammond SD (2022) Memkind: an extensible heap memory manager for heterogeneous memory platforms and mixed memory policies. https:\/\/github.com\/memkind\/memkind. Accessed 20 Dec 2022"},{"key":"6002_CR23","unstructured":"Choi J, Dongarra JJ, Ostrouchoy LS, Petitet AP, Whaley RC, Walker DW (2022) ScaLAPACK\u2014Scalable Linear Algebra PACKage. https:\/\/www.netlib.org\/scalapack. Accessed 2 Dec 2022"},{"key":"6002_CR24","unstructured":"Zee FGV, van\u00a0de Geijn RA (2022) BLIS: BLAS-like Library Instantiation Software Framework. https:\/\/github.com\/flame\/blis. Accessed 28 Apr 2023"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-024-06002-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-024-06002-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-024-06002-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,10]],"date-time":"2024-06-10T11:16:05Z","timestamp":1718018165000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-024-06002-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,13]]},"references-count":24,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2024,7]]}},"alternative-id":["6002"],"URL":"https:\/\/doi.org\/10.1007\/s11227-024-06002-2","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2024,3,13]]},"assertion":[{"value":"16 February 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}}]}}