{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T06:03:17Z","timestamp":1774591397363,"version":"3.50.1"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319460789","type":"print"},{"value":"9783319460796","type":"electronic"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-46079-6_24","type":"book-chapter","created":{"date-parts":[[2016,10,5]],"date-time":"2016-10-05T12:01:40Z","timestamp":1475668900000},"page":"339-353","source":"Crossref","is-referenced-by-count":39,"title":["Applying the Roofline Performance Model to the Intel Xeon Phi Knights Landing Processor"],"prefix":"10.1007","author":[{"given":"Douglas","family":"Doerfler","sequence":"first","affiliation":[]},{"given":"Jack","family":"Deslippe","sequence":"additional","affiliation":[]},{"given":"Samuel","family":"Williams","sequence":"additional","affiliation":[]},{"given":"Leonid","family":"Oliker","sequence":"additional","affiliation":[]},{"given":"Brandon","family":"Cook","sequence":"additional","affiliation":[]},{"given":"Thorsten","family":"Kurth","sequence":"additional","affiliation":[]},{"given":"Mathieu","family":"Lobet","sequence":"additional","affiliation":[]},{"given":"Tareq","family":"Malas","sequence":"additional","affiliation":[]},{"given":"Jean-Luc","family":"Vay","sequence":"additional","affiliation":[]},{"given":"Henri","family":"Vincenti","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,10,6]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Aktulga, H.M., Buluc, A., Williams, S., Yang, C.: Optimizing sparse matrix-multiple vector multiplication for nuclear configuration interaction calculations. In: International Parallel and Distributed Processing Symposium (IPDPS 2014), May 2014","DOI":"10.1109\/IPDPS.2014.125"},{"issue":"16","key":"24_CR2","doi-asserted-by":"publisher","first-page":"2631","DOI":"10.1002\/cpe.3129","volume":"26","author":"HM Aktulga","year":"2014","unstructured":"Aktulga, H.M., Yang, C., Ng, E.G., Maris, P., Vary, J.P.: Improving the scalability of a symmetric iterative eigensolver for multi-core platforms. Concurrency Comput. Pract. Exp. 26(16), 2631\u20132651 (2014). doi: 10.1002\/cpe.3129","journal-title":"Concurrency Comput. Pract. Exp."},{"issue":"6","key":"24_CR3","doi-asserted-by":"crossref","first-page":"1768","DOI":"10.1145\/197320.197366","volume":"16","author":"S Carr","year":"1994","unstructured":"Carr, S., Kennedy, K.: Improving the ratio of memory operations to floating-point operations in loops. ACM Trans. Program. Lang. Syst. 16(6), 1768\u20131810 (1994). http:\/\/doi.acm.org\/10.1145\/197320.197366","journal-title":"ACM Trans. Program. Lang. Syst."},{"key":"24_CR4","series-title":"Series in Plasma Physics","volume-title":"Plasma Physics Via Computer Simulation","author":"CK Birdsall","year":"2005","unstructured":"Birdsall, C.K., Langdon, A.B.: Plasma Physics Via Computer Simulation. Series in Plasma Physics. CRC Press, Boca Raton (2005)"},{"key":"24_CR5","unstructured":"Cray xc series supercomputers. http:\/\/www.cray.com\/products\/computing\/xc-series"},{"issue":"6","key":"24_CR6","doi-asserted-by":"crossref","first-page":"1269","DOI":"10.1016\/j.cpc.2011.12.006","volume":"183","author":"J Deslippe","year":"2012","unstructured":"Deslippe, J., Samsonidze, G., Strubbe, D.A., Jain, M., Cohen, M.L., Louie, S.G.: Berkeleygw: a massively parallel computer package for the calculation of the quasiparticle and optical properties of materials and nanostructures. Comput. Phys. Commun. 183(6), 1269\u20131289 (2012). http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0010465511003912","journal-title":"Comput. Phys. Commun."},{"key":"24_CR7","unstructured":"Doerfler, D.: Understanding application data movement characteristics using intel vtune amplifier and software development emulator tools. In: IXPUG 2015, Berkeley, CA, September 28 - October 2 2015"},{"issue":"12","key":"24_CR8","doi-asserted-by":"crossref","first-page":"1612","DOI":"10.1109\/12.40842","volume":"38","author":"MD Hill","year":"1989","unstructured":"Hill, M.D., Smith, A.J.: Evaluating associativity in CPU caches. IEEE Trans. Comput. 38(12), 1612\u20131630 (1989)","journal-title":"IEEE Trans. Comput."},{"key":"24_CR9","unstructured":"Lawrence Berkeley National Laboratory.: Warp website. http:\/\/warp.lbl.gov"},{"key":"24_CR10","unstructured":"Ligocki, T.: Roofline toolkit. https:\/\/bitbucket.org\/berkeleylab\/cs-roofline-toolkit"},{"key":"24_CR11","unstructured":"Malas, T., Kurth, T., Deslippe, J.: Optimization of the sparse matrix-vector products of an idr krylov iterative solver for the intel knl manycore processor (in preparation)"},{"issue":"1","key":"24_CR12","doi-asserted-by":"crossref","first-page":"012019","DOI":"10.1088\/1742-6596\/403\/1\/012019","volume":"403","author":"P Maris","year":"2012","unstructured":"Maris, P., Aktulga, H.M., Caprio, M.A., \u00c7ataly\u00fcrek, \u00dc.V., Ng, E.G., Oryspayev, D., Potter, H., Saule, E., Sosonkina, M., Vary, J.P., Yang, C., Zhou, Z.: Large-scale ab initio configuration interaction calculations for light nuclei. J. Phys. Conf. Ser. 403(1), 012019 (2012). http:\/\/stacks.iop.org\/1742-6596\/403\/i=1\/a=012019","journal-title":"J. Phys. Conf. Ser."},{"key":"24_CR13","unstructured":"NERSC: Cori. https:\/\/www.nersc.gov\/systems\/cori\/"},{"key":"24_CR14","unstructured":"NERSC: Measuring arithmetic intensity. https:\/\/www.nersc.gov\/users\/application-performance\/measuring-arithmetic-intensity"},{"key":"24_CR15","unstructured":"Nesap. http:\/\/www.nersc.gov\/users\/computational-systems\/cori\/nesap\/"},{"issue":"4","key":"24_CR16","doi-asserted-by":"publisher","first-page":"T137","DOI":"10.1190\/geo2011-0238.1","volume":"77","author":"PV Petrov","year":"2012","unstructured":"Petrov, P.V., Newman, G.A.: 3d finite-difference modeling of elasticwave propagation in the laplace-fourier domain. Geophysics 77(4), T137\u2013T155 (2012). doi: 10.1190\/geo2011-0238.1","journal-title":"Geophysics"},{"key":"24_CR17","unstructured":"Raman, K.: Calculating \u201cflop\u201d using intel software developmentemulator (intelsde) (March 2015). https:\/\/software.intel.com\/en-us\/articles\/calculating-flop-using-intel-software-development-emulator-intel-sde"},{"key":"24_CR18","doi-asserted-by":"crossref","unstructured":"Sodani, A.: Knights landing (knl): 2nd generation intel xeon phiprocessor. In: Hot Chips 27. Flint Center, Cupertino, August 23rd-25th 2015. http:\/\/www.hotchips.org\/wp-content\/uploads\/hc_archives\/hc27\/HC27.25-Tuesday-Epub\/HC27.25.70-Processors-Epub\/HC27.25.710-Knights-Landing-Sodani-Intel.pdf","DOI":"10.1109\/HOTCHIPS.2015.7477467"},{"key":"24_CR19","unstructured":"Tal, A.: Intel software development emulator. https:\/\/software.intel.com\/en-us\/articles\/intel-software-development-emulator"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"Vincenti, H., Lehe, R., Sasanka, R., Vay, J.: An efficient and portable SIMD algorithm for charge\/current deposition in Particle-In-Cell codes. ArXiv e-prints, January 2016","DOI":"10.1016\/j.cpc.2016.08.023"},{"key":"24_CR21","unstructured":"Williams, S.: Auto-tuning Performance on Multicore Computers. Ph.D. thesis, EECS Department, University of California, Berkeley, December 2008"},{"issue":"4","key":"24_CR22","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Watterman, A., Patterson, D.: Roofline: an insightful visual performance model for floating-point programs and multicore architectures. Commun. ACM 52(4), 65\u201376 (2009)","journal-title":"Commun. ACM"},{"key":"24_CR23","unstructured":"Williams, S., Stralen, B.V., Ligocki, T., Oliker, L., Cordery, M., Lo, L.: Roofline performance model. http:\/\/crd.lbl.gov\/departments\/computer-science\/PAR\/research\/roofline\/"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-46079-6_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,9]],"date-time":"2022-07-09T16:06:58Z","timestamp":1657382818000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-46079-6_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319460789","9783319460796"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-46079-6_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016]]}}}