{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T02:51:27Z","timestamp":1725936687305},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319729701"},{"type":"electronic","value":"9783319729718"}],"license":[{"start":{"date-parts":[[2017,12,23]],"date-time":"2017-12-23T00:00:00Z","timestamp":1513987200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-72971-8_12","type":"book-chapter","created":{"date-parts":[[2017,12,22]],"date-time":"2017-12-22T08:44:54Z","timestamp":1513932294000},"page":"236-249","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Performance and Energy Usage of Workloads on\u00a0KNL and Haswell Architectures"],"prefix":"10.1007","author":[{"given":"Tyler","family":"Allen","sequence":"first","affiliation":[]},{"given":"Christopher S.","family":"Daley","sequence":"additional","affiliation":[]},{"given":"Douglas","family":"Doerfler","sequence":"additional","affiliation":[]},{"given":"Brian","family":"Austin","sequence":"additional","affiliation":[]},{"given":"Nicholas J.","family":"Wright","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,12,23]]},"reference":[{"key":"12_CR1","unstructured":"DGEMM. http:\/\/www.nersc.gov\/research-and-development\/apex\/apex-benchmarks\/dgemm\/"},{"key":"12_CR2","unstructured":"GTC-P. http:\/\/www.nersc.gov\/research-and-development\/apex\/apex-benchmarks\/gtc-p\/"},{"key":"12_CR3","unstructured":"Intel Xeon Phi Processor 7250 16GB, 1.40 GHz, 68 core. https:\/\/ark.intel.com\/products\/94035\/Intel-Xeon-Phi-Processor-7250-16GB-1_40-GHz-68-core"},{"key":"12_CR4","unstructured":"Intel Xeon Processor E5\u20132698 v3 40M Cache, 2.30 GHz. https:\/\/ark.intel.com\/products\/81060\/Intel-Xeon-Processor-E5-2698-v3-40M-Cache-2_30-GHz"},{"key":"12_CR5","unstructured":"Intel Xeon Processor E7\u20134850 v4 40M Cache, 2.10 GHz. https:\/\/ark.intel.com\/products\/93806\/Intel-Xeon-Processor-E7-4850-v4-40M-Cache-2_10-GHz"},{"key":"12_CR6","unstructured":"STREAM: Sustainable Memory Bandwidth in High Performance Computers. https:\/\/www.cs.virginia.edu\/stream\/FTP\/Code\/"},{"key":"12_CR7","unstructured":"Agelastos, A.M., Rajan, M., Wichmann, N., Baker, R., Domino, S., Draeger, E.W., Anderson, S., Balma, J., Behling, S., Berry, M., Carrier, P., Davis, M., McMahon, K., Sandness, D., Thomas, K., Warren, S., Zhu, T.: Performance on Trinity phase 2 (a Cray XC40 utilizing Intel Xeon Phi processors) with acceptance applications and benchmarks. In: Cray User Group CUG, May 2017. https:\/\/cug.org\/proceedings\/cug2017_proceedings\/includes\/files\/pap138s2-file1.pdf"},{"key":"12_CR8","doi-asserted-by":"crossref","first-page":"1221","DOI":"10.1088\/0004-637X\/715\/2\/1221","volume":"715","author":"AS Almgren","year":"2010","unstructured":"Almgren, A.S., Beckner, V.E., Bell, J.B., Day, M.S., Howell, L.H., Joggerst, C.C., Lijewski, M.J., Nonaka, A., Singer, M., Zingale, M.: CASTRO: A new compressible astrophysical solver. I. hydrodynamics and self-gravity. Astrophys. J. 715, 1221\u20131238 (2010)","journal-title":"Astrophys. J."},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Almgren, A.S., Bell, J.B., Lijewski, M.J., Luki\u0107, Z., Andel, E.V.: Nyx: A massively parallel AMR code for computational cosmology. Astrophys. J. 765(1), 39 (2013). http:\/\/stacks.iop.org\/0004-637X\/765\/i=1\/a=39","DOI":"10.1088\/0004-637X\/765\/1\/39"},{"key":"12_CR10","unstructured":"APEX Benchmark Distribution and Run Rules. http:\/\/www.nersc.gov\/research-and-development\/apex\/apex-benchmarks\/"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Austin, B., Wright, N.J.: Measurement and interpretation of microbenchmark and application energy use on the Cray XC30. In: Proceedings of the 2nd International Workshop on Energy Efficient Supercomputing, pp. 51\u201359. IEEE Press (2014)","DOI":"10.1109\/E2SC.2014.7"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Barnes, T., Cook, B., Deslippe, J., Doerfler, D., Friesen, B., He, Y., Kurth, T., Koskela, T., Lobet, M., Malas, T., Oliker, L., Ovsyannikov, A., Sarje, A., Vay, J.L., Vincenti, H., Williams, S., Carrier, P., Wichmann, N., Wagner, M., Kent, P., Kerr, C., Dennis, J.: Evaluating and optimizing the NERSC workload on knights landing. In: 2016 7th International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS), pp. 43\u201353, November 2016","DOI":"10.1109\/PMBS.2016.010"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Bauer, B., Gottlieb, S., Hoefler, T.: Performance modeling and comparative analysis of the MILC Lattice QCD application su3_rmd. In: Proceedings CCGRID2012: IEEE\/ACM International Symposium on Cluster, Cloud, and Grid Computing (2012)","DOI":"10.1109\/CCGrid.2012.123"},{"issue":"1\/2","key":"12_CR14","doi-asserted-by":"crossref","first-page":"12:1","DOI":"10.1147\/JRD.2013.2238371","volume":"57","author":"S Coghlan","year":"2013","unstructured":"Coghlan, S., Kumaran, K., Loy, R.M., Messina, P., Morozov, V., Osborn, J.C., Parker, S., Riley, K.M., Romero, N.A., Williams, T.J.: Argonne applications for the IBM Blue Gene\/Q, Mira. IBM J. Res. Dev. 57(1\/2), 12:1\u201312:11 (2013)","journal-title":"IBM J. Res. Dev."},{"key":"12_CR15","unstructured":"LANL Trinity Supercomputer. http:\/\/www.lanl.gov\/projects\/trinity\/"},{"key":"12_CR16","unstructured":"NERSC Cori Supercomputer. https:\/\/www.nersc.gov\/systems\/cori\/"},{"key":"12_CR17","unstructured":"Cray XC Series Supercomputers. http:\/\/www.cray.com\/products\/computing\/xc-series"},{"key":"12_CR18","doi-asserted-by":"publisher","unstructured":"Evangelinos, C., Walkup, R.E., Sachdeva, V., Jordan, K.E., Gahvari, H., Chung, I.H., Perrone, M.P., Lu, L., Liu, L.K., Magerlein, K.: Determination of performance characteristics of scientific applications on IBM Blue Gene\/Q. IBM J. Res. Dev. 57(1), 99\u2013110 (2013). https:\/\/doi.org\/10.1147\/JRD.2012.2229901","DOI":"10.1147\/JRD.2012.2229901"},{"key":"12_CR19","unstructured":"The Opportunities and Challenges of Exascale Computing. https:\/\/science.energy.gov\/~\/media\/ascr\/ascac\/pdf\/reports\/Exascale_subcommittee_report.pdf"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Fuerlinger, K., Wright, N.J., Skinner, D.: Effective performance measurement at petascale using IPM. In: 2010 IEEE 16th International Conference on Parallel and Distributed Systems, pp. 373\u2013380, December 2010","DOI":"10.1109\/ICPADS.2010.16"},{"key":"12_CR21","doi-asserted-by":"publisher","unstructured":"F\u00fcrlinger, K., Wright, N.J., Skinner, D.: Performance analysis and workload characterization with IPM. In: M\u00fcller, M., Resch, M., Schulz, A., Nagel, W. (eds.) Tools for High Performance Computing 2009, pp. 31\u201338. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-11261-4_3","DOI":"10.1007\/978-3-642-11261-4_3"},{"key":"12_CR22","doi-asserted-by":"publisher","unstructured":"F\u00fcrlinger, K., Wright, N.J., Skinner, D., Klausecker, C., Kranzlm\u00fcller, D.: Effective holistic performance measurement at petascale using IPM. In: Bischof, C., Hegering, H.G., Nagel, W., Wittum, G. (eds.) Competence in High Performance Computing 2010, pp. 15\u201326. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-24025-6_2","DOI":"10.1007\/978-3-642-24025-6_2"},{"key":"12_CR23","unstructured":"Giannozzi, P., Baroni, S., Bonini, N., Calandra, M., Car, R., Cavazzoni, C., Ceresoli, D., Chiarotti, G.L., Cococcioni, M., Dabo, I., Dal Corso, A., de Gironcoli, S., Fabris, S., Fratesi, G., Gebauer, R., Gerstmann, U., Gougoussis, C., Kokalj, A., Lazzeri, M., Martin-Samos, L., Marzari, N., Mauri, F., Mazzarello, R., Paolini, S., Pasquarello, A., Paulatto, L., Sbraccia, C., Scandolo, S., Sclauzero, G., Seitsonen, A.P., Smogunov, A., Umari, P., Wentzcovitch, R.M.: QUANTUM ESPRESSO: a modular and open-source software project for quantum simulations of materials. J. Phys. Condens. Matter 21(39), 395502 (19pp) (2009). http:\/\/www.quantum-espresso.org"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Hackenberg, D., Oldenburg, R., Molka, D., Sch\u00f6ne, R.: Introducing FIRESTARTER: a processor stress test utility. In: 2013 International Green Computing Conference Proceedings, pp. 1\u20139, June 2013","DOI":"10.1109\/IGCC.2013.6604507"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"He, Y., Cook, B., Deslippe, J., Friesen, B., Gerber, R., Hartman-Baker, R., Koniges, A., Kurth, T., Leak, S., Yang, W.S., Zhao, Z.: Preparing NERSC users for Cori, a Cray XC40 system with Intel many integrated cores. In: Cray User Group CUG, May 2017. https:\/\/cug.org\/proceedings\/cug2017_proceedings\/includes\/files\/pap161s2-file1.pdf","DOI":"10.1002\/cpe.4291"},{"key":"12_CR26","unstructured":"Hill, P., Snyder, C., Sygulla, J.: KNL system software. In: Cray User Group CUG, May 2017. https:\/\/cug.org\/proceedings\/cug2017_proceedings\/includes\/files\/pap169s2-file1.pdf"},{"key":"12_CR27","volume-title":"Intel Xeon Phi Processor High Performance Programming: Knights","author":"J Jeffers","year":"2016","unstructured":"Jeffers, J., Reinders, J., Sodani, A.: Intel Xeon Phi Processor High Performance Programming: Knights, Landing edn. Morgan Kaufmann, Boston (2016)","edition":"Landing"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"Lawson, G., Sundriyal, V., Sosonkina, M., Shen, Y.: Runtime power limiting of parallel applications on Intel Xeon Phi Processors. In: 2016 4th International Workshop on Energy Efficient Supercomputing (E2SC), pp. 39\u201345, November 2016","DOI":"10.1109\/E2SC.2016.011"},{"key":"12_CR29","unstructured":"Martin, S.J., Kappel, M.: Cray XC30 power monitoring and management. In: Cray User Group 2014 Proceedings (2014)"},{"key":"12_CR30","unstructured":"National Energy Research Scientific Computing Center. https:\/\/www.nersc.gov"},{"key":"12_CR31","unstructured":"Parker, S., Morozov, V., Chunduri, S., Harms, K., Knight, C., Kumaran, K.: Early evaluation of the Cray XC40 Xeon Phi System \u2018Theta\u2019 at Argonne. In: Cray User Group CUG, May 2017. https:\/\/cug.org\/proceedings\/cug2017_proceedings\/includes\/files\/pap113s2-file1.pdf"},{"key":"12_CR32","unstructured":"Patwary, M.M.A., Dubey, P., Byna, S., Satish, N.R., Sundaram, N., Luki\u0107, Z., Roytershteyn, V., Anderson, M.J., Yao, Y., Prabhat: BD-CATS: big data clustering at trillion particle scale. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis on - SC 2015, pp. 1\u201312. ACM Press, New York (2015). http:\/\/dl.acm.org\/citation.cfm?doid=2807591.2807616"},{"key":"12_CR33","unstructured":"Peng, I.B., Gioiosa, R., Kestor, G., Laure, E., Markidis, S.: Exploring the Performance Benefit of Hybrid Memory System on HPC Environments. CoRR abs\/1704.08273 (2017). http:\/\/arxiv.org\/abs\/1704.08273"},{"key":"12_CR34","doi-asserted-by":"crossref","unstructured":"Ramos, S., Hoefler, T.: Capability models for manycore memory systems: a case-study with Xeon Phi KNL. In: 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 297\u2013306, May 2017","DOI":"10.1109\/IPDPS.2017.30"},{"key":"12_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1007\/978-3-319-58667-0_22","volume-title":"High Performance Computing","author":"SI Roberts","year":"2017","unstructured":"Roberts, S.I., Wright, S.A., Fahmy, S.A., Jarvis, S.A.: Metrics for energy-aware software optimisation. In: Kunkel, J.M., Yokota, R., Balaji, P., Keyes, D. (eds.) ISC 2017. LNCS, vol. 10266, pp. 413\u2013430. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-58667-0_22"},{"key":"12_CR36","unstructured":"Rush, D., Martin, S.J., Kappel, M., Sandstedt, M., Williams, J.: Cray XC40 power monitoring and control for knights landing. In: Cray User Group CUG, May 2017. https:\/\/cug.org\/proceedings\/cug2016_proceedings\/includes\/files\/pap112s2-file1.pdf"},{"key":"12_CR37","doi-asserted-by":"publisher","unstructured":"Saini, S., Jin, H., Hood, R., Barker, D., Mehrotra, P., Biswas, R.: The impact of hyper-threading on processor resource utilization in production applications. In: Proceedings of the 2011 18th International Conference on High Performance Computing, pp. 1\u201310, HIPC 2011, IEEE Computer Society, Washington, DC, USA (2011). https:\/\/doi.org\/10.1109\/HiPC.2011.6152743","DOI":"10.1109\/HiPC.2011.6152743"},{"key":"12_CR38","doi-asserted-by":"crossref","unstructured":"Sodani, A.: Knights landing (KNL): 2nd generation Intel Xeon Phi Processor. In: Hot Chips 27, Flint Center, Cupertino, CA, August 23\u201325 2015. http:\/\/www.hotchips.org\/wp-content\/uploads\/hc_archives\/hc27\/HC27.25-Tuesday-Epub\/HC27.25.70-Processors-Epub\/HC27.25.710-Knights-Landing-Sodani-Intel.pdf","DOI":"10.1109\/HOTCHIPS.2015.7477467"},{"key":"12_CR39","unstructured":"ANL Theta Supercomputer. https:\/\/www.alcf.anl.gov\/theta"},{"key":"12_CR40","unstructured":"Wang, B., Ethier, S., Tang, W.M., Ibrahim, K.Z., Madduri, K., Williams, S., Oliker, L.: Modern Gyrokinetic Particle-In-Cell Simulation of Fusion Plasmas on Top Supercomputers. CoRR abs\/1510.05546 (2015). http:\/\/arxiv.org\/abs\/1510.05546"},{"key":"12_CR41","unstructured":"Zhao, Z., Wright, N.J., Antypas, K.: Effects of hyper-threading on the NERSC workload on Edison. In: Cray User Group CUG, May 2013. https:\/\/www.nersc.gov\/assets\/CUG13HTpaper.pdf"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing Systems. Performance Modeling, Benchmarking, and Simulation"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-72971-8_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,8]],"date-time":"2019-10-08T13:30:59Z","timestamp":1570541459000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-72971-8_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12,23]]},"ISBN":["9783319729701","9783319729718"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-72971-8_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017,12,23]]}}}