{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,29]],"date-time":"2025-10-29T06:16:31Z","timestamp":1761718591384,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319586663"},{"type":"electronic","value":"9783319586670"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-58667-0_16","type":"book-chapter","created":{"date-parts":[[2017,5,11]],"date-time":"2017-05-11T15:27:38Z","timestamp":1494516458000},"page":"294-314","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["An Analysis of Core- and Chip-Level Architectural Features in Four Generations of Intel Server Processors"],"prefix":"10.1007","author":[{"given":"Johannes","family":"Hofmann","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Georg","family":"Hager","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gerhard","family":"Wellein","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dietmar","family":"Fey","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,5,12]]},"reference":[{"issue":"4","key":"16_CR1","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1142\/S012962640800351X","volume":"18","author":"K Barker","year":"2008","unstructured":"Barker, K., Davis, K., Hoisie, A., Kerbyson, D.J., Lang, M., Pakin, S., Sancho, J.C.: A performance evaluation of the Nehalem quad-core processor for scientific computing. Parallel Proces. Lett. 18(4), 453\u2013469 (2008). http:\/\/dx.doi.org\/10.1142\/S012962640800351X","journal-title":"Parallel Proces. Lett."},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Gasc, T., Vuyst, F.D., Peybernes, M., Poncet, R., Motte, R.: Building a more efficient Lagrange-remap scheme thanks to performance modeling. In: Papadrakakis, M., et al. (ed.) Proceedings of the ECCOMAS Congress 2016, the VII European Congress on Computational Methods in Applied Sciences and Engineering, Crete Island, Greece, 5\u201310 June 2016. https:\/\/www.eccomas2016.org\/proceedings\/pdf\/12210.pdf","DOI":"10.7712\/100016.1879.12210"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Hackenberg, D., Oldenburg, R., Molka, D., Sch\u00f6ne, R.: Introducing FIRESTARTER: a processor stress test utility. In: 2013 International Green Computing Conference Proceedings. pp. 1\u20139, June 2013","DOI":"10.1109\/IGCC.2013.6604507"},{"key":"16_CR4","doi-asserted-by":"crossref","unstructured":"Hackenberg, D., Sch\u00f6ne, R., Ilsche, T., Molka, D., Schuchart, J., Geyer, R.: An energy efficiency feature survey of the Intel Haswell processor. In: 2015 IEEE International Parallel and Distributed Processing Symposium Workshop, pp. 896\u2013904, May 2015","DOI":"10.1109\/IPDPSW.2015.70"},{"key":"16_CR5","doi-asserted-by":"publisher","unstructured":"Hager, G., Treibig, J., Habich, J., Wellein, G.: Exploring performance and power properties of modern multicore chips via simple machine models. Concurr. Computat.: Pract. Exper. (2013). doi:10.1002\/cpe.3180","DOI":"10.1002\/cpe.3180"},{"issue":"3","key":"16_CR6","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1016\/0167-8191(89)90100-2","volume":"10","author":"RW Hockney","year":"1989","unstructured":"Hockney, R.W., Curington, I.J.: $$f_{1\/2}$$: a parameter to characterize memory and communication bottlenecks. Parallel Comput. 10(3), 277\u2013286 (1989)","journal-title":"Parallel Comput."},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Hofmann, J., Fey, D.: An ECM-based energy-efficiency optimization approach for bandwidth-limited streaming kernels on recent Intel Xeon processors. In: Proceedings of the 4th International Workshop on Energy Efficient Supercomputing, E2SC 2016, pp. 31\u201338. IEEE Press, Piscataway (2016). https:\/\/doi.org\/10.1109\/E2SC.2016.16","DOI":"10.1109\/E2SC.2016.010"},{"key":"16_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"210","DOI":"10.1007\/978-3-319-30695-7_16","volume-title":"Architecture of Computing Systems \u2013 ARCS 2016","author":"J Hofmann","year":"2016","unstructured":"Hofmann, J., Fey, D., Eitzinger, J., Hager, G., Wellein, G.: Analysis of Intel\u2019s Haswell microarchitecture using the ECM model and microbenchmarks. In: Hannig, F., Cardoso, J.M.P., Pionteck, T., Fey, D., Schr\u00f6der-Preikschat, W., Teich, J. (eds.) ARCS 2016. LNCS, vol. 9637, pp. 210\u2013222. Springer, Cham (2016). doi:10.1007\/978-3-319-30695-7_16"},{"key":"16_CR9","doi-asserted-by":"crossref","unstructured":"Hofmann, J., Fey, D., Riedmann, M., Eitzinger, J., Hager, G., Wellein, G.: Performance analysis of the Kahan-enhanced scalar product on current multi-core and many-core processors. Concurr. Comput.: Pract. Exp. (2016). http:\/\/dx.doi.org\/10.1002\/cpe.3921","DOI":"10.1002\/cpe.3921"},{"key":"16_CR10","doi-asserted-by":"crossref","unstructured":"Hofmann, J., Treibig, J., Hager, G., Wellein, G.: Comparing the performance of different x86 SIMD instruction sets for a medical imaging application on modern multi- and manycore chips. In: Proceedings of the 2014 Workshop on Programming Models for SIMD\/Vector Processing, WPMVP 2014, pp. 57\u201364. ACM, New York (2014). http:\/\/doi.acm.org\/10.1145\/2568058.2568068","DOI":"10.1145\/2568058.2568068"},{"key":"16_CR11","unstructured":"Intel Corporation: Intel Xeon Processor E5-1600, E5-2400, and E5-2600 v3 Product Families - volume 2 of 2, Registers. http:\/\/www.intel.com\/content\/dam\/www\/public\/us\/en\/documents\/datasheets\/xeon-e5-v3-datasheet-vol-2.pdf"},{"key":"16_CR12","unstructured":"Intel Corporation: Intel Xeon Processor E5 v3 Product Family. http:\/\/www.intel.com\/content\/dam\/www\/public\/us\/en\/documents\/specification-updates\/xeon-e5-v3-spec-update.pdf"},{"key":"16_CR13","first-page":"19","volume":"19","author":"JD McCalpin","year":"1995","unstructured":"McCalpin, J.D.: Memory bandwidth and machine balance in current high performance computers. IEEE Comput. Soc. Tech. Comm. Comput. Archit. (TCCA) Newsl. 19, 19\u201325 (1995)","journal-title":"IEEE Comput. Soc. Tech. Comm. Comput. Archit. (TCCA) Newsl."},{"key":"16_CR14","unstructured":"Microway Inc.: Detailed specifications of the Intel Xeon E5-2600 v4 Broadwell-EP processors"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Molka, D., Hackenberg, D., Sch\u00f6ne, R., Nagel, W.E.: Cache coherence protocol and memory performance of the Intel Haswell-EP architecture. In: Proceedings of the 44th International Conference on Parallel Processing (ICPP 2015). IEEE (2015)","DOI":"10.1109\/ICPP.2015.83"},{"key":"16_CR16","unstructured":"Kottapalli, S., Geetha, V., Neefs, H.G., Choi, Y.: Patent US20130007376 A1: Opportunistic Snoop Broadcast (OSB) in directory enabled home snoopy systems. http:\/\/www.google.com\/patents\/US20130007376"},{"issue":"4","key":"16_CR17","first-page":"273","volume":"22","author":"R Sch\u00f6ne","year":"2014","unstructured":"Sch\u00f6ne, R., Treibig, J., Dolz, M.F., Guillen, C., Navarrete, C., Knobloch, M., Rountree, B.: Tools and methods for measuring and tuning the energy efficiency of HPC systems. Sci. Program. 22(4), 273\u2013283 (2014). http:\/\/dx.doi.org\/10.3233\/SPR-140393","journal-title":"Sci. Program."},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Stengel, H., Treibig, J., Hager, G., Wellein, G.: Quantifying performance bottlenecks of stencil computations using the Execution-Cache-Memory model. In: Proceedings of the 29th ACM International Conference on Supercomputing, ICS 2015. ACM, New York (2015). http:\/\/doi.acm.org\/10.1145\/2751205.2751240","DOI":"10.1145\/2751205.2751240"},{"issue":"2","key":"16_CR19","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1177\/1094342012442424","volume":"27","author":"J Treibig","year":"2013","unstructured":"Treibig, J., Hager, G., Hofmann, H.G., Hornegger, J., Wellein, G.: Pushing the limits for medical image reconstruction on recent standard multicore processors. Int. J. High Perform. Comput. Appl. 27(2), 162\u2013177 (2013). http:\/\/dx.doi.org\/10.1177\/1094342012442424","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"16_CR20","first-page":"27","volume-title":"Tools for High Performance Computing","author":"J Treibig","year":"2011","unstructured":"Treibig, J., Hager, G., Wellein, G.: likwid-bench: an extensible microbenchmarking platform for x86 multicore compute nodes. In: Brunst, H., M\u00fcller, M., Nagel, W., Resch, M. (eds.) Tools for High Performance Computing, pp. 27\u201336. Springer, Heidelberg (2011)"},{"key":"16_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"376","DOI":"10.1007\/978-3-319-20119-1_27","volume-title":"High Performance Computing","author":"T Wilde","year":"2015","unstructured":"Wilde, T., Auweter, A., Shoukourian, H., Bode, A.: Taking advantage of node power variation in homogenous HPC systems to save energy. In: Kunkel, J.M., Ludwig, T. (eds.) ISC High Performance 2015. LNCS, vol. 9137, pp. 376\u2013393. Springer, Cham (2015). doi:10.1007\/978-3-319-20119-1_27"},{"issue":"4","key":"16_CR22","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52(4), 65\u201376 (2009). http:\/\/doi.acm.org\/10.1145\/1498765.1498785","journal-title":"Commun. ACM"},{"issue":"7","key":"16_CR23","doi-asserted-by":"publisher","first-page":"2295","DOI":"10.1002\/cpe.3489","volume":"28","author":"M Wittmann","year":"2016","unstructured":"Wittmann, M., Hager, G., Zeiser, T., Treibig, J., Wellein, G.: Chip-level and multi-node analysis of energy-optimized lattice Boltzmann CFD simulations. Concurr. Comput.: Pract. Exp. 28(7), 2295\u20132315 (2016). http:\/\/dx.doi.org\/10.1002\/cpe.3489","journal-title":"Concurr. Comput.: Pract. Exp."}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-58667-0_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,7]],"date-time":"2021-12-07T13:05:53Z","timestamp":1638882353000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-58667-0_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319586663","9783319586670"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-58667-0_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"12 May 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}