{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,25]],"date-time":"2025-07-25T10:13:57Z","timestamp":1753438437415},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319729701"},{"type":"electronic","value":"9783319729718"}],"license":[{"start":{"date-parts":[[2017,12,23]],"date-time":"2017-12-23T00:00:00Z","timestamp":1513987200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-72971-8_5","type":"book-chapter","created":{"date-parts":[[2017,12,22]],"date-time":"2017-12-22T08:44:54Z","timestamp":1513932294000},"page":"91-113","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Modeling Large Compute Nodes with\u00a0Heterogeneous Memories with\u00a0Cache-Aware Roofline Model"],"prefix":"10.1007","author":[{"given":"Nicolas","family":"Denoyelle","sequence":"first","affiliation":[]},{"given":"Brice","family":"Goglin","sequence":"additional","affiliation":[]},{"given":"Aleksandar","family":"Ilic","sequence":"additional","affiliation":[]},{"given":"Emmanuel","family":"Jeannot","sequence":"additional","affiliation":[]},{"given":"Leonel","family":"Sousa","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,12,23]]},"reference":[{"issue":"6","key":"5_CR1","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1109\/MSP.2009.934110","volume":"26","author":"G Blake","year":"2009","unstructured":"Blake, G., Dreslinski, R.G., Mudge, T.: A survey of multicore processors. IEEE Signal Process. Magaz. 26(6), 26\u201337 (2009)","journal-title":"IEEE Signal Process. Magaz."},{"key":"5_CR2","unstructured":"Blagodurov, S., Zhuravlev, S., Dashti, M., Fedorova, A.: A case for NUMA-aware contention management on multicore systems. In: 2011 USENIX Annual Technical Conference, Portland, OR, USA, 15\u201317 June 2011 (2011)"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Reinders, J., Jeffers, J., Sodani, A.: Intel Xeon Phi Processor High Performance Programming Knights Landing Edition (2016)","DOI":"10.1016\/B978-0-12-809194-4.00002-8"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Ziakas, D., Baum, A., Maddox, R.A., Safranek, R.J.: Intel\u00ae quickpath interconnect architectural features supporting scalable system architectures. In: 2010 IEEE 18th Annual Symposium on High Performance Interconnects (HOTI), pp. 1\u20136. IEEE (2010)","DOI":"10.1109\/HOTI.2010.24"},{"key":"5_CR5","unstructured":"Bull atos technologies: Bull coherent switch. http:\/\/support.bull.com\/ols\/product\/platforms\/hw-extremcomp\/hw-bullx-sup-node\/BCS\/index.htm"},{"issue":"1","key":"5_CR6","doi-asserted-by":"crossref","first-page":"21","DOI":"10.1109\/L-CA.2013.6","volume":"13","author":"A Ilic","year":"2014","unstructured":"Ilic, A., Pratas, F., Sousa, L.: Cache-aware roofline model: upgrading the loft. IEEE Comput. Archit. Lett. 13(1), 21\u201324 (2014)","journal-title":"IEEE Comput. Archit. Lett."},{"issue":"4","key":"5_CR7","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52(4), 65\u201376 (2009)","journal-title":"Commun. ACM"},{"key":"5_CR8","unstructured":"Cantalupo, C., Venkatesan, V., Hammond, J., Czurlyo, K., Hammond, S.D.: Memkind: an extensible heap memory manager for heterogeneous memory platforms and mixed memory policies. Technical report, Sandia National Laboratories (SNL-NM), Albuquerque, NM (United States) (2015)"},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"Broquedis, F., Clet-Ortega, J., Moreaud, S., Furmento, N., Goglin, B., Mercier, G., Thibault, S., Namyst, R.: hwloc: a generic framework for managing hardware affinities in HPC applications. In: The 18th Euromicro International Conference on Parallel, Distributed and Network-Based Computing (PDP 2010), Pisa, Italy. IEEE, February 2010","DOI":"10.1109\/PDP.2010.67"},{"key":"5_CR10","unstructured":"Kleen, A.: A NUMA API for LINUX. Novel Inc. (2005)"},{"key":"5_CR11","unstructured":"Lepers, B., Quema, V., Fedorova, A.: Thread and memory placement on NUMA systems: asymmetry matters. In: 2015 USENIX Annual Technical Conference (USENIX ATC 2015), Santa Clara, CA, pp. 277\u2013289. USENIX Association, July 2015"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"Chou, C., Jaleel, A., Qureshi, M.K.: CAMEO: a two-level memory organization with capacity of main memory and flexibility of hardware-managed cache. In: Proceedings of the 47th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO-47), Washington, DC, USA, pp. 1\u201312. IEEE Computer Society (2014)","DOI":"10.1109\/MICRO.2014.63"},{"key":"5_CR13","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1177\/109434209100500306","volume":"5","author":"DH Bailey","year":"1991","unstructured":"Bailey, D.H., Barszcz, E., Barton, J.T., Browning, D.S., Carter, R.L., Fatoohi, R.A., Frederickson, P.O., Lasinski, T.A., Simon, H.D., Venkatakrishnan, V., Weeratunga, S.K.: The NAS parallel benchmarks. Int. J. Supercomput. Appl. 5, 63\u201373 (1991). Technical report","journal-title":"Int. J. Supercomput. Appl."},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Karlin, I., Keasler, J., Neely, R.: Lulesh 2.0 updates and changes. Technical report LLNL-TR-641973, August 2013","DOI":"10.2172\/1090032"},{"key":"5_CR15","unstructured":"Lepers, B., Qu\u00e9ma, V., Fedorova, A.: Thread and memory placement on NUMA systems: asymmetry matters. In: USENIX Annual Technical Conference, pp. 277\u2013289 (2015)"},{"key":"5_CR16","doi-asserted-by":"crossref","unstructured":"Ramos, S., Hoefler, T.: Capability Models for Manycore Memory Systems: A Case-Study with Xeon Phi KNL (2017)","DOI":"10.1109\/IPDPS.2017.30"},{"key":"5_CR17","unstructured":"The Memkind Library. http:\/\/memkind.github.io\/memkind"},{"issue":"1","key":"5_CR18","doi-asserted-by":"crossref","first-page":"52","DOI":"10.1109\/TC.2016.2582151","volume":"66","author":"A Ilic","year":"2017","unstructured":"Ilic, A., Pratas, F., Sousa, L.: Beyond the roofline: cache-aware power and energy-efficiency modeling for multi-cores. IEEE Trans. Comput. 66(1), 52\u201358 (2017)","journal-title":"IEEE Trans. Comput."},{"key":"5_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1007\/978-3-319-46079-6_24","volume-title":"High Performance Computing","author":"D Doerfler","year":"2016","unstructured":"Doerfler, D., et al.: Applying the roofline performance model to the intel xeon phi knights landing processor. In: Taufer, M., Mohr, B., Kunkel, J.M. (eds.) ISC High Performance 2016. LNCS, vol. 9945, pp. 339\u2013353. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46079-6_24"},{"issue":"1","key":"5_CR20","first-page":"56","volume":"1","author":"OG Lorenzo","year":"2014","unstructured":"Lorenzo, O.G., Pena, T.F., Cabaleiro, J.C., Pichel, J.C., Rivera, F.F.: Using an extended roofline model to understand data and thread affinities on NUMA systems. Ann. Multicore GPU Program. 1(1), 56\u201367 (2014)","journal-title":"Ann. Multicore GPU Program."},{"key":"5_CR21","unstructured":"Hofmann, J., Eitzinger, J., Fey, D.: Execution-cache-memory performance model: introduction and validation. CoRR abs\/1509.03118 (2015)"},{"key":"5_CR22","unstructured":"Intel: Intel Advisor Roofline (2017)"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Marques, D., Duarte, H., Ilic, A., Sousa, L., Belenov, R., Thierry, P., Matveev, Z.A.: Performance analysis with cache-aware roofline model in intel advisor. In: 2017 International Conference on High Performance Computing Simulation (HPCS), pp. 898\u2013907, July 2017","DOI":"10.1109\/HPCS.2017.150"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing Systems. Performance Modeling, Benchmarking, and Simulation"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-72971-8_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,8]],"date-time":"2019-10-08T13:29:48Z","timestamp":1570541388000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-72971-8_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12,23]]},"ISBN":["9783319729701","9783319729718"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-72971-8_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017,12,23]]}}}