{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,22]],"date-time":"2025-05-22T06:06:26Z","timestamp":1747893986227,"version":"3.37.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319920399"},{"type":"electronic","value":"9783319920405"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-92040-5_12","type":"book-chapter","created":{"date-parts":[[2018,5,28]],"date-time":"2018-05-28T07:55:05Z","timestamp":1527494105000},"page":"226-245","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["A Novel Multi-level Integrated Roofline Model Approach for Performance Characterization"],"prefix":"10.1007","author":[{"given":"Tuomas","family":"Koskela","sequence":"first","affiliation":[]},{"given":"Zakhar","family":"Matveev","sequence":"additional","affiliation":[]},{"given":"Charlene","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Adetokunbo","family":"Adedoyin","sequence":"additional","affiliation":[]},{"given":"Roman","family":"Belenov","sequence":"additional","affiliation":[]},{"given":"Philippe","family":"Thierry","sequence":"additional","affiliation":[]},{"given":"Zhengji","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Rahulkumar","family":"Gayatri","sequence":"additional","affiliation":[]},{"given":"Hongzhang","family":"Shan","sequence":"additional","affiliation":[]},{"given":"Leonid","family":"Oliker","sequence":"additional","affiliation":[]},{"given":"Jack","family":"Deslippe","sequence":"additional","affiliation":[]},{"given":"Ron","family":"Green","sequence":"additional","affiliation":[]},{"given":"Samuel","family":"Williams","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,5,29]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Williams, S., et al.: CACM 52(4), 65\u201376 (2009)","DOI":"10.1145\/1498765.1498785"},{"issue":"1","key":"12_CR2","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/L-CA.2012.8","volume":"12","author":"A Ilic","year":"2013","unstructured":"Ilic, A., et al.: IEEE Comput. Architect. Lett. 12(1), 21\u201324 (2013)","journal-title":"IEEE Comput. Architect. Lett."},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Marques, D., et al.: Performance analysis with cache-aware roofline model in intel advisor. In: 2017 International Conference on High Performance Computing & Simulation (HPCS), pp. 898\u2013907. IEEE, 17 July 2017","DOI":"10.1109\/HPCS.2017.150"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Doerfler, D., et al.: Applying the roofline performance model to the intel xeon phi knights landing processor. In: ISC Workshops (2016)","DOI":"10.1007\/978-3-319-46079-6_24"},{"key":"12_CR5","unstructured":"Intel Advisor Roofline. \nhttps:\/\/software.intel.com\/en-us\/articles\/intel-advisor-roofline"},{"key":"12_CR6","unstructured":"Intel(r) Advisor Roofline Analysis. CodeProject, February 2017 \nhttps:\/\/www.codeproject.com\/Articles\/1169323\/Intel-Advisor-Roofline-Analysis"},{"key":"12_CR7","unstructured":"How to use Intel Advisor Python. Intel Developer Zone, June 2017. \nhttps:\/\/software.intel.com\/en-us\/articles\/how-to-use-the-intel-advisor-python-api"},{"key":"12_CR8","unstructured":"Koskela, T., et al.: Performance tuning of scientific codes with the roofline model. Tutorial in SC 2017 (2017). \nhttp:\/\/bit.ly\/tut160\n\n, \nhttps:\/\/sc17.supercomputing.org\/full-program\/"},{"key":"12_CR9","unstructured":"Koskela, T., et al.: A practical approach to application performance tuning with the Roofline Model, Tutorial submitted to ISC 2018 (2018)"},{"key":"12_CR10","unstructured":"Classical molecular dynamics proxy application, Exascale Co-Design Center for Materials in Extreme Environments. \nexmatex.org\n\n, \nhttps:\/\/github.com\/ECP-copa\/CoMD.git"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Ku, S., et al.: Nuclear Fusion, vol. 49 no. 11, Article 115021 (2009)","DOI":"10.1088\/0029-5515\/49\/11\/115021"},{"key":"12_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1007\/978-3-319-67630-2_32","volume-title":"High Performance Computing","author":"T Koskela","year":"2017","unstructured":"Koskela, T., Deslippe, J.: Optimizing fusion PIC code performance at scale on cori phase two. In: Kunkel, J.M., Yokota, R., Taufer, M., Shalf, J. (eds.) ISC High Performance 2017. LNCS, vol. 10524, pp. 430\u2013440. Springer, Cham (2017). \nhttps:\/\/doi.org\/10.1007\/978-3-319-67630-2_32"},{"key":"12_CR13","unstructured":"https:\/\/software.intel.com\/en-us\/articles\/intel-xeon-processor-scalable-family-technical-overview"},{"issue":"1","key":"12_CR14","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1016\/0927-0256(96)00008-0","volume":"6","author":"G. Kresse","year":"1996","unstructured":"Kresse, G., Furthm\u00fcller, J.: Efficiency of ab-initio total energy calculations for metals and semiconductors using a plane-wave basis set. Comput. Mat. Sci. 6, 15 (1996)","journal-title":"Computational Materials Science"},{"key":"12_CR15","unstructured":"http:\/\/www.vasp.at\/"},{"key":"12_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/978-3-319-65578-9_8","volume-title":"Scaling OpenMP for Exascale Performance and Portability","author":"F Wende","year":"2017","unstructured":"Wende, F., Marsman, M., Zhao, Z., Kim, J.: Porting VASP from MPI to MPI+OpenMP [SIMD]. In: de Supinski, B.R., Olivier, S.L., Terboven, C., Chapman, B.M., M\u00fcller, M.S. (eds.) IWOMP 2017. LNCS, vol. 10468, pp. 107\u2013122. Springer, Cham (2017). \nhttps:\/\/doi.org\/10.1007\/978-3-319-65578-9_8"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Shan, H., et al.: Parallel implementation and performance optimization of the configuration-interaction method. In: Supercomputing (SC) (2015)","DOI":"10.1145\/2807591.2807618"},{"issue":"5","key":"12_CR18","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1109\/MCSE.2017.3421558","volume":"19","author":"H Johansen","year":"2017","unstructured":"Johansen, H., et al.: Toward exascale earthquake ground motion simulations for near-fault engineering analysis. Comput. Sci. Eng. 19(5), 27 (2017)","journal-title":"Comput. Sci. Eng."},{"key":"12_CR19","unstructured":"Mohd-Yusof, J.: CoDesign Molecular Dynamics (CoMD) Proxy App, LA-UR-12-21782, Los Alamos National Lab (2012)"},{"key":"12_CR20","doi-asserted-by":"publisher","unstructured":"Cicotti, P., et al.: An evaluation of threaded models for a classical MD proxy application. In: 2014 Hardware-Software Co-Design for High Performance Computing, New Orleans, LA, pp. 41\u201348 (2014). \nhttps:\/\/doi.org\/10.1109\/Co-HPC.2014.6","DOI":"10.1109\/Co-HPC.2014.6"},{"key":"12_CR21","unstructured":"Adedoyin, A.: A Case Study on Software Modernizationusing CoMD - A Molecular Dynamics Proxy Application, LA-UR-17-22676, Los Alamos National Lab (2017)"},{"key":"12_CR22","unstructured":"Gunter, D., Adedoyin, A.: Kokkos Port of CoMD Mini-App, DOE COE Performance Portability Meeting (2017)"},{"issue":"17","key":"12_CR23","doi-asserted-by":"publisher","first-page":"2143","DOI":"10.1002\/cpe.1483","volume":"21","author":"TC Germann","year":"2009","unstructured":"Germann, T.C., et al.: 369 Tflop-s molecular dynamics simulations on the petaflop hybrid supercomputer \u2018Roadrunner\u2019. Concurrency Comput. Pract. Experience 21(17), 2143\u20132159 (2009)","journal-title":"Concurrency Comput. Pract. Experience"},{"key":"12_CR24","unstructured":"https:\/\/berkeleygw.org"},{"key":"12_CR25","unstructured":"https:\/\/github.com\/cyanguwa\/BerkeleyGW-GPP"},{"issue":"17","key":"12_CR26","doi-asserted-by":"publisher","first-page":"2573","DOI":"10.1088\/0953-8984\/15\/17\/312","volume":"15","author":"JA Soininen","year":"2003","unstructured":"Soininen, J.A., et al.: Electron self-energy calculation using a general multi-pole approximation. J. Phys. Condensed Matter 15(17), 2573 (2003)","journal-title":"J. Phys. Condensed Matter"},{"key":"12_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1007\/978-3-642-14390-8_64","volume-title":"Parallel Processing and Applied Mathematics","author":"J Treibig","year":"2010","unstructured":"Treibig, J., Hager, G.: Introducing a performance model for bandwidth-limited loop kernels. In: Wyrzykowski, R., Dongarra, J., Karczewski, K., Wasniewski, J. (eds.) PPAM 2009. LNCS, vol. 6067, pp. 615\u2013624. Springer, Heidelberg (2010). \nhttps:\/\/doi.org\/10.1007\/978-3-642-14390-8_64"},{"key":"12_CR28","unstructured":"http:\/\/icl.cs.utk.edu\/papi"},{"key":"12_CR29","unstructured":"https:\/\/github.com\/RRZE-HPC\/likwid"},{"issue":"7","key":"12_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/173284.155333","volume":"28","author":"David Culler","year":"1993","unstructured":"Culler, D., et al.: LogP: towards a realistic model of parallel computation. In: PPoPP (1993)","journal-title":"ACM SIGPLAN Notices"},{"issue":"1","key":"12_CR31","first-page":"71","volume":"44","author":"A Alexandrov","year":"1997","unstructured":"Alexandrov, A., et al.: LogGP: incorporating long messages into the LogP model. JPDC 44(1), 71\u201379 (1997)","journal-title":"JPDC"},{"key":"12_CR32","doi-asserted-by":"crossref","unstructured":"Altaf, M.B., Wood, D.A.: LogCA: a performance model for hardware accelerators. In: ISCA (2017)","DOI":"10.1145\/3079856.3080216"},{"issue":"2","key":"12_CR33","first-page":"287","volume":"20","author":"S Shende","year":"2005","unstructured":"Shende, S., Malony, A.: The TAU parallel performance system. IJHPCA 20(2), 287\u2013311 (2005)","journal-title":"IJHPCA"},{"key":"12_CR34","unstructured":"Adhianto, L., et al.: HPCToolkit: performance measurement and analysis for supercomputers with node-level parallelism. In: Workshop on Node Level Parallelism for Large Scale Supercomputers (2008)"},{"key":"12_CR35","unstructured":"http:\/\/docs.cray.com"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-92040-5_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2018,5,28]],"date-time":"2018-05-28T08:00:11Z","timestamp":1527494411000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-92040-5_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319920399","9783319920405"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-92040-5_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}