{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T07:35:33Z","timestamp":1725867333799},"publisher-location":"Cham","reference-count":45,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319460789"},{"type":"electronic","value":"9783319460796"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-46079-6_15","type":"book-chapter","created":{"date-parts":[[2016,10,5]],"date-time":"2016-10-05T12:01:40Z","timestamp":1475668900000},"page":"207-227","source":"Crossref","is-referenced-by-count":0,"title":["Exploring Energy Efficiency for GPU-Accelerated POWER Servers"],"prefix":"10.1007","author":[{"given":"Thorsten","family":"Hater","sequence":"first","affiliation":[]},{"given":"Benedikt","family":"Anlauf","sequence":"additional","affiliation":[]},{"given":"Paul","family":"Baumeister","sequence":"additional","affiliation":[]},{"given":"Markus","family":"B\u00fchler","sequence":"additional","affiliation":[]},{"given":"Jiri","family":"Kraus","sequence":"additional","affiliation":[]},{"given":"Dirk","family":"Pleiter","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,10,6]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1016\/j.softx.2015.06.001","volume":"1","author":"MJ Abraham","year":"2015","unstructured":"Abraham, M.J., Murtola, T., Schulz, R., P\u00e1ll, S., Smith, J.C., Hess, B., Lindahl, E.: GROMACS: high performance molecular simulations through multi-level parallelism from laptops to supercomputers. SoftwareX 1, 19\u201325 (2015)","journal-title":"SoftwareX"},{"issue":"2","key":"15_CR2","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/s00450-012-0227-z","volume":"29","author":"P Alonso","year":"2012","unstructured":"Alonso, P., Dolz, M.F., Mayo, R., Quintana-Ort\u00ed, E.S.: Modeling power and energy of the task-parallel Cholesky factorization on multicore processors. Comput. Sci. Res. Dev. 29(2), 105\u2013112 (2012). doi: 10.1007\/s00450-012-0227-z","journal-title":"Comput. Sci. Res. Dev."},{"doi-asserted-by":"crossref","unstructured":"Baumeister, P.F., Hater, T., Kraus, J., Pleiter, D., Wahl, P.: A performance model for GPU-accelerated FDTD applications. In: 2015 IEEE 22nd International Conference on High Performance Computing (HiPC), pp. 185\u2013193, December 2015","key":"15_CR3","DOI":"10.1109\/HiPC.2015.24"},{"issue":"1468","key":"15_CR4","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1098\/rspa.1967.0230","volume":"302","author":"J Beeby","year":"1967","unstructured":"Beeby, J.: The density of electrons in a perfect or imperfect lattice. Proc. R. Soc. Lond. A Math. Phys. Eng. Sci. 302(1468), 113\u2013136 (1967). The Royal Society","journal-title":"Proc. R. Soc. Lond. A Math. Phys. Eng. Sci."},{"key":"15_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"386","DOI":"10.1007\/11602569_41","volume-title":"High Performance Computing \u2013 HiPC 2005","author":"G Bilardi","year":"2005","unstructured":"Bilardi, G., Pietracaprina, A., Pucci, G., Schifano, F., Tripiccione, R.: The potential of on-chip multiprocessing for QCD machines. In: Bader, D.A., Parashar, M., Sridhar, V., Prasanna, V.K. (eds.) HiPC 2005. LNCS, vol. 3769, pp. 386\u2013397. Springer, Heidelberg (2005). doi: 10.1007\/11602569_41"},{"unstructured":"Bui, V., Norris, B., Huck, K., McInnes, L.C., Li, L., Hernandez, O., Chapman, B.: A component infrastructure for performance and power modeling of parallel scientific applications. In: Proceedings of the 2008 compFrame\/HPC-GECO Workshop on Component Based High Performance, CBHPC 2008, pp. 6:1\u20136:11. (2008). http:\/\/doi.acm.org\/10.1145\/1456190.1456199","key":"15_CR6"},{"doi-asserted-by":"crossref","unstructured":"Cabrera, A., Almeida, F., Blanco, V., Gim\u00e9nez, D.: Analytical modeling of the energy consumption for the High Performance Linpack. In: 2013 21st Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP), pp. 343\u2013350, February 2013","key":"15_CR7","DOI":"10.1109\/PDP.2013.56"},{"unstructured":"Caldeira, A.B., et al.: IBM Power System S824L technical overview and introduction (2014). http:\/\/www.redbooks.ibm.com\/Redbooks.nsf\/RedbookAbstracts\/redp5139.html","key":"15_CR8"},{"doi-asserted-by":"crossref","unstructured":"David, H., Gorbatov, E., Hanebutte, U.R., Khanna, R., Le, C.: RAPL: memory power estimation and capping. In: 2010 ACM\/IEEE International Symposium on Low-Power Electronics and Design (ISLPED), pp. 189\u2013194, August 2010","key":"15_CR9","DOI":"10.1145\/1840845.1840883"},{"unstructured":"Demmel, J., Gearhart, A.: Instrumenting linear algebra energy consumption via on-chip energy counters. Technical report, UCB\/EECS-2012-168, EECS Department, University of California, Berkeley, June 2012. http:\/\/www.eecs.berkeley.edu\/Pubs\/TechRpts\/2012\/EECS-2012-168.html","key":"15_CR10"},{"doi-asserted-by":"crossref","unstructured":"Dongarra, J., Ltaief, H., Luszczek, P., Weaver, V.M.: Energy footprint of advanced dense numerical linear algebra using tile algorithms on multicore architecture. In: The 2nd International Conference on Cloud and Green Computing, November 2012","key":"15_CR11","DOI":"10.1109\/CGC.2012.113"},{"issue":"11","key":"15_CR12","doi-asserted-by":"crossref","first-page":"1576","DOI":"10.1109\/TC.2010.65","volume":"59","author":"S Eyerman","year":"2010","unstructured":"Eyerman, S., Eeckhout, L.: A counter architecture for online DVFS profitability estimation. IEEE Trans. Comput. 59(11), 1576\u20131583 (2010)","journal-title":"IEEE Trans. Comput."},{"unstructured":"Feng, W.C., et al.: Green500 list, November 2015. http:\/\/green500.org","key":"15_CR13"},{"unstructured":"Flinn, J., Satyanarayanan, M.: PowerScope: a tool for profiling the energy usage of mobile applications. In: Proceedings of the Second IEEE Workshop on Mobile Computer Systems and Applications, WMCSA 1999, p. 2 (1999). http:\/\/dl.acm.org\/citation.cfm?id=520551.837522","key":"15_CR14"},{"issue":"2","key":"15_CR15","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1109\/MM.2011.29","volume":"31","author":"M Floyd","year":"2011","unstructured":"Floyd, M., et al.: Introducing the adaptive energy management features of the POWER7 chip. IEEE Micro 31(2), 60\u201375 (2011)","journal-title":"IEEE Micro"},{"issue":"1","key":"15_CR16","doi-asserted-by":"crossref","first-page":"315","DOI":"10.1007\/BF01385726","volume":"60","author":"RW Freund","year":"1991","unstructured":"Freund, R.W., Nachtigal, N.: QMR: a quasi-minimal residual method for non-Hermitian linear systems. Numer. Math. 60(1), 315\u2013339 (1991)","journal-title":"Numer. Math."},{"doi-asserted-by":"crossref","unstructured":"Friedrich, J., Le, H., Starke, W., Stuechli, J., Sinharoy, B., Fluhr, E., Dreps, D., Zyuban, V., Still, G., Gonzalez, C., Hogenmiller, D., Malgioglio, F., Nett, R., Puri, R., Restle, P., Shan, D., Deniz, Z., Wendel, D., Ziegler, M., Victor, D.: The POWER8 $$^{\\rm {TM}}$$ processor: designed for big data, analytics, and cloud environments. In: 2014 IEEE International Conference on IC Design Technology (ICICDT), pp. 1\u20134, May 2014","key":"15_CR17","DOI":"10.1109\/ICICDT.2014.6838618"},{"issue":"5","key":"15_CR18","doi-asserted-by":"crossref","first-page":"658","DOI":"10.1109\/TPDS.2009.76","volume":"21","author":"R Ge","year":"2010","unstructured":"Ge, R., Feng, X., Song, S., Chang, H.C., Li, D., Cameron, K.W.: PowerPack: energy profiling and analysis of high-performance systems and applications. IEEE Trans. Parallel Distrib. Syst. 21(5), 658\u2013671 (2010)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"doi-asserted-by":"crossref","unstructured":"Ghosh, S., Chandrasekaran, S., Chapman, B.: Statistical modeling of power\/energy of scientific kernels on a multi-GPU system. In: 2013 International Green Computing Conference (IGCC), pp. 1\u20136, June 2013","key":"15_CR19","DOI":"10.1109\/IGCC.2013.6604488"},{"doi-asserted-by":"crossref","unstructured":"Hackenberg, D., Ilsche, T., Schuchart, J., Sch\u00f6ne, R., Nagel, W.E., Simon, M., Georgiou, Y.: HDEEM: high definition energy efficiency monitoring. In: Energy Efficient Supercomputing Workshop, E2SC 2014, pp. 1\u201310, November 2014","key":"15_CR20","DOI":"10.1109\/E2SC.2014.13"},{"doi-asserted-by":"crossref","unstructured":"Isci, C., Martonosi, M.: Runtime power monitoring in high-end processors: methodology and empirical data. In: 2003 Proceedings of the 36th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO-36, pp. 93\u2013104, December 2003","key":"15_CR21","DOI":"10.1109\/MICRO.2003.1253186"},{"issue":"2018","key":"15_CR22","doi-asserted-by":"crossref","first-page":"20130278","DOI":"10.1098\/rsta.2013.0278","volume":"372","author":"P Klav\u00edk","year":"2014","unstructured":"Klav\u00edk, P., Malossi, A.C.I., Bekas, C., Curioni, A.: Changing computing paradigms towards power efficiency. Philos. Trans. R. Soc. Lond. A: Math. Phys. Eng. Sci. 372(2018), 20130278 (2014)","journal-title":"Philos. Trans. R. Soc. Lond. A: Math. Phys. Eng. Sci."},{"issue":"3","key":"15_CR23","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s00450-013-0245-5","volume":"29","author":"M Knobloch","year":"2013","unstructured":"Knobloch, M., Foszczynski, M., Homberg, W., Pleiter, D., B\u00f6ttiger, H.: Mapping fine-grained power measurements to HPC application runtime characteristics on IBM POWER7. Comput. Sci. Res. Dev. 29(3), 211\u2013219 (2013). doi: 10.1007\/s00450-013-0245-5","journal-title":"Comput. Sci. Res. Dev."},{"key":"15_CR24","doi-asserted-by":"crossref","first-page":"1111","DOI":"10.1103\/PhysRev.94.1111","volume":"94","author":"W Kohn","year":"1954","unstructured":"Kohn, W., Rostoker, N.: Solution of the Schr\u00f6dinger equation in periodic lattices with an application to metallic Lithium. Phys. Rev. 94, 1111\u20131120 (1954)","journal-title":"Phys. Rev."},{"issue":"6","key":"15_CR25","doi-asserted-by":"crossref","first-page":"392","DOI":"10.1016\/0031-8914(47)90013-X","volume":"13","author":"J Korringa","year":"1947","unstructured":"Korringa, J.: On the calculation of the energy of a Bloch wave in a metal. Physica 13(6), 392\u2013400 (1947)","journal-title":"Physica"},{"unstructured":"Kraus, J.: Increase performance with GPU boost and K80 autoboost (2014). https:\/\/devblogs.nvidia.com\/parallelforall\/increase-performance-gpu-boost-k80-autoboost\/","key":"15_CR26"},{"key":"15_CR27","doi-asserted-by":"crossref","first-page":"441","DOI":"10.2528\/PIER11042002","volume":"116","author":"KH Lee","year":"2011","unstructured":"Lee, K.H., Ahmed, I., Goh, R.S., Khoo, E.H., Li, E.P., Hung, T.G.: Implementation of the FDTD method based on Lorentz-Drude dispersive model on GPU for plasmonics applications. Prog. Electromagnet. Res. 116, 441\u2013456 (2011)","journal-title":"Prog. Electromagnet. Res."},{"doi-asserted-by":"crossref","unstructured":"Lefurgy, C., Wang, X., Ware, M.: Server-level power control. In: 2007 Fourth International Conference on Autonomic Computing, ICAC 2007, p. 4, June 2007","key":"15_CR28","DOI":"10.1109\/ICAC.2007.35"},{"unstructured":"Lindahl, E.: Molecular simulation with GROMACS on CUDA GPUs (2013). http:\/\/on-demand.gputechconf.com\/gtc\/2013\/webinar\/gromacs-kepler-gpus-gtc-express-webinar.pdf","key":"15_CR29"},{"unstructured":"Pleiter, D.: Parallel computer architectures. In: 45th IFF Spring School 2014 \u201cComputing Solids Models, ab-initio methods and supercomputing\u201d. Schriften des Forschungszentrums J\u00fclich, Reihe Schl\u00fcsseltechnologien, vol. 74 (2014)","key":"15_CR30"},{"doi-asserted-by":"crossref","unstructured":"Rountree, B., Lowenthal, D.K., Schulz, M., de Supinski, B.R.: Practical performance prediction under dynamic voltage frequency scaling. In: 2011 International Green Computing Conference and Workshops (IGCC), pp. 1\u20138, July 2011","key":"15_CR31","DOI":"10.1109\/IGCC.2011.6008553"},{"unstructured":"Ryffel, S.: LEA $$^2$$ P: the Linux energy attribution and accounting platform. Master\u2019s thesis, Swiss Federal Institute of Technology (ETH) (2009). http:\/\/ftp.tik.ee.ethz.ch\/pub\/students\/2009-FS\/MA-2009-04.pdf","key":"15_CR32"},{"key":"15_CR33","doi-asserted-by":"crossref","first-page":"55","DOI":"10.2528\/PIER12010505","volume":"125","author":"A Shahmansouri","year":"2012","unstructured":"Shahmansouri, A., Rashidian, B.: GPU implementation of split-field finite-difference time-domain method for Drude-Lorentz dispersive media. Prog. Electromagnet. Res. 125, 55\u201377 (2012)","journal-title":"Prog. Electromagnet. Res."},{"doi-asserted-by":"crossref","unstructured":"Song, S., Su, C., Rountree, B., Cameron, K.W.: A simplified and accurate model of power-performance efficiency on emergent GPU architectures. In: 2013 IEEE 27th International Symposium on Parallel Distributed Processing (IPDPS), pp. 673\u2013686, May 2013","key":"15_CR34","DOI":"10.1109\/IPDPS.2013.73"},{"unstructured":"Song, S.L., Barker, K., Kerbyson, D.: Unified performance and power modeling of scientific workloads. In: Proceedings of the 1st International Workshop on Energy Efficient Supercomputing, E2SC 2013, pp. 4:1\u20134:8. (2013). http:\/\/doi.acm.org\/10.1145\/2536430.2536435","key":"15_CR35"},{"doi-asserted-by":"crossref","unstructured":"Subramaniam, B., Feng, W.C.: Statistical power and performance modeling for optimizing the energy efficiency of scientific computing. In: Green Computing and Communications (GreenCom), pp. 139\u2013146, December 2010","key":"15_CR36","DOI":"10.1109\/GreenCom-CPSCom.2010.138"},{"key":"15_CR37","volume-title":"Others: Computational Electrodynamics: The Finite-Difference Time-Domain Method","author":"A Taflove","year":"1995","unstructured":"Taflove, A., Hagness, S.C.: Others: Computational Electrodynamics: The Finite-Difference Time-Domain Method. Artech House, Norwood (1995)"},{"issue":"10","key":"15_CR38","doi-asserted-by":"crossref","first-page":"559","DOI":"10.1016\/j.parco.2014.09.001","volume":"40","author":"L Tan","year":"2014","unstructured":"Tan, L., Kothapalli, S., Chen, L., Hussaini, O., Bissiri, R., Chen, Z.: A survey of power and energy efficient techniques for high performance numerical linear algebra operations. Parallel Comput. 40(10), 559\u2013573 (2014)","journal-title":"Parallel Comput."},{"key":"15_CR39","doi-asserted-by":"crossref","first-page":"235103","DOI":"10.1103\/PhysRevB.85.235103","volume":"85","author":"A Thiess","year":"2012","unstructured":"Thiess, A., et al.: Massively parallel density functional calculations for thousands of atoms: KKRnano. Phys. Rev. B 85, 235103 (2012)","journal-title":"Phys. Rev. B"},{"doi-asserted-by":"crossref","unstructured":"Tiwari, A., Laurenzano, M.A., Carrington, L., Snavely, A.: Modeling power and energy usage of HPC kernels. In: 2012 IEEE 26th International Parallel and Distributed Processing Symposium Workshops PhD Forum (IPDPSW), pp. 990\u2013998, May 2012","key":"15_CR40","DOI":"10.1109\/IPDPSW.2012.121"},{"doi-asserted-by":"crossref","unstructured":"Wahl, P., Ly-Gagnon, D., Debaes, C., Miller, D., Thienpont, H.: B-CALM: an open-source GPU-based 3D-FDTD with multi-pole dispersion for plasmonics. In: 2011 11th International Conference on Numerical Simulation of Optoelectronic Devices (NUSOD), pp. 11\u201312, September 2011","key":"15_CR41","DOI":"10.1109\/NUSOD.2011.6041169"},{"doi-asserted-by":"crossref","unstructured":"Weaver, V.M., Johnson, M., Kasichayanula, K., Ralph, J., Luszczek, P., Terpstra, D., Moore, S.: Measuring energy and power with PAPI. In: 2012 41st International Conference on Parallel Processing Workshops (ICPPW), pp. 262\u2013268, September 2012","key":"15_CR42","DOI":"10.1109\/ICPPW.2012.39"},{"key":"15_CR43","doi-asserted-by":"publisher","first-page":"2295","DOI":"10.1002\/cpe.3489","volume":"28","author":"M Wittmann","year":"2016","unstructured":"Wittmann, M., Hager, G., Zeiser, T., Treibig, J., Wellein, G.: Chip-level and multi-node analysis of energy-optimized lattice Boltzmann CFD simulations. Concur. Comput. Pract. Exper. 28, 2295\u20132315 (2016). doi: 10.1002\/cpe.3489","journal-title":"Concur. Comput. Pract. Exper."},{"doi-asserted-by":"crossref","unstructured":"Wu, G., Greathouse, J.L., Lyashevsky, A., Jayasena, N., Chiou, D.: GPGPU performance and power estimation using machine learning. In: 2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA), pp. 564\u2013576, February 2015","key":"15_CR44","DOI":"10.1109\/HPCA.2015.7056063"},{"issue":"6","key":"15_CR45","doi-asserted-by":"crossref","first-page":"1:1","DOI":"10.1147\/JRD.2013.2279597","volume":"57","author":"V Zyuban","year":"2013","unstructured":"Zyuban, V., Taylor, S.A., Christensen, B., Hall, A.R., Gonzalez, C.J., Friedrich, J., Clougherty, F., Tetzloff, J., Rao, R.: IBM POWER7+ design for higher frequency at fixed power. IBM J. Res. Dev. 57(6), 1:1\u20131:18 (2013)","journal-title":"IBM J. Res. Dev."}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-46079-6_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,14]],"date-time":"2019-09-14T04:51:21Z","timestamp":1568436681000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-46079-6_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319460789","9783319460796"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-46079-6_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]}}}