{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T20:30:21Z","timestamp":1782937821604,"version":"3.54.5"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319102139","type":"print"},{"value":"9783319102146","type":"electronic"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-10214-6_12","type":"book-chapter","created":{"date-parts":[[2014,9,30]],"date-time":"2014-09-30T11:56:12Z","timestamp":1412078172000},"page":"239-260","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["Multi Objective Optimization of HPC Kernels for Performance, Power, and Energy"],"prefix":"10.1007","author":[{"given":"Prasanna","family":"Balaprakash","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ananta","family":"Tiwari","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Stefan M.","family":"Wild","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2014,10,1]]},"reference":[{"issue":"2","key":"12_CR1","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1109\/MSPEC.2011.5693074","volume":"48","author":"P Kogge","year":"2011","unstructured":"Kogge, P.: The tops in flops. IEEE Spectrum 48(2), 48\u201354 (2011)","journal-title":"IEEE Spectrum"},{"key":"12_CR2","unstructured":"TOP500 List: June 2013 Report, http:\/\/www.top500.org"},{"key":"12_CR3","doi-asserted-by":"publisher","first-page":"2136","DOI":"10.1016\/j.procs.2011.04.234","volume":"4","author":"P Balaprakash","year":"2011","unstructured":"Balaprakash, P., Wild, S.M., Hovland, P.D.: Can search algorithms save large-scale automatic performance tuning? Procedia Computer Science 4, 2136\u20132145 (2011)","journal-title":"Procedia Computer Science"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Kadayif, I., Kandemir, M., Vijaykrishnan, N., Irwin, M., Sivasubramaniam, A.: EAC: A compiler framework for high-level energy estimation and optimization. In: Proceedings of the Design, Automation and Test in Europe Conference and Exhibition, pp. 436\u2013442. IEEE (2002)","DOI":"10.1109\/DATE.2002.998310"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Kodi, A., Louri, A.: Performance adaptive power-aware reconfigurable optical interconnects for high-performance computing (HPC) systems. In: Proceedings of the 2007 ACM\/IEEE Conference on Supercomputing (SC), pp. 1\u201312 (2007)","DOI":"10.1145\/1362622.1362631"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Ahmad, I., Ranka, S., Khan, S.U.: Using game theory for scheduling tasks on multi-core processors for simultaneous optimization of performance and energy. In: IEEE International Symposium on Parallel and Distributed Processing (IPDPS), pp. 1\u20136. IEEE (2008)","DOI":"10.1109\/IPDPS.2008.4536420"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Azizi, O., Mahesri, A., Lee, B.C., Patel, S.J., Horowitz, M.: Energy-performance tradeoffs in processor architecture and circuit design: A marginal cost analysis. In: ACM SIGARCH Computer Architecture News, vol. 38, pp. 26\u201336. ACM (2010)","DOI":"10.1145\/1816038.1815967"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Tiwari, A., Laurenzano, M.A., Carrington, L., Snavely, A.: Modeling power and energy usage of HPC kernels. In: IEEE 26th International Parallel and Distributed Processing Symposium Workshops & PhD Forum (IPDPSW), pp. 990\u2013998. IEEE (2012)","DOI":"10.1109\/IPDPSW.2012.121"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Choi, J.W., Bedard, D., Fowler, R., Vuduc, R.: A roofline model of energy. In: 2013 IEEE 27th International Symposium on Parallel Distributed Processing (IPDPS), pp. 661\u2013672. IEEE (May 2013)","DOI":"10.1109\/IPDPS.2013.77"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Ascia, G., Catania, V., Palesi, M.: Multi-objective mapping for mesh-based NoC architectures. In: Proceedings of the 2nd IEEE\/ACM\/IFIP International Conference on Hardware\/Software Codesign and System Synthesis, pp. 182\u2013187. ACM (2004)","DOI":"10.1145\/1016720.1016765"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Jahr, R., Ungerer, T., Calborean, H., Vintan, L.: Automatic multi-objective optimization of parameters for hardware and code optimizations. In: International Conference on High Performance Computing and Simulation (HPCS), pp. 308\u2013316. IEEE (2011)","DOI":"10.1109\/HPCSim.2011.5999839"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Park, S., Jiang, W., Zhou, Y., Adve, S.: Managing energy-performance tradeoffs for multithreaded applications on multiprocessor architectures. In: ACM SIGMETRICS Performance Evaluation Review, vol. 35, pp. 169\u2013180 (2007)","DOI":"10.1145\/1269899.1254902"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Bedard, D., Lim, M.Y., Fowler, R., Porterfield, A.: PowerMon: Fine-grained and integrated power monitoring for commodity computer systems. In: IEEE SoutheastCon 2010, pp. 479\u2013484 (2010)","DOI":"10.1109\/SECON.2010.5453824"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Li, D., de Supinski, B.R., Schulz, M., Cameron, K., Nikolopoulos, D.S.: Hybrid MPI\/OpenMP power-aware computing. In: IEEE International Symposium on Parallel & Distributed Processing (IPDPS), pp. 1\u201312. IEEE (2010)","DOI":"10.1109\/IPDPS.2010.5470463"},{"key":"12_CR15","doi-asserted-by":"crossref","unstructured":"Rahman, S.F., Guo, J., Yi, Q.: Automated empirical tuning of scientific codes for performance and power consumption. In: Proceedings of the 6th International Conference on High Performance and Embedded Architectures and Compilers, pp. 107\u2013116. ACM (2011)","DOI":"10.1145\/1944862.1944880"},{"issue":"3","key":"12_CR16","doi-asserted-by":"publisher","first-page":"342","DOI":"10.1177\/1094342011414749","volume":"25","author":"C Lively","year":"2011","unstructured":"Lively, C., Wu, X., Taylor, V., Moore, S., Chang, H.C., Cameron, K.: Energy and performance characteristics of different parallel implementations of scientific applications on multicore systems. International Journal of High Performance Computing Applications 25(3), 342\u2013350 (2011)","journal-title":"International Journal of High Performance Computing Applications"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"\u0162\u0103pu\u015f, C., Chung, I.H., Hollingsworth, J.K.: Active harmony: towards automated performance tuning. In: Proceedings of the 2002 ACM\/IEEE conference on Supercomputing, Supercomputing 2002, pp. 1\u201311. IEEE Computer Society Press, Los Alamitos (2002)","DOI":"10.1109\/SC.2002.10062"},{"key":"12_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"178","DOI":"10.1007\/978-3-642-29740-3_21","volume-title":"Euro-Par 2011: Parallel Processing Workshops","author":"A Tiwari","year":"2012","unstructured":"Tiwari, A., Laurenzano, M.A., Carrington, L., Snavely, A.: Auto-tuning for energy usage in scientific applications. In: Alexander, M., et al. (eds.) Euro-Par 2011, Part II. LNCS, vol. 7156, pp. 178\u2013187. Springer, Heidelberg (2012)"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Laros III, J.H.: Measuring and tuning energy efficiency on large scale high performance computing platforms. Technical Report SAND2011-5702, Sandia National Laboratories (August 2011)","DOI":"10.2172\/1035312"},{"key":"12_CR20","unstructured":"Heydemann, K., Bodin, F.: Iterative compilation for two antagonistic criteria: Application to code size and performance. In: Proceedings of the 4th Workshop on Optimizations for DSP and Embedded Systems (2006)"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Hoste, K., Eeckhout, L.: Cole: Compiler optimization level exploration. In: Proceedings of the 6th Annual IEEE\/ACM International Symposium on Code Generation and Optimization, pp. 165\u2013174. ACM (2008)","DOI":"10.1145\/1356058.1356080"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Lokuciejewski, P., Plazar, S., Falk, H., Marwedel, P., Thiele, L.: Multi-objective exploration of compiler optimizations for real-time systems. In: 13th IEEE International Symposium on Object\/Component\/Service-Oriented Real-Time Distributed Computing (ISORC), pp. 115\u2013122 (2010)","DOI":"10.1109\/ISORC.2010.15"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Hoste, K., Georges, A., Eeckhout, L.: Automated just-in-time compiler tuning. In: Proceedings of the 8th Annual IEEE\/ACM International Symposium on Code Generation and Optimization (CGO), pp. 62\u201372. ACM (2010)","DOI":"10.1145\/1772954.1772965"},{"issue":"3","key":"12_CR24","doi-asserted-by":"publisher","first-page":"296","DOI":"10.1007\/s10766-010-0161-2","volume":"39","author":"G Fursin","year":"2011","unstructured":"Fursin, G., Kashnikov, Y., Memon, A.W., Chamski, Z., Temam, O., Namolaru, M., Yom-Tov, E., Mendelson, B., Zaks, A., Courtois, E., et al.: Milepost gcc: Machine learning enabled self-tuning compiler. International Journal of Parallel Programming 39(3), 296\u2013327 (2011)","journal-title":"International Journal of Parallel Programming"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Jordan, H., Thoman, P., Durillo, J.J., Pellegrini, S., Gschwandtner, P., Fahringer, T., Moritsch, H.: A multi-objective auto-tuning framework for parallel codes. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis (SC), pp. 10:1\u201310:12. IEEE Computer Society Press, Los Alamitos (2012)","DOI":"10.1109\/SC.2012.7"},{"key":"12_CR26","unstructured":"Ehrgott, M.: Multicriteria Optimization. 2nd edn. Springer (2005)"},{"key":"12_CR27","doi-asserted-by":"publisher","first-page":"1959","DOI":"10.1016\/j.procs.2012.04.214","volume":"9","author":"P Balaprakash","year":"2012","unstructured":"Balaprakash, P., Wild, S.M., Norris, B.: SPAPT: Search problems in automatic performance tuning. Procedia Computer Science 9, 1959\u20131968 (2012)","journal-title":"Procedia Computer Science"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"Kaiser, A., Williams, S., Madduri, K., Ibrahim, K., Bailey, D., Demmel, J., Strohmaier, E.: TORCH computational reference kernels: A testbed for computer science research. Technical Report UCB\/EECS-2010-144, EECS Department, University of California, Berkeley (December 2010)","DOI":"10.2172\/1004197"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Davis, T.A.: Direct methods for sparse linear systems, vol. 2. SIAM (2006)","DOI":"10.1137\/1.9780898718881"},{"key":"12_CR30","unstructured":"Heroux, M.A., Doerer, D.W., Crozier, P.S., Willenbring, J.M.: Improving performance via mini-applications. Technical Report SAND2009-5574, Sandia National Laboratories (September 2009)"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Norris, B., Hartono, A., Gropp, W.: Annotations for productivity and performance portability. In: Petascale Computing: Algorithms and Applications. Computational Science, pp. 443\u2013462. Chapman & Hall\/CRC Press (2007)","DOI":"10.1201\/9781584889106.ch21"},{"key":"12_CR32","unstructured":"Intel Xeon Phi Coprocessor - the Architecture: http:\/\/software.intel.com\/en-us\/articles\/intel-xeon-phi-coprocessor-codename-knights-corner"},{"key":"12_CR33","doi-asserted-by":"crossref","unstructured":"Albers, S., Antoniadis, A.: Race to idle: New algorithms for speed scaling with a sleep state. In: Proceedings of the 23rd Annual ACM-SIAM Symposium on Discrete Algorithms (SODA), pp. 1266\u20131285. SIAM (2012)","DOI":"10.1137\/1.9781611973099.100"},{"key":"12_CR34","unstructured":"Intel Xeon Phi Coprocessor System Software Developers Guide: http:\/\/software.intel.com\/en-us\/articles\/intel-xeon-phi-coprocessor-system-software-developers-guide"},{"key":"12_CR35","doi-asserted-by":"crossref","unstructured":"Alonso, P., Dolz, M.F., Igual, F.D., Mayo, R., Quintana-Orti, E.S.: Saving energy in the LU factorization with partial pivoting on multi-core processors. In: 20th Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP), pp. 353\u2013358. IEEE (2012)","DOI":"10.1109\/PDP.2012.28"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Springer, R., Lowenthal, D.K., Rountree, B., Freeh, V.W.: Minimizing execution time in MPI programs on an energy-constrained, power-scalable cluster. In: Proceedings of the Eleventh ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 230\u2013238. ACM (2006)","DOI":"10.1145\/1122971.1123006"},{"key":"12_CR37","doi-asserted-by":"crossref","unstructured":"Davis, T.A., Hu, Y.: The University of Florida sparse matrix collection. ACM Transactions on Mathematical Software 38(1) 1:1\u20131:25 (2011)","DOI":"10.1145\/2049662.2049663"},{"key":"12_CR38","unstructured":"CPU Freq. Scaling, https:\/\/wiki.archlinux.org\/index.php\/Cpufrequtils"},{"key":"12_CR39","unstructured":"WattsUp? Meters, https:\/\/www.wattsupmeters.com\/"},{"key":"12_CR40","unstructured":"IBM System Blue Gene Solution - Overview, http:\/\/www-03.ibm.com\/systems\/technicalcomputing\/solutions\/bluegene\/"},{"key":"12_CR41","doi-asserted-by":"crossref","unstructured":"Yoshii, K., Iskra, K., Gupta, R., Beckman, P., Vishwanath, V., Yu, C., Coghlan, S.: Evaluating power-monitoring capabilities on IBM Blue Gene\/P and Blue Gene\/Q. In: IEEE International Conference on Cluster Computing (CLUSTER), pp. 36\u201344. IEEE (2012)","DOI":"10.1109\/CLUSTER.2012.62"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing Systems. Performance Modeling, Benchmarking and Simulation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-10214-6_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T22:49:21Z","timestamp":1746398961000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-10214-6_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319102139","9783319102146"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-10214-6_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"1 October 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}