{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T18:00:40Z","timestamp":1775671240775,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2018,4,11]],"date-time":"2018-04-11T00:00:00Z","timestamp":1523404800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,4,11]],"date-time":"2018-04-11T00:00:00Z","timestamp":1523404800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1618776"],"award-info":[{"award-number":["CCF-1618776"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1422009"],"award-info":[{"award-number":["CCF-1422009"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2018,7]]},"DOI":"10.1007\/s11227-018-2368-8","type":"journal-article","created":{"date-parts":[[2018,4,11]],"date-time":"2018-04-11T12:11:57Z","timestamp":1523448717000},"page":"3168-3192","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["System-wide trade-off modeling of performance, power, and resilience on petascale systems"],"prefix":"10.1007","volume":"74","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9613-7404","authenticated-orcid":false,"given":"Li","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhou","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuping","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael E.","family":"Papka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiling","family":"Lan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,4,11]]},"reference":[{"key":"2368_CR1","unstructured":"Balbo G (2007) Introduction to generalized stochastic petri nets. In: Proceedings of SFM"},{"key":"2368_CR2","doi-asserted-by":"crossref","unstructured":"Bautista-Gomez L, Komatitsch D, Maruyama N, Tsuboi S, Cappello F, Matsuoka S (2011) FTI: high performance fault tolerance interface for hybrid systems. In: Proceedings of SC","DOI":"10.1145\/2063384.2063427"},{"key":"2368_CR3","doi-asserted-by":"crossref","unstructured":"Bircher W, John L (2008) Analysis of dynamic power management on multi-core processors. In: Proceedings of ICS","DOI":"10.1145\/1375527.1375575"},{"key":"2368_CR4","doi-asserted-by":"crossref","unstructured":"Bodas D, Song J, Rajappa M, Hoffman A (2014) Simple power-aware scheduler to limit power consumption by HPC system within a budget. In: Proceedings of E2SC","DOI":"10.1109\/E2SC.2014.8"},{"key":"2368_CR5","doi-asserted-by":"crossref","unstructured":"Chen X, Xu C, Dick R, Mao Z (2010) Performance and power modeling in a multi-programmed multi-core environment. In: Proceedings of DAC","DOI":"10.1145\/1837274.1837479"},{"key":"2368_CR6","doi-asserted-by":"publisher","first-page":"868","DOI":"10.1109\/TPDS.2014.2315203","volume":"26","author":"M Chiesi","year":"2015","unstructured":"Chiesi M, Vanzolini L, Mucci C, Scarselli E, Guerrieri R (2015) Power-aware job scheduling on heterogeneous multicore architectures. IEEE Trans Parallel Distrib Syst 26:868\u2013877","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"2368_CR7","unstructured":"Cobalt Resource Manager http:\/\/trac.mcs.anl.gov\/projects\/cobalt"},{"key":"2368_CR8","unstructured":"Crovella M, Bianchini R, Leblanc T, Markatos E, Wisniewski R (1992) Using communication-to-computation ratio in parallel program design and performance prediction. In: Proceedings of IPDPS"},{"key":"2368_CR9","unstructured":"CPN Tools (2015) http:\/\/cpntools.org\/"},{"key":"2368_CR10","doi-asserted-by":"crossref","unstructured":"Curtis-Maury M, Dzierwa J, Antonopoulos C, Nikolopoulos D (2006) Online power-performance adaptation of multithreaded programs using hardware event-based prediction. In: Proceedings of ICS","DOI":"10.1145\/1183401.1183426"},{"key":"2368_CR11","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1016\/j.future.2004.11.016","volume":"22","author":"J Daly","year":"2006","unstructured":"Daly J (2006) A higher order estimate of the optimum checkpoint interval for restart dumps. Future Gener Comput Syst 22:303\u2013312","journal-title":"Future Gener Comput Syst"},{"key":"2368_CR12","doi-asserted-by":"crossref","unstructured":"Di S, Bouguerra M-S, Bautista-Gomez LA, Cappello F (2014) Optimization of multi-level checkpoint model for large scale HPC applications. In: Proceedings of IPDPS","DOI":"10.1109\/IPDPS.2014.122"},{"key":"2368_CR13","doi-asserted-by":"crossref","unstructured":"Elliott J, Kharbas K, Fiala D, Mueller F, Ferreira K, Engelmann C (2012) Combining partial redundancy and checkpointing for HPC. In: Proceedings of ICDCS","DOI":"10.1109\/ICDCS.2012.56"},{"key":"2368_CR14","unstructured":"ExSpecT (2015) http:\/\/www.exspect.com\/"},{"key":"2368_CR15","doi-asserted-by":"crossref","unstructured":"Fan X, Weber W-D, Barroso L (2007) Power provisioning for a warehouse-sized computer. In: Proceedings of ISCA","DOI":"10.1145\/1250662.1250665"},{"key":"2368_CR16","doi-asserted-by":"crossref","unstructured":"Feitelson D, Rudolph L, Schwiegelshohn U, Sevcik K, Wong P (1997) Theory and practice in parallel job scheduling. In: Proceedings of JSSPP","DOI":"10.1007\/3-540-63574-2_14"},{"key":"2368_CR17","unstructured":"Feng X, Ge R, Cameron K (2005) Power and energy profiling of scientific applications on distributed systems. In: Proceedings of IPDPS"},{"key":"2368_CR18","doi-asserted-by":"crossref","unstructured":"Ferreira K, Stearley J, Laros III J, Oldfield R et al (2011) Evaluating the viability of process replication reliability for exascale systems. In: Proceedings of SC","DOI":"10.1145\/2063384.2063443"},{"key":"2368_CR19","doi-asserted-by":"publisher","first-page":"1123","DOI":"10.1016\/j.peva.2010.07.004","volume":"67","author":"A Gandhi","year":"2010","unstructured":"Gandhi A, Harchol-Balter M, Adan I (2010) Server farms with setup costs. Perform Eval 67:1123\u20131138","journal-title":"Perform Eval"},{"key":"2368_CR20","doi-asserted-by":"crossref","unstructured":"Ge R, Feng X, Cameron K (2005) Performance-constrained distributed DVS scheduling for scientific applications on power-aware clusters. In: Proceedings of SC","DOI":"10.1109\/MC.2005.380"},{"key":"2368_CR21","doi-asserted-by":"publisher","first-page":"658","DOI":"10.1109\/TPDS.2009.76","volume":"21","author":"R Ge","year":"2010","unstructured":"Ge R, Feng X, Song S, Chang H-C, Li D, Cameron K (2010) PowerPack: energy profiling and analysis of high-performance systems and applications. IEEE Trans Parallel Distrib Syst 21:658\u2013671","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"2368_CR22","doi-asserted-by":"publisher","first-page":"641","DOI":"10.1109\/TC.2006.87","volume":"55","author":"C Gniady","year":"2006","unstructured":"Gniady C, Butt A, Hu Y, Lu Y-H (2006) Program counter-based prediction techniques for dynamic power management. IEEE Trans Comput 55:641\u2013658","journal-title":"IEEE Trans Comput"},{"key":"2368_CR23","doi-asserted-by":"crossref","unstructured":"Goiri I, Kien L, Haque M, Beauchea R, Nguyen T, Guitart J, Torres J, Bianchini R (2011) GreenSlot: scheduling energy consumption in green datacenters. In: Proceedings of SC","DOI":"10.1145\/2063384.2063411"},{"key":"2368_CR24","doi-asserted-by":"crossref","unstructured":"Guenter B, Jain N, Williams C (2011) Managing cost, performance, and reliability tradeoffs for energy-aware server provisioning. In: Proceedings of INFOCOM","DOI":"10.1109\/INFCOM.2011.5934917"},{"key":"2368_CR25","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1016\/0304-3975(81)90049-9","volume":"14","author":"K Jensen","year":"1981","unstructured":"Jensen K (1981) Colored petri nets and the invariant-method. Theoret Comput Sci 14:317\u2013336","journal-title":"Theoret Comput Sci"},{"key":"2368_CR26","doi-asserted-by":"crossref","unstructured":"Kanev S, Hazelwood KM, Wei G-Y, Brooks DM (2014) Tradeoffs between power management and tail latency in warehouse-scale applications. In: Proceedings of IISWC","DOI":"10.1109\/IISWC.2014.6983037"},{"key":"2368_CR27","doi-asserted-by":"crossref","unstructured":"LeBlanc T, Anand R, Gabriel E, Subhlok J (2009) Volpexmpi: an MPI Library for execution of parallel applications on volatile nodes. In: European PVM\/MPI users\u2019 group meeting","DOI":"10.1007\/978-3-642-03770-2_19"},{"key":"2368_CR28","doi-asserted-by":"crossref","unstructured":"Lefurgy C, Wang X, Ware M (2007) Server-level power control. In: Proceedings of ICAC","DOI":"10.1109\/ICAC.2007.35"},{"key":"2368_CR29","unstructured":"LLview (2013) Graphical monitoring of loadleveler controlled cluster. http:\/\/www.fz-juelich.de\/jsc\/llview\/"},{"key":"2368_CR30","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1109\/92.920816","volume":"9","author":"T Martin","year":"2001","unstructured":"Martin T, Siewiorek D (2001) Non-ideal battery and main memory effects on CPU speed-setting for low power. IEEE Trans VLSI System 9:29\u201334","journal-title":"IEEE Trans VLSI System"},{"key":"2368_CR31","volume-title":"Petri nets in snoopy: a unifying framework for the graphical display, computational modelling, and simulation of bacterial regulatory networks","author":"W Marwan","year":"2012","unstructured":"Marwan W, Rohr C, Heiner M (2012) Petri nets in snoopy: a unifying framework for the graphical display, computational modelling, and simulation of bacterial regulatory networks. Humana Press, New York"},{"key":"2368_CR32","unstructured":"Mira (2012) Next-generation supercomputer. https:\/\/www.alcf.anl.gov\/mira"},{"key":"2368_CR33","doi-asserted-by":"crossref","unstructured":"Moody A, Bronevetsky G, Mohror K, Supinski B (2010) Design, modeling, and evaluation of a scalable multi-level checkpointing system. In: Proceedings of SC","DOI":"10.2172\/984082"},{"key":"2368_CR34","unstructured":"NSF Cyberinfrastructure Framework for $$21^{st}$$ Century Science and Engineering Vision. http:\/\/www.nsf.gov\/pubs\/2010\/nsf10015\/nsf10015.jsp"},{"key":"2368_CR35","doi-asserted-by":"crossref","unstructured":"Patki T, Lowenthal D, Rountree B, Schulz M, de\u00a0Supinski B (2013) Exploring hardware overprovisioning in power-constrained, high performance computing. In: Proceedings of ICS","DOI":"10.1145\/2464996.2465009"},{"key":"2368_CR36","doi-asserted-by":"crossref","unstructured":"Qiu Q, Pedram M (1999) Dynamic power management based on continuous-time Markov decision processes. In: Proceedings of DAC","DOI":"10.1145\/309847.309997"},{"key":"2368_CR37","doi-asserted-by":"crossref","unstructured":"Reed D, Lu C, Mendes C (2003) Big systems and big reliability challenges. In: Proceedings of ParCo","DOI":"10.1016\/S0927-5452(04)80089-3"},{"key":"2368_CR38","unstructured":"ReNeW (2015) http:\/\/www.renew.de\/"},{"key":"2368_CR39","doi-asserted-by":"crossref","unstructured":"Riesen R, Ferreira K, Silva D, Lemarinier P, Arnold D, Bridges P (2012) Alleviating scalability issues of checkpointing protocols. In: Proceedings of SC","DOI":"10.1109\/SC.2012.18"},{"key":"2368_CR40","unstructured":"Rong P, Pedram M (2006) Battery-aware power management based on Markovian decision processes. In: Proceedings of ICCAD"},{"key":"2368_CR41","unstructured":"Sancho J, Petrini F, Davis K, Gioiosa R, Jiang S (2005) Current practice and a direction forward in checkpoint\/restart implementations for fault tolerance. In: Proceedings of IPDPS"},{"key":"2368_CR42","doi-asserted-by":"crossref","unstructured":"Srinivasan J, Adve S, Bose P, Rivers J (2004) The impact of technology scaling on lifetime reliability. In: Proceedings of DSN","DOI":"10.1109\/DSN.2004.1311888"},{"key":"2368_CR43","doi-asserted-by":"crossref","unstructured":"Tang W, Desai N, Buettner D, Lan Z (2010) Analyzing and adjusting user runtime estimates to improve job scheduling on blue gene\/P. In: Proceedings of IPDPS","DOI":"10.1109\/IPDPS.2010.5470474"},{"key":"2368_CR44","unstructured":"The Standard Workload Format (2007) http:\/\/www.cs.huji.ac.il\/labs\/parallel\/workload\/swf.html"},{"key":"2368_CR45","doi-asserted-by":"crossref","unstructured":"Tian Y, Lin C, Yao M (2012) Modeling and analyzing power management policies in server farms using stochastic petri nets. In: Proceedings of e-Energy","DOI":"10.1145\/2208828.2208854"},{"key":"2368_CR46","doi-asserted-by":"crossref","unstructured":"Tiwari A, Laurenzano M, Carrington L, Snavely A (2012) Modeling power and energy usage of HPC Kernels. In: Proceedings of IPDPSW","DOI":"10.1109\/IPDPSW.2012.121"},{"key":"2368_CR47","unstructured":"TOPPER (2015) http:\/\/bluesky.cs.iit.edu\/topper\/"},{"key":"2368_CR48","doi-asserted-by":"crossref","unstructured":"Wallace S, Vishwanath V, Coghlan S, Lan Z, Papka M (2013) Application profilling benchmarks on IBM blue gene\/Q. In: Proceedings of cluster","DOI":"10.1109\/CLUSTER.2013.6702682"},{"key":"2368_CR49","unstructured":"Wingstrom J (2009) Overcoming the difficulties created by the volatile nature of desktop grids through understanding. Technical report, Ph.D. thesis, University of Hawai\u2019i, Manoa"},{"key":"2368_CR50","doi-asserted-by":"crossref","unstructured":"Yang X, Zhou Z, Wallace S, Lan Z, Tang W, Coghlan S, Papka M (2013) Integrating dynamic pricing of electricity into energy aware scheduling for HPC systems. In: Proceedings of SC","DOI":"10.1145\/2503210.2503264"},{"key":"2368_CR51","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.jpdc.2015.06.006","volume":"84","author":"L Yu","year":"2015","unstructured":"Yu L, Zhou Z, Wallace S, Papka M, Lan Z (2015) Quantitative modeling of power-performance tradeoffs on extreme scale systems. J Parallel Distrib Comput Comput 84:1\u201314","journal-title":"J Parallel Distrib Comput Comput"},{"key":"2368_CR52","doi-asserted-by":"crossref","unstructured":"Zhou Z, Lan Z, Tang W, Desai N (2013) Reducing energy costs for IBM blue gene\/P via power-aware job scheduling. In: Proceedings of JSSPP","DOI":"10.1007\/978-3-662-43779-7_6"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-018-2368-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-018-2368-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-018-2368-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,18]],"date-time":"2022-08-18T23:29:14Z","timestamp":1660865354000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-018-2368-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4,11]]},"references-count":52,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2018,7]]}},"alternative-id":["2368"],"URL":"https:\/\/doi.org\/10.1007\/s11227-018-2368-8","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,4,11]]},"assertion":[{"value":"11 April 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}