{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T01:00:15Z","timestamp":1725843615135},"publisher-location":"Cham","reference-count":16,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319273075"},{"type":"electronic","value":"9783319273082"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-27308-2_50","type":"book-chapter","created":{"date-parts":[[2015,12,17]],"date-time":"2015-12-17T08:29:53Z","timestamp":1450340993000},"page":"619-630","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Case Study of Application Structure Aware Resilience Through Differentiated State Saving and Recovery"],"prefix":"10.1007","author":[{"given":"Anshu","family":"Dubey","sequence":"first","affiliation":[]},{"given":"Hajime","family":"Fujita","sequence":"additional","affiliation":[]},{"given":"Zachary","family":"Rubenstein","sequence":"additional","affiliation":[]},{"given":"Brian","family":"Van Straalen","sequence":"additional","affiliation":[]},{"given":"Andrew A.","family":"Chien","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,12,18]]},"reference":[{"key":"50_CR1","doi-asserted-by":"crossref","unstructured":"Berrocal, E., Bautista-Gomez, L., Di, S., Lan, Z., Cappello, F.: Lightweight silent data corruption detection based on runtime data analysis for HPC applications. Technical report (2014)","DOI":"10.1145\/2749246.2749253"},{"key":"50_CR2","doi-asserted-by":"crossref","unstructured":"Chung, J., Lee, I., Sullivan, M., Ryoo, J.H., Kim, D.W., Yoon, D.H., Kaplan, L., Erez, M.: Containment domains: a scalable, efficient, and flexible resilience scheme for exascale systems. In: The Proceedings of SC12 (2012)","DOI":"10.1109\/SC.2012.36"},{"key":"50_CR3","unstructured":"Colella, P., Graves, D., Keen, N., Ligocki, T., Martin, D., McCorquodale, P., Modiano, D., Schwartz, P., Sternberg, T., Van Straalen, B.: Chombo software package for AMR applications design document. Technical report, LBNL, Applied Numerical Algorithms Group, Computational Research Division (2009)"},{"issue":"10\u201311","key":"50_CR4","doi-asserted-by":"publisher","first-page":"512","DOI":"10.1016\/j.parco.2009.08.001","volume":"35","author":"A Dubey","year":"2009","unstructured":"Dubey, A., Antypas, K., Ganapathy, M., Reid, L., Riley, K., Sheeler, D., Siegel, A., Weide, K.: Extensible component-based architecture for FLASH, a massively parallel, multiphysics simulation code. Parallel Comput. 35(10\u201311), 512\u2013522 (2009)","journal-title":"Parallel Comput."},{"key":"50_CR5","doi-asserted-by":"crossref","unstructured":"Dubey, A., Reid, L., Fisher, R.: Introduction to FLASH 3.0, with application to supersonic turbulence. In: Physica Scripta T132, : Topical Issue on Turbulent Mixing and Beyond, Results of a Conference at ICTP. Trieste, Italy, August (2008)","DOI":"10.1088\/0031-8949\/2008\/T132\/014046"},{"key":"50_CR6","unstructured":"Dubey, A., Mohapatra, P., Weide, K.: Fault tolerance using lower fidelity data in adaptive mesh applications. In: Proceedings of the 3rd Workshop on Fault-tolerance for HPC at Extreme Scale, pp. 3\u201310. ACM (2013). \n                      http:\/\/doi.acm.org\/10.1145\/2465813.2465817"},{"key":"50_CR7","doi-asserted-by":"crossref","unstructured":"Fujita, H., Dun, N., Rubenstein, Z.A., Chien, A.A.: Log-structured global array for efficient multi-version snapshots. In: IEEE CCGrid 2015 (2015)","DOI":"10.1109\/CCGrid.2015.80"},{"key":"50_CR8","doi-asserted-by":"crossref","unstructured":"Lu, G., Zheng, Z., Chien, A.A.: When is multi-version checkpointing needed? In: Proceedings of the 3rd Workshop on Fault-tolerance for HPC at Extreme Scale, FTXS 2013. ACM (2013)","DOI":"10.1145\/2465813.2465821"},{"key":"50_CR9","doi-asserted-by":"crossref","unstructured":"Moody, A., Bronevetsky, G., Mohror, K., De\u00a0Supinski, B.R.: Design, modeling,and evaluation of a scalable multi-level checkpointing system. In: SC 2010 (2010)","DOI":"10.1109\/SC.2010.18"},{"key":"50_CR10","unstructured":"NCSA: Heirarchical Data Format 5 (2008). \n                      http:\/\/hdf.ncsa.uiuc.edu\/HDF5\/"},{"issue":"2","key":"50_CR11","first-page":"203","volume":"20","author":"J Nieplocha","year":"2006","unstructured":"Nieplocha, J., Palmer, B., Tipparaju, V., Krishnan, M., Trease, H., Apr, E.: Advances, applications and performance of the global arrays shared memory programming toolkit. IJHPCA 20(2), 203\u2013231 (2006)","journal-title":"IJHPCA"},{"key":"50_CR12","doi-asserted-by":"crossref","unstructured":"Sato, K., Mohror, K., Moody, A., Gamblin, T., de Supinski, B., Maruyama, N., Matsuoka, S.: Design and modeling of a non-blocking checkpointing system. In: SC 2012 (2012)","DOI":"10.1109\/SC.2012.46"},{"key":"50_CR13","doi-asserted-by":"publisher","first-page":"2287","DOI":"10.1016\/j.procs.2011.04.249","volume":"4","author":"AG Shet","year":"2011","unstructured":"Shet, A.G., Elwasif, W.R., Foley, S.S., Park, B.H., Bernholdt, D.E., Bramley, R.: Strategies for fault tolerance in multicomponent applications. Procedia Comput. Sci. 4, 2287\u20132296 (2011)","journal-title":"Procedia Comput. Sci."},{"issue":"2","key":"50_CR14","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1016\/j.future.2009.07.015","volume":"26","author":"X Shi","year":"2010","unstructured":"Shi, X., Pazat, J., Rodriguez, E., Jin, H., Jiang, H.: Adapting grid applications to safety using fault-tolerant methods: design, implementation and evaluations. Future Gener. Comput. Syst. 26(2), 236\u2013244 (2010)","journal-title":"Future Gener. Comput. Syst."},{"issue":"7","key":"50_CR15","doi-asserted-by":"publisher","first-page":"997","DOI":"10.1109\/TPDS.2008.172","volume":"20","author":"J Walters","year":"2009","unstructured":"Walters, J., Chaudhary, V.: Replication-based fault tolerance for MPI applications. IEEE Trans. Parallel Distrib. Syst. 20(7), 997\u20131010 (2009)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"50_CR16","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Chien, A.A., Teranishi, K.: Fault tolerance in an inner-outer solver: a gvr-enabled case study. In: 11th International Meeting High Performance Computing for Computational Science, VECPAR 2014 (2014)","DOI":"10.1007\/978-3-319-17353-5_11"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2015: Parallel Processing Workshops"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-27308-2_50","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,19]],"date-time":"2020-04-19T20:41:55Z","timestamp":1587328915000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-27308-2_50"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319273075","9783319273082"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-27308-2_50","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"18 December 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}