{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T08:59:57Z","timestamp":1775638797394,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":14,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642143892","type":"print"},{"value":"9783642143908","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-14390-8_22","type":"book-chapter","created":{"date-parts":[[2010,7,7]],"date-time":"2010-07-07T09:11:54Z","timestamp":1278493914000},"page":"206-215","source":"Crossref","is-referenced-by-count":20,"title":["A Flexible Checkpoint\/Restart Model in Distributed Systems"],"prefix":"10.1007","author":[{"given":"Mohamed-Slim","family":"Bouguerra","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thierry","family":"Gautier","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Denis","family":"Trystram","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jean-Marc","family":"Vincent","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"22_CR1","unstructured":"Adiga, N., et al.: An Overview of the BlueGene\/L Supercomputer. In: ACM\/IEEE 2002 Conference on Supercomputing, p. 60 (2002)"},{"key":"22_CR2","doi-asserted-by":"crossref","unstructured":"Schroeder, B., Gibson, G.A.: A large-scale study of failures in high-performance computing systems. In: DSN 2006: Proceedings of the International Conference on Dependable Systems and Networks, Washington, DC, USA, pp. 249\u2013258 (2006)","DOI":"10.1109\/DSN.2006.5"},{"issue":"7","key":"22_CR3","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1016\/j.jpdc.2009.03.007","volume":"69","author":"T.J. Hacker","year":"2009","unstructured":"Hacker, T.J., Romero, F., Carothers, C.D.: An analysis of clustered failures on large supercomputing systems. J. Parallel Distrib. Comput.\u00a069(7), 652\u2013665 (2009)","journal-title":"J. Parallel Distrib. Comput."},{"issue":"3","key":"22_CR4","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1016\/j.future.2004.11.016","volume":"22","author":"J.T. Daly","year":"2006","unstructured":"Daly, J.T.: A higher order estimate of the optimum checkpoint interval for restart dumps. Future Generation Computer Systems\u00a022(3), 303\u2013312 (2006)","journal-title":"Future Generation Computer Systems"},{"issue":"2","key":"22_CR5","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1109\/TDSC.2004.15","volume":"1","author":"E.N. Elnozahy","year":"2004","unstructured":"Elnozahy, E.N., Plank, J.S.: Checkpointing for peta-scale systems: A look into the future of practical rollback-recovery. IEEE Trans. Dependable Secur. Comput.\u00a01(2), 97\u2013108 (2004)","journal-title":"IEEE Trans. Dependable Secur. Comput."},{"key":"22_CR6","doi-asserted-by":"crossref","unstructured":"Liu, Y., Nassar, R., Leangsuksun, C., Naksinehaboon, N., Paun, M., Scott, S.: An optimal checkpoint\/restart model for a large scale high performance computing system. In: IEEE International Symposium on Parallel and Distributed Processing, pp. 1\u20139 (2008)","DOI":"10.1109\/IPDPS.2008.4536279"},{"key":"22_CR7","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1145\/1183401.1183406","volume-title":"Proceedings of The 20th Annual International Conference on Supercomputing","author":"A.J. Oliner","year":"2006","unstructured":"Oliner, A.J., Rudolph, L., Sahoo, R.K.: Cooperative checkpointing: a robust approach to large-scale systems reliability. In: Proceedings of The 20th Annual International Conference on Supercomputing, pp. 14\u201323. ACM, New York (2006)"},{"issue":"9","key":"22_CR8","doi-asserted-by":"publisher","first-page":"530","DOI":"10.1145\/361147.361115","volume":"17","author":"J.W. Young","year":"1974","unstructured":"Young, J.W.: A first order approximation to the optimum checkpoint interval. ACM Commun.\u00a017(9), 530\u2013531 (1974)","journal-title":"ACM Commun."},{"issue":"1","key":"22_CR9","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1145\/214451.214456","volume":"3","author":"K.M. Chandy","year":"1985","unstructured":"Chandy, K.M., Lamport, L.: Distributed snapshots: determining global states of distributed systems. ACM Trans. Comput. Syst.\u00a03(1), 63\u201375 (1985)","journal-title":"ACM Trans. Comput. Syst."},{"key":"22_CR10","unstructured":"Bouguerra, M.S., Gautier, T., Trystram, D., Vincent, J.M.: A new flexible checkpoint\/restart model. Technical report, RR-6751, INRIA (2008)"},{"key":"22_CR11","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1109\/24.9847","volume":"37","author":"R. Geist","year":"1988","unstructured":"Geist, R., Reynolds, R., Westall, J.: Selection of a checkpoint interval in a critical-task environment. IEEE Transactions on Reliability\u00a037, 395\u2013400 (1988)","journal-title":"IEEE Transactions on Reliability"},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Plank, J.S., Thomason, M.G.: The average availability of parallel checkpointing systems and its importance in selecting runtime parameters. In: 29th International Symposium on Fault-Tolerant Computing, pp. 250\u2013259 (1999)","DOI":"10.1109\/FTCS.1999.781059"},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Naksinehaboon, N., Liu, Y., Leangsuksun, C., Nassar, R., Paun, M., Scott, S.: Reliability-Aware Approach: An Incremental Checkpoint\/Restart Model in HPC Environments. In: IEEE International Symposium on Cluster Computing and the Grid, pp. 783\u2013788 (2008)","DOI":"10.1109\/CCGRID.2008.109"},{"key":"22_CR14","doi-asserted-by":"publisher","DOI":"10.1002\/047001363X","volume-title":"A First Course in Stochastic Models","author":"H.C. Tijms","year":"2003","unstructured":"Tijms, H.C.: A First Course in Stochastic Models. John Wiley, Chichester (2003)"}],"container-title":["Lecture Notes in Computer Science","Parallel Processing and Applied Mathematics"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-14390-8_22.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,22]],"date-time":"2025-02-22T18:54:05Z","timestamp":1740250445000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-14390-8_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642143892","9783642143908"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-14390-8_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010]]}}}