{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T10:40:02Z","timestamp":1738320002134,"version":"3.35.0"},"publisher-location":"Boston, MA","reference-count":22,"publisher":"Springer US","isbn-type":[{"type":"print","value":"9780387784472"},{"type":"electronic","value":"9780387784489"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-0-387-78448-9_24","type":"book-chapter","created":{"date-parts":[[2008,8,5]],"date-time":"2008-08-05T17:18:44Z","timestamp":1217956724000},"page":"295-306","source":"Crossref","is-referenced-by-count":1,"title":["A Distributed and Replicated Service for Checkpoint Storage"],"prefix":"10.1007","author":[{"given":"Fatiha","family":"Bouabache","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomas","family":"Herault","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gilles","family":"Fedak","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Franck","family":"Cappello","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"unstructured":"W. Groop and E. Lusk, Fault Tolerance in MP Programs. OAI-PMH server at cs1.ist.psu.edu, 2002.","key":"24_CR1_24"},{"doi-asserted-by":"crossref","unstructured":"E. N. Elnozahy et al.A survey of Rollback-Recovery Protocols in Message-Passing Sys-tems, Journal \"CSURV: Computer Surveys\", volume 34, 2002.","key":"24_CR2_24","DOI":"10.1145\/568522.568525"},{"issue":"1","key":"24_CR3_24","doi-asserted-by":"crossref","first-page":"63?","DOI":"10.1145\/214451.214456","volume":"3","author":"K.M. Chandy","year":"1985","unstructured":"K.M. Chandy and L. Lamport, Distributed snapshots: Determining global states of dis-tributed systems. ACM Transactions on Computer Systems (TOCS), 3(1):63? 75, 1985.","journal-title":"ACM Transactions on Computer Systems (TOCS)"},{"unstructured":"A. Bouteiller et al.Mpich-v: a multiprotocol fault tolerant mpi. International Journal of High Performance Computing and Applications, 20(8):319?333, fall, 2006.","key":"24_CR4_24"},{"unstructured":"G. Burns, R. Daoud, and J. Vaigl. LAM: An open cluster environment forMPI, 1994.","key":"24_CR5_24"},{"doi-asserted-by":"crossref","unstructured":"L. Alvisi et al.An analysis of communication induced checkpointing. In Proceedings of the symposium on fault-tolerant computing, pages 242?249, 1999.","key":"24_CR6_24","DOI":"10.1109\/FTCS.1999.781058"},{"doi-asserted-by":"crossref","unstructured":"F. Baude et al.A hybrid message logging-cic protocol for constrained checkpointability. In Proceedings of EuroPar2005, LNCS, 2005.","key":"24_CR7_24","DOI":"10.1007\/11549468_71"},{"doi-asserted-by":"crossref","unstructured":"James S. Plank and Kai Li, Faster Checkpointing with N+1 Parity, 24th International Symposium on Fault-Tolerant Computing, Austin, TX, June, 1994, pp 288-297.","key":"24_CR8_24","DOI":"10.1109\/FTCS.1994.315631"},{"unstructured":"Z. Chen et al.Building fault survivable MPI programs with FT-MPI using diskless-checkpointing. In Proceedings of the tenth ACM SIGPLAN Symposium on (PPoPP), June 2005.","key":"24_CR9_24"},{"unstructured":"G. Zheng, L. Shi, and L. V. Kale. Ftc-charm++: an inmemory checkpoint-based fault toler-ant runtime for charm++ and mpi. In Proceedings of the IEEE International Conference on Cluster Computing, USA, 2004. IEEE Computer Society.","key":"24_CR10_24"},{"doi-asserted-by":"crossref","unstructured":"C. Huang et al.Performance evaluation of adaptive MPI. PPOPP 2006: 12-21","key":"24_CR11_24","DOI":"10.1145\/1122971.1122976"},{"unstructured":"L. V. Kale and S. Krishnan. Charm++: Parallel programming with message-driven objects. In Wilson, G.V., Lu, P., eds.: Parallel programming using C++. MIT Press (1996) 175-213.","key":"24_CR12_24"},{"unstructured":"L. V. Kale. The Virtualization approach to Parallel Programming: Runtime Optimization and the State of Art. In LACSI 2002, Albuquerque, October 2002.","key":"24_CR13_24"},{"doi-asserted-by":"crossref","unstructured":"S. Chakravorty, C. L. Mendes, and L. V. Kal\u00e9, Proactive Fault Tolerance in MPI Applica-tions Via Task Migration. HiPC 2006: 485-496","key":"24_CR14_24","DOI":"10.1007\/11945918_47"},{"unstructured":"L. V. Kale and S. Krishnan. Charm++: Parallel programming with message-driven objects. In Wilson, G.V., Lu, P., eds.: Parallel programming using C++. MIT Press (1996) 175-213.","key":"24_CR15_24"},{"doi-asserted-by":"crossref","unstructured":"S. Chakravorty and L. V. Kal\u00e9, A fault tolerance Protocol with Fast Fault Recovery, Accepted for publication at IPDPS 2007.","key":"24_CR16_24","DOI":"10.1109\/IPDPS.2007.370310"},{"doi-asserted-by":"crossref","unstructured":"R. Guerraoui and A. Schiper. Software based replication for fault tolerance. IEEE Com-puter, 30(4):68?74, Apr. 1997.","key":"24_CR17_24","DOI":"10.1109\/2.585156"},{"unstructured":"N. Budhiraja et al.The primary-backup approach, Dec. 01 1993.","key":"24_CR18_24"},{"doi-asserted-by":"crossref","unstructured":"L. Rilling and C. Morin. A practical transparent data sharing service for the grid. In Proc. Fifth InternationalWorkshop on Distributed SharedMemory (DSM 2005), Cardiff, UK, May 2005. Held in conjunction with CCGrid 2005.","key":"24_CR19_24","DOI":"10.1109\/CCGRID.2005.1558657"},{"unstructured":"C. Leangsuksun et al.Asymmetric active-active high availability for high-end computing. In Proceedings of (COSET-2), in conjunction with the 19th ACM International Conference on Supercomputing (ICS), Cambridge, MA, USA, 2005.","key":"24_CR20_24"},{"doi-asserted-by":"crossref","unstructured":"C. Engelmann et al.Symmetric active\/active high availability for high-performance com-puting system services. Journal of Computers (JCP), 1(8), 2006.","key":"24_CR21_24","DOI":"10.4304\/jcp.1.8.43-54"},{"unstructured":"INRIA. Simgrid project. http:\/\/simgrid.gforge.inria.fr .","key":"24_CR22_24"}],"container-title":["Making Grids Work"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-0-387-78448-9_24.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T09:59:59Z","timestamp":1738317599000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-0-387-78448-9_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9780387784472","9780387784489"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-0-387-78448-9_24","relation":{},"subject":[],"published":{"date-parts":[[2008]]}}}