{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T05:51:41Z","timestamp":1747806701620,"version":"3.28.0"},"reference-count":21,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2003]]},"DOI":"10.1109\/clustr.2003.1253321","type":"proceedings-article","created":{"date-parts":[[2004,5,25]],"date-time":"2004-05-25T16:11:16Z","timestamp":1085501476000},"page":"242-250","source":"Crossref","is-referenced-by-count":41,"title":["Coordinated checkpoint versus message log for fault tolerant MPI"],"prefix":"10.1109","author":[{"family":"Bouteiller","sequence":"first","affiliation":[]},{"family":"Lemarinier","sequence":"additional","affiliation":[]},{"family":"Krawezik","sequence":"additional","affiliation":[]},{"family":"Capello","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"A survey of rollback-recovery protocols in message passing systems","author":"elnozahy","year":"1996","journal-title":"Technical Report CMU-CS-96-181"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/514191.514205"},{"journal-title":"special issue of the Journal High Performance Computing Applications (IJHPCA) 2002","article-title":"William Gropp and Ewing Lusk. Fault tolerance in MPI programs","year":"0","key":"ref12"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"789","DOI":"10.1016\/0167-8191(96)00024-5","volume":"22","author":"gropp","year":"1996","journal-title":"Parallel Computing"},{"key":"ref14","article-title":"Checkpoint and migration of UNIX processes in the condor distributed processing system","author":"litzkow","year":"1997","journal-title":"Technical Report Technical Report 1346"},{"key":"ref15","article-title":"Processor allocation and checkpoint interval selection in cluster computing systems","author":"james planck","year":"2001","journal-title":"Journal of Parallel and Distributed Computing"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/FTCS.1998.689454"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/RELDIS.1998.740469"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/FTCS.1999.781033"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/IPPS.1996.508106"},{"key":"ref4","article-title":"The NAS Parallel Benchmarks 2.0. Report NAS-95&#x2013;020","author":"bailey","year":"1995","journal-title":"Numerical Aerodynamic Simulation Facility NASA Ames Research Center"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.1995.500024"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1048935.1050176"},{"key":"ref5","article-title":"MPICH-V: Toward a scalable fault tolerant MPI for volatile nodes","author":"bosilca","year":"2002","journal-title":"Sc2002 High Performance Networking and Computing"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/HPCSA.2002.1019157"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/214451.214456"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/FTCS.1999.781058"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/HPDC.1999.805295"},{"journal-title":"IEEE Transactions on Computers","article-title":"Transparent rollback-recovery with low overheads, limited rollback and fast output","year":"1992","key":"ref9"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3959.3962"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/HPDC.1993.263838"}],"event":{"name":"Proceedings IEEE International Conference on Cluster Computing CLUSTR-03","start":{"date-parts":[[2003,12,4]]},"location":"Hong Kong, China","end":{"date-parts":[[2003,12,4]]}},"container-title":["Proceedings IEEE International Conference on Cluster Computing CLUSTR-03"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/8878\/28041\/01253321.pdf?arnumber=1253321","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,16]],"date-time":"2017-06-16T03:32:12Z","timestamp":1497583932000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/1253321\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/clustr.2003.1253321","relation":{},"subject":[],"published":{"date-parts":[[2003]]}}}