{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,17]],"date-time":"2025-05-17T22:10:06Z","timestamp":1747519806864,"version":"3.40.5"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2015,1,29]],"date-time":"2015-01-29T00:00:00Z","timestamp":1422489600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Theory Comput Syst"],"published-print":{"date-parts":[[2015,8]]},"DOI":"10.1007\/s00224-014-9599-8","type":"journal-article","created":{"date-parts":[[2015,1,28]],"date-time":"2015-01-28T02:09:44Z","timestamp":1422410984000},"page":"397-425","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["FNB: Fast Non-Blocking Coordinated Checkpointing Protocol for Distributed Systems"],"prefix":"10.1007","volume":"57","author":[{"given":"Zohra","family":"Abdelhafidi","sequence":"first","affiliation":[]},{"given":"Mohamed","family":"Djoudi","sequence":"additional","affiliation":[]},{"given":"Nasreddine","family":"Lagraa","sequence":"additional","affiliation":[]},{"given":"Mohamed Bachir","family":"Yagoubi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,1,29]]},"reference":[{"key":"9599_CR1","doi-asserted-by":"crossref","unstructured":"Abdelhafidi, Z., Djoudi, M., Yagoubi, M.B.: An improved schema of coordinated checkpointing protocol for distributed systems based on popular process. In: 12 t h International Conference Innovations in Information Technology (IIT), pp. 367\u2013372 (2012)","DOI":"10.1109\/INNOVATIONS.2012.6207769"},{"issue":"5","key":"9599_CR2","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1016\/j.peva.2007.09.001","volume":"65","author":"A Agbaria","year":"2008","unstructured":"Agbaria, A., Friedman, R.: Model-based performance evaluation of distributed checkpointing protocols. Perform Eval. 65(5), 345\u2013365 (2008)","journal-title":"Perform Eval."},{"issue":"1","key":"9599_CR3","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1006\/jpdc.1997.1346","volume":"44","author":"A Alexandrov","year":"1997","unstructured":"Alexandrov, A., Ionescu, M.F., Schauser, K.E., Scheiman, C.: LogGP: Incorporating long messages into the LogP model for parallel computation. J Parallel Distrib. Comput. 44(1), 71\u201379 (1997)","journal-title":"J Parallel Distrib. Comput."},{"key":"9599_CR4","unstructured":"Alvisi, L.: Understanding the message logging paradigm for masking process crashes. Ph.D. thesis, Cornell University (1998)"},{"key":"9599_CR5","doi-asserted-by":"crossref","unstructured":"Bhargava, B., Lian, S.R.: Independent checkpointing and concurrent rollbackfor recovery-an optimistic approach. In: 7th Symposium on Reliable Distributed Systems, pp. 3\u201312 (1988)","DOI":"10.1109\/RELDIS.1988.25775"},{"key":"9599_CR6","doi-asserted-by":"crossref","unstructured":"Borg, A., Baumbach, J., Glazer, S.: A message system supporting fault tolerance. In: Symposium on Operating Systems Principles (ACM SIGOPS), pp. 90\u201399 (1983)","DOI":"10.1145\/800217.806617"},{"key":"9599_CR7","doi-asserted-by":"crossref","unstructured":"Bosilca, G., Bouteiller, A., Cappello F., Djilali, S., Fedak, G., Germain, C., Herault, T., Lemarinier, P., Lodygensky, O., Magniette, F., Neri, V., Selikhov, A.: Mpich-v: Toward a scalable fault tolerant mpi for volatile nodes. In: ACM\/IEEE conf on Supercomputing, ser. Supercomputing \u201902. Los Alamitos, CA, USA: IEEE Computer Society Press (2002)","DOI":"10.1109\/SC.2002.10048"},{"key":"9599_CR8","doi-asserted-by":"crossref","unstructured":"Bouteiller, A., Cappello F., Herault, T., Krawezik, G., Lemarinier, P., Magniette, F.: Mpich-v2: a fault tolerant mpi for volatile nodes based on pessimistic sender based message logging. In: ACM\/IEEE conference on Supercomputing. New York, NY, USA (2003)","DOI":"10.1145\/1048935.1050176"},{"issue":"1","key":"9599_CR9","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1016\/j.future.2007.02.002","volume":"24","author":"D Buntinas","year":"2008","unstructured":"Buntinas, D., Coti, C., Herault, T., Lemarinier, P., Pilard, L., Rezmerita, A., Rodriguez, E., Cappello, F.: Blocking vs. non-blocking coordinated checkpointing for large-scale fault tolerant MPI Protocols. Futur. Gener. Comput. Syst. 24(1), 73\u201384 (2008)","journal-title":"Futur. Gener. Comput. Syst."},{"issue":"2","key":"9599_CR10","doi-asserted-by":"crossref","first-page":"1127","DOI":"10.1016\/S0304-3975(02)00566-2","volume":"290","author":"G Cao","year":"2003","unstructured":"Cao, G., Singhal, M.: Checkpointing with mutable checkpoints. Theor. Comput. Sci 290(2), 1127\u20131148 (2003)","journal-title":"Theor. Comput. Sci"},{"issue":"1","key":"9599_CR11","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1145\/214451.214456","volume":"3","author":"KM Chandy","year":"1985","unstructured":"Chandy, K.M., Lamport, L.: Distributed snapshots: determining global states of distributed systems. ACM Trans. Comput. Syst. 3(1), 63\u201375 (1985)","journal-title":"ACM Trans. Comput. Syst."},{"key":"9599_CR12","doi-asserted-by":"crossref","unstructured":"Culler, D., Karp, R., Patterson, D., Sahay, A., Schauser, K.E., Santos, E., Subramonian, R., Von Eicken, T.: Logp: Towards a realistic model of parallel computation. In: Fourth ACM SIGPLAN Symp on Principles and Practice of Parallel Programming, pp 1\u201312. San Diego, California, USA (1993)","DOI":"10.1145\/155332.155333"},{"issue":"3","key":"9599_CR13","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1016\/j.future.2004.11.016","volume":"22","author":"JT Daly","year":"2006","unstructured":"Daly, J.T.: A higher order estimate of the optimum checkpoint interval for restart dumps. Futur. Gener. Comput. Syst. 22(3), 303\u2013312 (2006)","journal-title":"Futur. Gener. Comput. Syst."},{"issue":"3","key":"9599_CR14","doi-asserted-by":"crossref","first-page":"375","DOI":"10.1145\/568522.568525","volume":"34","author":"ENM Elnozahy","year":"2002","unstructured":"Elnozahy, E.N.M., Alvisi, L., Wang, Y.M., Johnson, D.B.: A survey of rollback-recovery protocols in message-passing systems. ACM Comput. Surv. 34(3), 375\u2013408 (2002)","journal-title":"ACM Comput. Surv."},{"key":"9599_CR15","doi-asserted-by":"crossref","unstructured":"Elnozahy, E.N.M., Johnson, D.B., Zwarnepoel, W.: The performance of consistent checkpointing. In: 11 t h Symp Reliable Distributed Systems, pp. 39\u201347 (1992)","DOI":"10.1109\/RELDIS.1992.235144"},{"issue":"1","key":"9599_CR16","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1016\/j.future.2011.03.012","volume":"28","author":"E Feller","year":"2012","unstructured":"Feller, E., Mehnert-Spahn, J., Schoettner, M., Morin, C.: Independent checkpointing in a heterogeneous grid environment. Futur. Gener. Comput. Syst. 28(1), 163\u2013170 (2012)","journal-title":"Futur. Gener. Comput. Syst."},{"issue":"5","key":"9599_CR17","doi-asserted-by":"crossref","first-page":"620","DOI":"10.1109\/TPDS.2009.108","volume":"21","author":"R Garg","year":"2010","unstructured":"Garg, R., Garg, V.K., Sabharwal, Y.: Efficient algorithms for global snapshots in large distributed systems. IEEE Trans. Parallel Distrib. Syst. 21(5), 620\u2013630 (2010)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"9599_CR18","doi-asserted-by":"crossref","unstructured":"Goswami, D., Majumder, S.: A global snapshot collection algorithm with concurrent initiators with non-fifo channel. In: 11th International Conference ICA3PP, 2011, pp. 338\u2013348 (2011)","DOI":"10.1007\/978-3-642-24650-0_29"},{"issue":"1","key":"9599_CR19","doi-asserted-by":"crossref","first-page":"29","DOI":"10.1007\/s004460050003","volume":"13","author":"JM H\u00e9lary","year":"2000","unstructured":"H\u00e9lary, J.M., Mostefaoui, A., Netzer, R.H.B., Raynal, M.: Communication-based prevention of useless checkpoints in distributed computations. Distrib. Comput. 13(1), 29\u201343 (2000)","journal-title":"Distrib. Comput."},{"key":"9599_CR20","doi-asserted-by":"crossref","unstructured":"Ibtesham, D., Arnold, D., Ferreira, K.B., Bridges, P.G.: On the viability of checkpoint compression for extreme scale fault tolerance. In: Euro-Par 2011: Parallel Processing Workshops, pp. 302\u2013311. Springer (2012)","DOI":"10.1007\/978-3-642-29740-3_34"},{"issue":"12","key":"9599_CR21","doi-asserted-by":"crossref","first-page":"1575","DOI":"10.1016\/j.jpdc.2008.08.003","volume":"68","author":"Q Jiang","year":"2008","unstructured":"Jiang, Q., Luo, Y., Manivannan, D.: An optimistic checkpointing and message logging approach for consistent global checkpoint collection in distributed systems. J. Parallel Distrib. Comput. 68(12), 1575\u20131589 (2008)","journal-title":"J. Parallel Distrib. Comput."},{"key":"9599_CR22","doi-asserted-by":"crossref","unstructured":"Khunteta, A., Sharma, P., Garg, R.: New & efficient low overheads algorithm for mobile distributed systems. In: International Conference & Workshop on Emerging Trends in Technology - ICWET \u201911, pp 447\u2013450. ACM Press, New York, USA (2011)","DOI":"10.1145\/1980022.1980077"},{"issue":"1","key":"9599_CR23","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1109\/TSE.1987.232562","volume":"13","author":"R Koo","year":"1987","unstructured":"Koo, R., Toueg, S.: Checkpointing and rollback-recovery. IEEE Trans. Softw. Eng. 13(1), 23\u201331 (1987)","journal-title":"IEEE Trans. Softw. Eng."},{"issue":"9","key":"9599_CR24","doi-asserted-by":"crossref","first-page":"1281","DOI":"10.1109\/TPDS.2010.24","volume":"21","author":"A Kshemkalyani","year":"2010","unstructured":"Kshemkalyani, A.: Fast and message-efficient global snapshot algorithms for large-scale distributed systems. IEEE Trans. Distrib. Syst. 21(9), 1281\u20131289 (2010)","journal-title":"IEEE Trans. Distrib. Syst."},{"key":"9599_CR25","doi-asserted-by":"crossref","unstructured":"Kumar, P., Khunteta, A.: A minimum-process coordinated checkpointing protocol for mobile distributed system. IJCSI Internat. J. Comput. Sci. Issues 7(3) (2010)","DOI":"10.5120\/701-982"},{"issue":"7","key":"9599_CR26","doi-asserted-by":"crossref","first-page":"558","DOI":"10.1145\/359545.359563","volume":"21","author":"L Lamport","year":"1978","unstructured":"Lamport, L.: Time, clocks, and the ordering of events in a distributed system. Commun. ACM 21(7), 558\u2013565 (1978)","journal-title":"Commun. ACM"},{"key":"9599_CR27","doi-asserted-by":"crossref","unstructured":"Lemarinier, P., Bouteiller, A., Herault, T., Krawezik, G., Cappello, F.: Improved message logging versus improved coordinated checkpointing for fault tolerant mpi. In: IEEE International Conference on Cluster Computer, pp. 115\u2013124 (2004)","DOI":"10.1109\/CLUSTR.2004.1392609"},{"issue":"2","key":"9599_CR28","doi-asserted-by":"crossref","first-page":"146","DOI":"10.1504\/IJHPCN.2004.008899","volume":"2","author":"P Lemarinier","year":"2004","unstructured":"Lemarinier, P., Bouteiller, A., Krawezik, G., Cappello, F.: Coordinated checkpoint versus message log for fault tolerant mpi. Int. J. High Perfor. Comput. Netw. 2(2), 146\u2013155 (2004)","journal-title":"Int. J. High Perfor. Comput. Netw."},{"key":"9599_CR29","doi-asserted-by":"crossref","first-page":"527","DOI":"10.1093\/comjnl\/bxk004","volume":"49","author":"G Li","year":"2006","unstructured":"Li, G., Shu, L.: Design and evaluation of a low-latency checkpointing scheme for mobile computing systems. Comput. J. 49, 527\u2013540 (2006)","journal-title":"Comput. J."},{"issue":"2","key":"9599_CR30","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1016\/j.jpdc.2008.07.012","volume":"69","author":"Y Luo","year":"2009","unstructured":"Luo, Y., Manivannan, D.: Fine: A fully informed and efficient communication-induced checkpointing protocol for distributed systems. J. Parallel Distrib. Comput. 69(2), 153\u2013167 (2009)","journal-title":"J. Parallel Distrib. Comput."},{"issue":"7","key":"9599_CR31","doi-asserted-by":"crossref","first-page":"816","DOI":"10.1016\/j.jpdc.2007.02.006","volume":"67","author":"PS Mandal","year":"2007","unstructured":"Mandal, P.S., Mukhopadhyaya, K.: Self-stabilizing algorithm for checkpointing in a distributed system. J. Parallel Distrib. Comput. 67(7), 816\u2013829 (2007)","journal-title":"J. Parallel Distrib. Comput."},{"issue":"23","key":"9599_CR32","first-page":"215","volume":"1","author":"F Mattern","year":"1989","unstructured":"Mattern, F.: Virtual time and global states of distributed systems. Parallel Distrib. Algoritm. 1(23), 215\u2013226 (1989)","journal-title":"Parallel Distrib. Algoritm."},{"issue":"2","key":"9599_CR33","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1109\/71.342127","volume":"6","author":"RHB Netzer","year":"1995","unstructured":"Netzer, R.H.B., Xu, J.: Necessary and sufficient conditions for consistent global snapshots. IEEE Trans. Parallel Distrib. Syst. 6(2), 165\u2013169 (1995)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"9599_CR34","doi-asserted-by":"crossref","unstructured":"Ohara, M., Arai, M., Fukumoto, S., Iwasaki, K.: Finding a recovery line in uncoordinated checkpointing. In: 24th International Conference on Distributed Computing Systems Workshop, pp. 628\u2013633 (2004)","DOI":"10.1109\/ICDCSW.2004.1284098"},{"key":"9599_CR35","doi-asserted-by":"crossref","unstructured":"Prakash, R., Singhal, M.: Maximal global snapshot with concurrent initiators. In: 6th IEEE Symposium on Parallel and Distributed Processing, pp. 344\u2013351. IEEE Computer Society Press (1994)","DOI":"10.1109\/SPDP.1994.346149"},{"issue":"2","key":"9599_CR36","doi-asserted-by":"crossref","first-page":"220","DOI":"10.1109\/TSE.1975.6312842","volume":"1","author":"B Randell","year":"1975","unstructured":"Randell, B.: System structure for software fault-tolerance. IEEE Trans. Softw. Eng. 1(2), 220\u2013232 (1975)","journal-title":"IEEE Trans. Softw. Eng."},{"issue":"1","key":"9599_CR37","doi-asserted-by":"crossref","first-page":"42","DOI":"10.1145\/1057977.1057980","volume":"37","author":"Y Saito","year":"2005","unstructured":"Saito, Y., Shapiro, M.: Optimistic replication. ACM Comput. Surv. 37(1), 42\u201381 (2005)","journal-title":"ACM Comput. Surv."},{"key":"9599_CR38","doi-asserted-by":"crossref","unstructured":"Sakata, T.C., Garcia, I.C.: Non-blocking synchronous checkpointing based on rollback-dependency trackability. In: 25th IEEE Symposium Reliable Distributed Systems, pp. 4\u201311 (2006)","DOI":"10.1109\/SRDS.2006.34"},{"issue":"4","key":"9599_CR39","doi-asserted-by":"crossref","first-page":"337","DOI":"10.1109\/TDSC.2009.4","volume":"7","author":"B Schroeder","year":"2010","unstructured":"Schroeder, B., Gibson, G.A.: A large-scale study of failures in high-performance computing systems. IEEE Trans. Dependable Secure Comput. 7(4), 337\u2013350 (2010)","journal-title":"IEEE Trans. Dependable Secure Comput."},{"issue":"5","key":"9599_CR40","doi-asserted-by":"crossref","first-page":"785","DOI":"10.1109\/90.541326","volume":"4","author":"AP Sistla","year":"1996","unstructured":"Sistla, A.P., Welch, J.L.: Efficient distributed recovery using message logging. IEEE\/ACM Trans. Netw. 4(5), 785\u2013795 (1996)","journal-title":"IEEE\/ACM Trans. Netw."},{"key":"9599_CR41","unstructured":"Spezialetti, M., Kearns, P.: Efficient distributed snapshots. In: 6th International Conference on Distributed Computing Systems, pp. 382\u2013388. Boston (1986)"},{"issue":"3","key":"9599_CR42","doi-asserted-by":"crossref","first-page":"204","DOI":"10.1145\/3959.3962","volume":"3","author":"RE Strom","year":"1985","unstructured":"Strom, R.E., Yemini, S.: Optimistic recovery in distributed systems. Trans. Comput. Systems 3(3), 204\u2013226 (1985)","journal-title":"Trans. Comput. Systems"},{"issue":"3","key":"9599_CR43","doi-asserted-by":"crossref","first-page":"493","DOI":"10.1109\/TPDS.2012.139","volume":"24","author":"J Tsai","year":"2013","unstructured":"Tsai, J.: Flexible symmetrical global-snapshot algorithms for large-scale distributed systems. IEEE Trans. Parallel and Distributed Systems 24(3), 493\u2013505 (2013)","journal-title":"IEEE Trans. Parallel and Distributed Systems"},{"key":"9599_CR44","unstructured":"Wang, Y.M.: Space reclamation for uncoordinated checkpointing in message-passing systems. Ph.D. thesis, University of Illinois, Department of Computer Science (1993)"},{"issue":"4","key":"9599_CR45","doi-asserted-by":"crossref","first-page":"456","DOI":"10.1109\/12.588059","volume":"46","author":"YM Wang","year":"1997","unstructured":"Wang, Y.M.: Consistent global checkpoints that contain a given set of local checkpoints. IEEE Trans on Computers 46(4), 456\u2013468 (1997)","journal-title":"IEEE Trans on Computers"},{"issue":"5","key":"9599_CR46","doi-asserted-by":"crossref","first-page":"383","DOI":"10.1080\/17445760802615688","volume":"24","author":"J Wu","year":"2009","unstructured":"Wu, J., Manivannan, D.: An enhanced model-based checkpointing protocol for preventing useless checkpoints. J Parallel Emergent and Distributed Systems 24(5), 383\u2013406 (2009)","journal-title":"J Parallel Emergent and Distributed Systems"}],"container-title":["Theory of Computing Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00224-014-9599-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00224-014-9599-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00224-014-9599-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,17]],"date-time":"2025-05-17T21:43:35Z","timestamp":1747518215000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00224-014-9599-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,1,29]]},"references-count":46,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2015,8]]}},"alternative-id":["9599"],"URL":"https:\/\/doi.org\/10.1007\/s00224-014-9599-8","relation":{},"ISSN":["1432-4350","1433-0490"],"issn-type":[{"type":"print","value":"1432-4350"},{"type":"electronic","value":"1433-0490"}],"subject":[],"published":{"date-parts":[[2015,1,29]]}}}