{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T10:25:00Z","timestamp":1749896700801,"version":"3.37.3"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"2-3","license":[{"start":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T00:00:00Z","timestamp":1670803200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T00:00:00Z","timestamp":1670803200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-1562659","CCF-1562306","CCF-1617690","CCF-1822191","CCF-1821431"],"award-info":[{"award-number":["CCF-1562659","CCF-1562306","CCF-1617690","CCF-1822191","CCF-1821431"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s10766-022-00749-y","type":"journal-article","created":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T11:02:59Z","timestamp":1670842979000},"page":"128-149","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Fault-Model-Relevant Classification of Consensus Mechanisms for MPI and HPC"],"prefix":"10.1007","volume":"51","author":[{"given":"Grace","family":"Nansamba","sequence":"first","affiliation":[]},{"given":"Amani","family":"Altarawneh","sequence":"additional","affiliation":[]},{"given":"Anthony","family":"Skjellum","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,12,12]]},"reference":[{"key":"749_CR1","doi-asserted-by":"publisher","unstructured":"Fromentin, E., Raynal, M., Tronel, F.: On classes of problems in asyn- chronous distributed systems with process crashes. In: Proceedings. 19th IEEE International Conference on Distributed Computing Systems (Cat. No.99CB37003), pp. 470\u2013477 (1999). https:\/\/doi.org\/10.1109\/ICDCS.1999.776549","DOI":"10.1109\/ICDCS.1999.776549"},{"key":"749_CR2","doi-asserted-by":"publisher","unstructured":"Hassani, A., Skjellum, A., Brightwell, R.: Design and evaluation of FA-MPI, a transactional resilience scheme for non-blocking MPI. In: 2014 44th Annual IEEE\/IFIP International Conference on Dependable Systems and Networks, pp. 750\u2013755, (2014). https:\/\/doi.org\/10.1109\/DSN.2014.78","DOI":"10.1109\/DSN.2014.78"},{"key":"749_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.parco.2019.02.007","volume":"84","author":"N Sultana","year":"2019","unstructured":"Sultana, N., R\u00fcfenacht, M., Skjellum, A., Laguna, I., Mohror, K.: Failure recovery for bulk synchronous applications with MPI stages. Parallel Comput. 84, 1\u201314 (2019). https:\/\/doi.org\/10.1016\/j.parco.2019.02.007","journal-title":"Parallel Comput."},{"key":"749_CR4","unstructured":"Amin, H.: Toward a scalable, transactional, fault-tolerant message passing interface for petascale and exascale machines. PhD dissertation, The University of Alabama at Birmingham (2014)"},{"key":"749_CR5","doi-asserted-by":"publisher","unstructured":"Altarawneh, A., Herschberg, T., Medury, S., Kandah, F., Skjellum, A.: Buterin\u2019s scalability trilemma viewed through a state-change-based classification for common consensus algorithms. In: 2020 10th Annual Computing and Communication Workshop and Conference (CCWC), pp. 0727\u20130736 (2020). https:\/\/doi.org\/10.1109\/CCWC47524.2020.9031204","DOI":"10.1109\/CCWC47524.2020.9031204"},{"issue":"1","key":"749_CR6","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1145\/2455.214112","volume":"32","author":"D Dolev","year":"1985","unstructured":"Dolev, D., Reischuk, R.: Bounds on information exchange for byzantine agreement. J. ACM (JACM) 32(1), 191\u2013204 (1985)","journal-title":"J. ACM (JACM)"},{"key":"749_CR7","doi-asserted-by":"publisher","unstructured":"Gim\u00e9nez, A., Gamblin, T., Bhatele, A., Wood, C., Shoga, K., Marathe, A., Bremer, P.-T., Hamann, B., Schulz, M.: Scrubjay: Deriving knowledge from the disarray of hpc performance data. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. Association for Computing Machinery, SC \u201917, New York, (2017). https:\/\/doi.org\/10.1145\/3126908.3126935","DOI":"10.1145\/3126908.3126935"},{"key":"749_CR8","unstructured":"Guo, H., Di, S., Gupta, R., Peterka, T., Cappello, F.: La VALSE: scalable log visualization for fault characterization in supercomputers. In: Childs, H., Cucchietti, F. (eds.) Eurographics Symposium on Parallel Graphics and Visualization. The Eurographics Association (2018)"},{"key":"749_CR9","doi-asserted-by":"publisher","unstructured":"Martino, C. D., Jha, S., Kramer, W., Kalbarczyk, Z., Iyer, R. K.: Logdiver: A tool for measuring resilience of extreme-scale systems and applications. In: Proceedings of the 5th Workshop on Fault Tolerance for HPC at EXtreme Scale, pp. 11\u201318. Association for Computing Machinery, FTXS \u201915, New York, (2015). https:\/\/doi.org\/10.1145\/2751504.2751511","DOI":"10.1145\/2751504.2751511"},{"key":"749_CR10","doi-asserted-by":"publisher","unstructured":"Buntinas, D.: Scalable distributed consensus to support mpi fault tolerance. In : 2012 IEEE 26th International Parallel and Distributed Processing Symposium, pp. 1240\u20131249 (2012). https:\/\/doi.org\/10.1109\/IPDPS.2012.113","DOI":"10.1109\/IPDPS.2012.113"},{"key":"749_CR11","doi-asserted-by":"publisher","first-page":"24","DOI":"10.15863\/TAS.2017.04.48.5","volume":"48","author":"W Nowakowski","year":"2017","unstructured":"Nowakowski, W.: Network management software for redundant ethernet ring. Theor. Appl. Sci. 48, 24\u201329 (2017)","journal-title":"Theor. Appl. Sci."},{"key":"749_CR12","unstructured":"Libby, R.: Effective HPC hardware management and failure prediction strategy using IPMI. In: Proceedings of the Linux Symposium. Citeseer, (2003)"},{"key":"749_CR13","unstructured":"Baudet, M., Ching, A., Chursin, A., Danezis, G., Garillot, F., Li, Z., Malkhi, D., Naor, O., Perelman, D., Sonnino, A.: State machine replication in the libra blockchain (2019)"},{"key":"749_CR14","doi-asserted-by":"publisher","unstructured":"Driscoll, K., Hall, B., Paulitsch, M., Zumsteg, P., Sivencrona, H.: The real byzantine generals. In: The 23rd Digital Avionics Systems Conference (IEEE Cat. No.04CH37576), vol. 2, pp. 6.D.4\u201361 (2004). https:\/\/doi.org\/10.1109\/DASC.2004.1390734","DOI":"10.1109\/DASC.2004.1390734"},{"key":"749_CR15","unstructured":"Forum, M.P.I.: MPI: A Message-passing Interface Standard, Version 3.1. (2015). High-Performance Computing Center Stuttgart, University of Stuttgart, (2015). URL https:\/\/books.google.com\/books?id=Fbv7jwEACAAJ"},{"issue":"3","key":"749_CR16","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/BF01798957","volume":"4","author":"A Bar-Noy","year":"1991","unstructured":"Bar-Noy, A., Dolev, D.: Consensus algorithms with one-bit messages. Distrib. Comput. 4(3), 105\u2013110 (1991)","journal-title":"Distrib. Comput."},{"key":"749_CR17","unstructured":"Castro, M., Liskov, B.: Practical byzantine fault tolerance. In: Proceedings of the Third USENIX Symposium on Operating Systems Design and Implementation (OSDI), New Orleans, Louisiana, USA, pp. 173\u2013186, (1999). URL https:\/\/dl.acm.org\/citation.cfm?id=296824"},{"key":"749_CR18","doi-asserted-by":"crossref","unstructured":"El-Sayed, N., Schroeder, B.: Reading between the lines of failure logs: Understanding how hpc systems fail. In: 2013 43rd Annual IEEE\/IFIP International Conference on Dependable Systems and Networks (DSN), pp. 1\u201312. IEEE, (2013)","DOI":"10.1109\/DSN.2013.6575356"},{"key":"749_CR19","unstructured":"King, S., Nadal, S.: Ppcoin: Peer-to-peer crypto-currency with proof-of-stake. self-published paper, (2012)"},{"key":"749_CR20","doi-asserted-by":"publisher","DOI":"10.3390\/sym11101198","author":"L Ismail","year":"2019","unstructured":"Ismail, L., Materwala, H.: A review of blockchain architecture and consensus protocols: use cases, challenges, and solutions. Symmetry (2019). https:\/\/doi.org\/10.3390\/sym11101198","journal-title":"Symmetry"},{"key":"749_CR21","unstructured":"Ongaro, D., Ousterhout, J.: In search of an understandable consensus algorithm. In: Proceedings of the 2014 USENIX Conference on USENIX Annual Technical Conference. USENIX Association, USENIX ATC\u201914, pp. 305-320, USA (2014)"},{"key":"749_CR22","doi-asserted-by":"publisher","unstructured":"Ferreira, K., Stearley, J., Laros, J. H., Oldfield, R., Pedretti, K., Brightwell, R., Riesen, R., Bridges, P. G., Arnold, D.: Evaluating the viability of process replication reliability for exascale systems. In: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis. Association for Computing Machinery, SC \u201911, New York (2011a). https:\/\/doi.org\/10.1145\/2063384.2063443","DOI":"10.1145\/2063384.2063443"},{"issue":"12","key":"749_CR23","doi-asserted-by":"publisher","first-page":"3118","DOI":"10.1109\/TSP.2016.2537271","volume":"64","author":"T-H Chang","year":"2016","unstructured":"Chang, T.-H., Hong, M., Liao, W.-C., Wang, X.: Asynchronous distributed admm for large-scale optimization-part i: algorithm and convergence analysis. IEEE Trans. Signal Process. 64(12), 3118\u20133130 (2016). https:\/\/doi.org\/10.1109\/TSP.2016.2537271","journal-title":"IEEE Trans. Signal Process."},{"key":"749_CR24","doi-asserted-by":"crossref","unstructured":"Yin, M., Malkhi, D., Reiter, M. K., Gueta, G. G., Abraham, I.: Hotstuff: Bft consensus with linearity and responsiveness. In: Proceedings of the 2019 ACM Symposium on Principles of Distributed Computing, pp. 347\u2013356. ACM (2019)","DOI":"10.1145\/3293611.3331591"},{"issue":"3","key":"749_CR25","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1177\/1094342013488238","volume":"27","author":"W Bland","year":"2013","unstructured":"Bland, W., Bouteiller, A., Herault, T., Bosilca, G., Dongarra, J.: Post-failure recovery of MPI communication capability: design and rationale. Int. J. High Perform. Comput. Appl. 27(3), 244\u2013254 (2013). https:\/\/doi.org\/10.1177\/1094342013488238","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"749_CR26","doi-asserted-by":"publisher","unstructured":"Katti, A., Di Fatta, G., Naughton, T., Engelmann, C.: Scalable and fault tolerant failure detection and consensus. In: Proceedings of the 22nd European MPI Users\u2019 Group Meeting. Association for Computing Machinery, EuroMPI \u201915, New York, (2015) https:\/\/doi.org\/10.1145\/2802658.2802660","DOI":"10.1145\/2802658.2802660"},{"key":"749_CR27","unstructured":"Popov, S.: The tangle. White Paper 1(3) (2018)"},{"key":"749_CR28","doi-asserted-by":"publisher","unstructured":"Altarawneh, A., Skjellum, A.: The security ingredients for correct and byzantine fault-tolerant blockchain consensus algorithms. In: 2020 International Symposium on Networks, Computers and Communications (ISNCC), pp. 1\u20139, (2020). https:\/\/doi.org\/10.1109\/ISNCC49221.2020.9297326","DOI":"10.1109\/ISNCC49221.2020.9297326"},{"key":"749_CR29","unstructured":"Al-Mamun, A., Li, T., Sadoghi, M., Jiang, L., Shen, H.-T., Zhao, D.: Hpchain: an mpi-based blockchain framework for data fidelity in high-performance computing systems (2019)"},{"key":"749_CR30","unstructured":"Cachin, C., Vukoli\u0107, M.: Blockchain consensus protocols in the wild. arXiv preprint arXiv:1707.01873, (2017)"},{"key":"749_CR31","unstructured":"De Angelis, S.: Assessing security and performances of consensus algorithms for permissioned blockchains. arXiv preprint arXiv:1805.03490, (2018)"},{"key":"749_CR32","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/3-540-48071-4_10","volume-title":"Advances in Cryptology \u2013 CRYPTO\u2019 92","author":"C Dwork","year":"1993","unstructured":"Dwork, C., Naor, M.: Pricing via processing or combatting junk mail. In: Brickell, E.F. (ed.) Advances in Cryptology \u2013 CRYPTO\u2019 92, pp. 139\u2013147. Springer, Berlin Heidelberg (1993)"},{"issue":"3","key":"749_CR33","doi-asserted-by":"publisher","first-page":"668","DOI":"10.1145\/2402.322398","volume":"30","author":"L Lamport","year":"1983","unstructured":"Lamport, L.: The weak byzantine generals problem. J. ACM 30(3), 668\u2013676 (1983). https:\/\/doi.org\/10.1145\/2402.322398","journal-title":"J. ACM"},{"key":"749_CR34","doi-asserted-by":"publisher","unstructured":"Bosilca, G., Bouteiller, A., Herault, T., Le F\u00e8vre, V., Robert, Y., Dongarra, J.: Revisiting credit distribution algorithms for distributed termination detection. In: 2021 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 611\u2013620 (2021). https:\/\/doi.org\/10.1109\/IPDPSW52791.2021.00095","DOI":"10.1109\/IPDPSW52791.2021.00095"},{"key":"749_CR35","doi-asserted-by":"crossref","unstructured":"Moise, I.: Efficient agreement protocols in asynchronous distributed systems. In: 2011 IEEE International Symposium on Parallel and Distributed Processing Workshops and Phd Forum, pp. 2022\u20132025. IEEE, (2011)","DOI":"10.1109\/IPDPS.2011.367"},{"key":"749_CR36","doi-asserted-by":"publisher","unstructured":"Ropars, T., Lefray, A., Kim, D., Schiper, A.: Efficient process replication for MPI applications: Sharing work between replicas. In: 2015 IEEE International Parallel and Distributed Processing Symposium, pp. 645\u2013654, (2015). https:\/\/doi.org\/10.1109\/IPDPS.2015.29","DOI":"10.1109\/IPDPS.2015.29"},{"key":"749_CR37","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/3-540-12689-9_99","volume-title":"Foundations of Computation Theory","author":"MJ Fischer","year":"1983","unstructured":"Fischer, M.J.: The consensus problem in unreliable distributed systems (a brief survey). In: Karpinski, M. (ed.) Foundations of Computation Theory, pp. 127\u2013140. Springer, Berlin Heidelberg (1983)"},{"issue":"4","key":"749_CR38","doi-asserted-by":"publisher","first-page":"479","DOI":"10.1177\/1094342005056139","volume":"19","author":"S Sankaran","year":"2005","unstructured":"Sankaran, S., Squyres, J.M., Barrett, B., Sahay, V., Lumsdaine, A., Duell, J., Hargrove, P., Roman, E.: The lam\/mpi checkpoint\/restart framework: system-initiated checkpointing. Int. J. High Perform. Comput. Appl. 19(4), 479\u2013493 (2005). https:\/\/doi.org\/10.1177\/1094342005056139","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"749_CR39","doi-asserted-by":"publisher","unstructured":"Ferreira, K., Stearley, J., Laros, J. H., Oldfield, R., Pedretti, K., Brightwell, R., Riesen, R., Bridges, P. G., Arnold, D.: Evaluating the viability of process replication reliability for exascale systems. In: SC \u201911: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201312. (2011b). https:\/\/doi.org\/10.1145\/2063384.2063443","DOI":"10.1145\/2063384.2063443"},{"key":"749_CR40","unstructured":"Woo, S., Lang, S., Latham, R., Ross, R., Thakur, R.: Reliable MPI-IO through layout-aware replication (2011)"},{"issue":"2","key":"749_CR41","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1145\/279227.279229","volume":"16","author":"L Lamport","year":"1998","unstructured":"Lamport, L.: The part-time parliament. ACM Trans. Comput. Syst. 16(2), 133\u2013169 (1998). https:\/\/doi.org\/10.1145\/279227.279229","journal-title":"ACM Trans. Comput. Syst."},{"key":"749_CR42","doi-asserted-by":"publisher","unstructured":"Borowsky, E., Gafni, E.: Generalized flp impossibility result for<i>t<\/i>-resilient asynchronous computations. In: Proceedings of the Twenty-Fifth Annual ACM Symposium on Theory of Computing. STOC \u201993, pp. 91\u2013100. Association for Computing Machinery, New York. ISBN 0897915917. (1993). https:\/\/doi.org\/10.1145\/167088.167119","DOI":"10.1145\/167088.167119"},{"key":"749_CR43","first-page":"19","volume":"72","author":"T Brokaw","year":"2000","unstructured":"Brokaw, T., Koziuk, G.: The intelligent platform management interface (IPMI) and enclosure management. Electron. Eng. (Lond.) 72, 19 (2000)","journal-title":"Electron. Eng. (Lond.)"},{"key":"749_CR44","doi-asserted-by":"publisher","unstructured":"Costa, C. H. A., Park, Y., Rosenburg, B. S., Cher, C.-Y., Ryu, K. D.: A system software approach to proactive memory-error avoidance. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC \u201914, pp. 707\u2013718. IEEE Press, (2014). https:\/\/doi.org\/10.1109\/SC.2014.63","DOI":"10.1109\/SC.2014.63"},{"key":"749_CR45","doi-asserted-by":"publisher","unstructured":"Di, S., Gupta, R., Snir, M., Pershey, E., Cappello, F.: Logaider: A tool for mining potential correlations of hpc log events. In: 2017 17th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID), pp. 442\u2013451 (2017). https:\/\/doi.org\/10.1109\/CCGRID.2017.18","DOI":"10.1109\/CCGRID.2017.18"},{"key":"749_CR46","doi-asserted-by":"publisher","unstructured":"Leners, J.B., Wu, H., Hung, W.-L., Aguilera, M.K, Walfish, M.: Detecting failures in distributed systems with the falcon spy network, In: Proceedings of the Twenty-Third ACM Symposium on Operating Systems Principles, pp. 279\u2013294. New York, NY, Association for Computing Machinery (2011). https:\/\/doi.org\/10.1145\/2043556.2043583","DOI":"10.1145\/2043556.2043583"},{"issue":"4","key":"749_CR47","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1016\/j.jpdc.2009.01.001","volume":"69","author":"Y Moses","year":"2009","unstructured":"Moses, Y., Raynal, M.: Revisiting simultaneous consensus with crash failures. J. Parallel Distrib. Comput. 69(4), 400\u2013409 (2009). https:\/\/doi.org\/10.1016\/j.jpdc.2009.01.001","journal-title":"J. Parallel Distrib. Comput."},{"key":"749_CR48","unstructured":"Bano, S., Sonnino, A., Al-Bassam, M., Azouvi, S., McCorry, P., Meiklejohn, S., Danezis, G.: Sok: Consensus in the age of blockchains. In: Proceedings of the 1st ACM Conference on Advances in Financial Technologies, pp. 183\u2013198 (2019)"},{"key":"749_CR49","doi-asserted-by":"crossref","unstructured":"Aguilera, M. K., Toueg, S.: Randomization and failure detection: a hybrid approach to solve consensus. Technical report (1996)","DOI":"10.1007\/3-540-61769-8_3"},{"key":"749_CR50","doi-asserted-by":"crossref","unstructured":"Al-Mamun, A., Zhao, D.: BAASH: enabling blockchain-as-a-service on high-performance computing systems. CoRR Preprint at arxiv: 2001.07022 (2020)","DOI":"10.1145\/3458817.3476155"},{"issue":"3","key":"749_CR51","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1145\/357172.357176","volume":"4","author":"L Lamport","year":"1982","unstructured":"Lamport, L., Shostak, R.E., Pease, M.C.: The byzantine generals problem. ACM Trans. Program. Lang. Syst. 4(3), 382\u2013401 (1982)","journal-title":"ACM Trans. Program. Lang. Syst."},{"key":"749_CR52","unstructured":"Duan, S.: Building reliable and practical byzantine fault tolerance. PhD dissertation, University of California Davis (2016)"},{"key":"749_CR53","doi-asserted-by":"publisher","unstructured":"Fan, X., Chai, Q.: Roll-dpos: A randomized delegated proof of stake scheme for scalable blockchain-based internet of things systems. In: Proceedings of the 15th EAI International Conference on Mobile and Ubiquitous Systems: Computing, Networking and Services. MobiQuitous \u201918, pp. 482\u2013484. New York (2018). https:\/\/doi.org\/10.1145\/3286978.3287023","DOI":"10.1145\/3286978.3287023"},{"issue":"2","key":"749_CR54","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1177\/1094342014522573","volume":"28","author":"M Snir","year":"2014","unstructured":"...Snir, M., Wisniewski, R.W., Abraham, J.A., Adve, S.V., Bagchi, S., Balaji, P., Belak, J., Bose, P., Cappello, F., Carlson, B., Chien, A.A., Coteus, P., Debardeleben, N.A., Diniz, P.C., Engelmann, C., Erez, M., Fazzari, S., Geist, A., Gupta, R., Johnson, F., Krishnamoorthy, S., Leyffer, S., Liberty, D., Mitra, S., Munson, T., Schreiber, R., Stearley, J., Hensbergen, E.V.: Addressing failures in exascale computing. Int. J. High Perform. Comput. Appl. 28(2), 129\u2013173 (2014)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"749_CR55","doi-asserted-by":"crossref","unstructured":"Darius, B.: Scalable distributed consensus to support mpi fault tolerance. In: 2012 IEEE 26th International Parallel and Distributed Processing Symposium, pp. 1240\u20131249. IEEE, (2012)","DOI":"10.1109\/IPDPS.2012.113"},{"issue":"4","key":"749_CR56","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1109\/TDSC.2009.4","volume":"7","author":"B Schroeder","year":"2009","unstructured":"Schroeder, B., Gibson, G.A.: A large-scale study of failures in high-performance computing systems. IEEE Trans. Depend. Secur. Comput. 7(4), 337\u2013350 (2009)","journal-title":"IEEE Trans. Depend. Secur. Comput."},{"issue":"4","key":"749_CR57","doi-asserted-by":"publisher","first-page":"517","DOI":"10.1145\/2954679.2872374","volume":"51","author":"T Leesatapornwongsa","year":"2016","unstructured":"Leesatapornwongsa, T., Lukman, J.F., Lu, S., Gunawi, H.S.: TaxDC: a taxonomy of non-deterministic concurrency bugs in datacenter distributed systems. SIGPLAN Not. 51(4), 517\u2013530 (2016). https:\/\/doi.org\/10.1145\/2954679.2872374","journal-title":"SIGPLAN Not."},{"key":"749_CR58","doi-asserted-by":"publisher","unstructured":"Omwenga, M., Otim, J., Lumala, A.: Robust mobile cloud services through offline support, pp. 90\u201393 (2012). https:\/\/doi.org\/10.1109\/ACSEAC.2012.27","DOI":"10.1109\/ACSEAC.2012.27"},{"issue":"4","key":"749_CR59","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1145\/347057.347561","volume":"30","author":"J Stone","year":"2000","unstructured":"Stone, J., Partridge, C.: When the CRC and TCP checksum disagree. SIGCOMM Comput. Commun. Rev. 30(4), 309\u2013319 (2000). https:\/\/doi.org\/10.1145\/347057.347561","journal-title":"SIGCOMM Comput. Commun. Rev."},{"key":"749_CR60","doi-asserted-by":"publisher","unstructured":"Huang, S.-T.: Detecting termination of distributed computations by external agents. In: [1989] Proceedings. The 9th International Conference on Distributed Computing Systems, pp. 79\u201384, (1989). https:\/\/doi.org\/10.1109\/ICDCS.1989.37933","DOI":"10.1109\/ICDCS.1989.37933"},{"issue":"1","key":"749_CR61","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/2200000016","volume":"3","author":"S Boyd","year":"2011","unstructured":"Boyd, S., Parikh, N., Chu, E., Peleato, B., Eckstein, J.: Distributed optimization and statistical learning via the alternating direction method of multipliers. Found. Trends Mach. Learn. 3(1), 1\u2013122 (2011). https:\/\/doi.org\/10.1561\/2200000016","journal-title":"Found. Trends Mach. Learn."},{"key":"749_CR62","doi-asserted-by":"publisher","first-page":"467","DOI":"10.1016\/j.future.2020.01.026","volume":"106","author":"N Losada","year":"2020","unstructured":"Losada, N., Gonz\u00e1lez, P., Mart\u00edn, M.J., Bosilca, G., Bouteiller, A., Teranishi, K.: Fault tolerance of MPI applications in exascale systems: the ULFM solution. Future Gener. Comput. Syst. 106, 467\u2013481 (2020). https:\/\/doi.org\/10.1016\/j.future.2020.01.026","journal-title":"Future Gener. Comput. Syst."},{"key":"749_CR63","doi-asserted-by":"publisher","unstructured":"Hassani, A., Skjellum, A., Bangalore, P. V., Brightwell, R.: Practical resilient cases for fa-mpi, a transactional fault-tolerant mpi. In: Proceedings of the 3rd Workshop on Exascale MPI. Association for Computing Machinery, ExaMPI \u201915, New York (2015). https:\/\/doi.org\/10.1145\/2831129.2831130","DOI":"10.1145\/2831129.2831130"},{"key":"749_CR64","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/978-3-642-24449-0_29","volume-title":"Recent Advances in the Message Passing Interface","author":"J Hursey","year":"2011","unstructured":"Hursey, J., Naughton, T., Vallee, G., Graham, R.L.: A log-scaling fault tolerant agreement algorithm for a fault tolerant MPI. In: Cotronis, Y., Danalis, A., Nikolopoulos, D.S., Dongarra, J. (eds.) Recent Advances in the Message Passing Interface, pp. 255\u2013263. Springer, Berlin Heidelberg (2011)"},{"issue":"2","key":"749_CR65","doi-asserted-by":"publisher","first-page":"288","DOI":"10.1145\/42282.42283","volume":"35","author":"C Dwork","year":"1988","unstructured":"Dwork, C., Lynch, N., Stockmeyer, L.: Consensus in the presence of partial synchrony. J. ACM 35(2), 288\u2013323 (1988). https:\/\/doi.org\/10.1145\/42282.42283","journal-title":"J. ACM"},{"key":"749_CR66","doi-asserted-by":"publisher","first-page":"912","DOI":"10.1007\/978-3-319-89884-1_32","volume-title":"Programming Languages and Systems","author":"\u00c1 Garc\u00eda-P\u00e9rez","year":"2018","unstructured":"Garc\u00eda-P\u00e9rez, \u00c1., Gotsman, A., Meshman, Y., Sergey, I.: Paxos consensus, deconstructed and abstracted. In: Ahmed, A. (ed.) Programming Languages and Systems, pp. 912\u2013939. Springer International Publishing, Cham (2018)"},{"issue":"4","key":"749_CR67","doi-asserted-by":"publisher","first-page":"398","DOI":"10.1145\/571637.571640","volume":"20","author":"BL Miguel Castro","year":"2002","unstructured":"Miguel Castro, B.L.: Practical byzantine fault tolerance and proactive recovery. ACM Trans. Comput. Syst. 20(4), 398\u2013461 (2002). https:\/\/doi.org\/10.1145\/571637.571640","journal-title":"ACM Trans. Comput. Syst."}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-022-00749-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10766-022-00749-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-022-00749-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,10]],"date-time":"2024-10-10T06:39:24Z","timestamp":1728542364000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10766-022-00749-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,12]]},"references-count":67,"journal-issue":{"issue":"2-3","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["749"],"URL":"https:\/\/doi.org\/10.1007\/s10766-022-00749-y","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"type":"print","value":"0885-7458"},{"type":"electronic","value":"1573-7640"}],"subject":[],"published":{"date-parts":[[2022,12,12]]},"assertion":[{"value":"11 September 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 November 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 December 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declaration"}},{"value":"The authors confirm that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}