{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T13:29:02Z","timestamp":1770989342727,"version":"3.50.1"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T00:00:00Z","timestamp":1605484800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T00:00:00Z","timestamp":1605484800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Wireless Pers Commun"],"published-print":{"date-parts":[[2021,4]]},"DOI":"10.1007\/s11277-020-07949-0","type":"journal-article","created":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T17:04:54Z","timestamp":1605546294000},"page":"1853-1877","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Checkpointing Algorithms for Fault-Tolerant Execution of Large-Scale Distributed Applications in Cloud"],"prefix":"10.1007","volume":"117","author":[{"given":"Priti","family":"Kumari","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0783-9426","authenticated-orcid":false,"given":"Parmeet","family":"Kaur","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,16]]},"reference":[{"issue":"3","key":"7949_CR1","doi-asserted-by":"publisher","first-page":"1971","DOI":"10.1007\/s11277-015-2498-8","volume":"83","author":"PK Jaggi","year":"2015","unstructured":"Jaggi, P. K., & Singh, A. K. (2015). Movement-based checkpointing and message logging for recovery in MANETs. Wireless Personal Communications, 83(3), 1971\u20131993.","journal-title":"Wireless Personal Communications"},{"key":"7949_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.jksuci.2018.09.021","author":"P Kumari","year":"2018","unstructured":"Kumari, P., & Kaur, P. (2018). A survey of fault tolerance in cloud computing. Journal of King Saud University-Computer and Information Sciences. https:\/\/doi.org\/10.1016\/j.jksuci.2018.09.021.","journal-title":"Journal of King Saud University-Computer and Information Sciences"},{"issue":"7","key":"7949_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/CC.2017.8010962","volume":"14","author":"A Zhou","year":"2017","unstructured":"Zhou, A., Sun, Q., & Li, J. (2017). Enhancing reliability via checkpointing in cloud computing systems. China Communications, 14(7), 1\u201310.","journal-title":"China Communications"},{"issue":"6","key":"7949_CR4","doi-asserted-by":"publisher","first-page":"599","DOI":"10.1016\/j.future.2008.12.001","volume":"25","author":"R Buyya","year":"2009","unstructured":"Buyya, R., Yeo, C. S., Venugopal, S., Broberg, J., & Brandic, I. (2009). Cloud computing and emerging IT platforms: Vision, hype, and reality for delivering computing as the 5th utility. Future Generation Computer Systems, 25(6), 599\u2013616.","journal-title":"Future Generation Computer Systems"},{"key":"7949_CR5","unstructured":"https:\/\/www.crn.com\/slide-shows\/cloud\/the-10-biggest-cloud-outages-of-2018, Available online 2019."},{"issue":"4","key":"7949_CR6","doi-asserted-by":"publisher","first-page":"356","DOI":"10.7763\/IJFCC.2012.V1.95","volume":"1","author":"S Kumar","year":"2012","unstructured":"Kumar, S., & Goudar, R. H. (2012). Cloud computing-research issues, challenges, architecture, platforms and applications: A survey. International Journal of Future Computer and Communication, 1(4), 356.","journal-title":"International Journal of Future Computer and Communication"},{"issue":"12","key":"7949_CR7","first-page":"573","volume":"3","author":"S Patel","year":"2013","unstructured":"Patel, S., & Singh, A. S. (2013). Fault tolerance mechanisms and its implementation in cloud computing\u2013a review. International Journal of Advanced Research in Computer Science and Software Engineering, 3(12), 573\u2013576.","journal-title":"International Journal of Advanced Research in Computer Science and Software Engineering"},{"issue":"2","key":"7949_CR8","first-page":"491","volume":"28","author":"J Zhao","year":"2016","unstructured":"Zhao, J., Xiang, Y., Lan, T., Huang, H. H., & Subramaniam, S. (2016). Elastic reliability optimization through peer-to-peer checkpointing in cloud computing. IEEE Transactions on Parallel and Distributed Systems, 28(2), 491\u2013502.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"issue":"6","key":"7949_CR9","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1016\/j.jpdc.2011.01.002","volume":"71","author":"FA da Silva","year":"2011","unstructured":"da Silva, F. A., & Senger, H. (2011). Scalability limits of Bag-of-Tasks applications running on hierarchical platforms. Journal of Parallel and Distributed Computing, 71(6), 788\u2013801.","journal-title":"Journal of Parallel and Distributed Computing"},{"key":"7949_CR10","unstructured":"Sukhoroslov, O. (2018). Supporting efficient execution of many-task applications with Everest. In\u00a0Proceedings of the VIII international conference \u201cdistributed computing and grid-technologies in science and education\u201d(GRID 2018)\u00a0(pp. 266\u2013270)."},{"issue":"1","key":"7949_CR11","first-page":"01","volume":"4","author":"LP Saikia","year":"2014","unstructured":"Saikia, L. P., & Devi, Y. L. (2014). Fault tolerance techniques and algorithms in cloud computing. International Journal of Computer Science & Communication Networks, 4(1), 01\u201308.","journal-title":"International Journal of Computer Science & Communication Networks"},{"key":"7949_CR12","doi-asserted-by":"crossref","unstructured":"Goiri, \u00cd., Julia, F., Guitart, J., & Torres, J. (2010). Checkpoint-based fault-tolerant infrastructure for virtualized service providers. In\u00a02010 IEEE network operations and management symposium-NOMS 2010\u00a0(pp. 455\u2013462). IEEE.","DOI":"10.1109\/NOMS.2010.5488493"},{"issue":"2","key":"7949_CR13","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1109\/TDSC.2016.2548463","volume":"15","author":"N El-Sayed","year":"2016","unstructured":"El-Sayed, N., & Schroeder, B. (2016). Understanding practical tradeoffs in HPC checkpoint-scheduling policies. IEEE Transactions on Dependable and Secure Computing, 15(2), 336\u2013350.","journal-title":"IEEE Transactions on Dependable and Secure Computing"},{"key":"7949_CR14","doi-asserted-by":"publisher","first-page":"18616","DOI":"10.1109\/ACCESS.2018.2810214","volume":"6","author":"H Han","year":"2018","unstructured":"Han, H., Bao, W., Zhu, X., Feng, X., & Zhou, W. (2018). Fault-tolerant scheduling for hybrid real-time tasks based on CPB model in cloud. IEEE Access, 6, 18616\u201318629.","journal-title":"IEEE Access"},{"issue":"8","key":"7949_CR15","first-page":"1105","volume":"67","author":"L Han","year":"2018","unstructured":"Han, L., Canon, L. C., Casanova, H., Robert, Y., & Vivien, F. (2018). Checkpointing workflows for fail-stop errors. IEEE Transactions on Computers, 67(8), 1105\u20131120.","journal-title":"IEEE Transactions on Computers"},{"issue":"5","key":"7949_CR16","doi-asserted-by":"publisher","first-page":"587","DOI":"10.1007\/s12652-014-0220-4","volume":"6","author":"D Liu","year":"2015","unstructured":"Liu, D. (2015). A fault-tolerant architecture for ROIA in cloud. Journal of Ambient Intelligence and Humanized Computing, 6(5), 587\u2013595.","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"issue":"6","key":"7949_CR17","doi-asserted-by":"publisher","first-page":"14637","DOI":"10.1007\/s10586-018-2375-9","volume":"22","author":"S Chinnathambi","year":"2019","unstructured":"Chinnathambi, S., Santhanam, A., Rajarathinam, J., & Senthilkumar, M. (2019). Scheduling and checkpointing optimization algorithm for Byzantine fault tolerance in cloud clusters. Cluster Computing, 22(6), 14637\u201314650.","journal-title":"Cluster Computing"},{"issue":"11","key":"7949_CR18","doi-asserted-by":"publisher","first-page":"4567","DOI":"10.1007\/s12652-018-1139-y","volume":"10","author":"M Amoon","year":"2019","unstructured":"Amoon, M., El-Bahnasawy, N., Sadi, S., & Wagdi, M. (2019). On the design of reactive approach with flexible checkpoint interval to tolerate faults in cloud computing systems. Journal of Ambient Intelligence and Humanized Computing, 10(11), 4567\u20134577.","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"issue":"4","key":"7949_CR19","doi-asserted-by":"publisher","first-page":"930","DOI":"10.1007\/s10922-019-09491-2","volume":"27","author":"MN Cheraghlou","year":"2019","unstructured":"Cheraghlou, M. N., Khademzadeh, A., & Haghparast, M. (2019). New fuzzy-based fault tolerance evaluation framework for cloud computing. Journal of Network and Systems Management, 27(4), 930\u2013948.","journal-title":"Journal of Network and Systems Management"},{"key":"7949_CR20","doi-asserted-by":"publisher","unstructured":"Rezaeipanah, A., Mojarad, M., & Fakhari, A. (2020). Providing a new approach to increase fault tolerance in cloud computing using fuzzy logic.\u00a0International Journal of Computers and Applications, 1\u20139. https:\/\/doi.org\/10.1080\/1206212X.2019.1709288.","DOI":"10.1080\/1206212X.2019.1709288"},{"issue":"3","key":"7949_CR21","doi-asserted-by":"publisher","first-page":"78","DOI":"10.4018\/JGIM.2018070106","volume":"26","author":"P Parwekar","year":"2018","unstructured":"Parwekar, P., Rodda, S., & Kaur, P. (2018). Mobile sink as checkpoints for fault detection towards fault tolerance in wireless sensor networks. Journal of Global Information Management (JGIM), 26(3), 78\u201389.","journal-title":"Journal of Global Information Management (JGIM)"},{"issue":"2","key":"7949_CR22","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1504\/IJHPCN.2018.089888","volume":"11","author":"H Mansouri","year":"2018","unstructured":"Mansouri, H., Badache, N., Aliouat, M., & Pathan, A. S. K. (2018). Checkpointing distributed application running on mobile ad hoc networks. International Journal of High Performance Computing and Networking, 11(2), 95\u2013107.","journal-title":"International Journal of High Performance Computing and Networking"},{"key":"7949_CR23","unstructured":"Singh, A. K., & Jaggi, P. K. (2013). Asynchronous rollback recovery in cluster based multi hop mobile ad hoc networks.\u00a0International Journal of Enhanced Research in Management & Computer Applications, ISSN, 2319\u20137471."},{"key":"7949_CR24","volume-title":"Distributed computing: Principles, algorithms, and systems","author":"AD Kshemkalyani","year":"2011","unstructured":"Kshemkalyani, A. D., & Singhal, M. (2011). Distributed computing: Principles, algorithms, and systems. Cambridge: Cambridge University Press."},{"issue":"3\u20134","key":"7949_CR25","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1504\/IJHPCN.2019.106109","volume":"15","author":"H Mansouri","year":"2019","unstructured":"Mansouri, H., & Pathan, A. S. K. (2019). Checkpointing distributed computing systems: An optimisation approach. International Journal of High Performance Computing and Networking, 15(3\u20134), 202\u2013209.","journal-title":"International Journal of High Performance Computing and Networking"},{"key":"7949_CR26","doi-asserted-by":"crossref","unstructured":"Singh, A. K., & Kaur, P. (2011). Log based recovery with low overhead for mobile computing systems. In\u00a0International conference on advances in communication, network, and computing\u00a0(pp. 637\u2013642). Springer, Berlin, Heidelberg.","DOI":"10.1007\/978-3-642-19542-6_125"},{"issue":"4","key":"7949_CR27","doi-asserted-by":"publisher","first-page":"1191","DOI":"10.1109\/TCC.2016.2567392","volume":"6","author":"J Liu","year":"2016","unstructured":"Liu, J., Wang, S., Zhou, A., Kumar, S. A., Yang, F., & Buyya, R. (2016). Using proactive fault-tolerance approach to enhance cloud service reliability. IEEE Transactions on Cloud Computing, 6(4), 1191\u20131202.","journal-title":"IEEE Transactions on Cloud Computing"},{"issue":"6","key":"7949_CR28","doi-asserted-by":"publisher","first-page":"902","DOI":"10.1109\/TSC.2016.2519898","volume":"10","author":"A Zhou","year":"2016","unstructured":"Zhou, A., Wang, S., Cheng, B., Zheng, Z., Yang, F., Chang, R. N., et al. (2016). Cloud service reliability enhancement via virtual machine placement optimization. IEEE Transactions on Services Computing, 10(6), 902\u2013913.","journal-title":"IEEE Transactions on Services Computing"},{"issue":"2","key":"7949_CR29","doi-asserted-by":"publisher","first-page":"193","DOI":"10.3233\/MGS-200328","volume":"16","author":"P Kumari","year":"2020","unstructured":"Kumari, P., & Kaur, P. (2020). Topology-aware virtual machine replication for fault tolerance in cloud computing systems. Multiagent and Grid Systems, 16(2), 193\u2013206.","journal-title":"Multiagent and Grid Systems"},{"key":"7949_CR30","unstructured":"https:\/\/blogchinmaya.blogspot.com\/2017\/04\/what-is-fat-tree-and-how-to-onstruct.html, Available online 2019."},{"key":"7949_CR31","unstructured":"https:\/\/www.cisco.com\/en\/US\/docs\/storage\/san_switches\/mds9000\/hw\/9124\/quick\/quide\/9124QSG.html. Available online 2019."},{"key":"7949_CR32","unstructured":"https:\/\/www.dell.com\/en-in\/work\/shop\/povw\/networking-n2000-series. Available online 2019."}],"container-title":["Wireless Personal Communications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11277-020-07949-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11277-020-07949-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11277-020-07949-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,3,17]],"date-time":"2021-03-17T19:29:00Z","timestamp":1616009340000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11277-020-07949-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11,16]]},"references-count":32,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2021,4]]}},"alternative-id":["7949"],"URL":"https:\/\/doi.org\/10.1007\/s11277-020-07949-0","relation":{},"ISSN":["0929-6212","1572-834X"],"issn-type":[{"value":"0929-6212","type":"print"},{"value":"1572-834X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,11,16]]},"assertion":[{"value":"5 November 2020","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 November 2020","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}