{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:38:57Z","timestamp":1740123537657,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2020,10,16]],"date-time":"2020-10-16T00:00:00Z","timestamp":1602806400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,10,16]],"date-time":"2020-10-16T00:00:00Z","timestamp":1602806400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100011033","name":"Agencia Estatal de Investigaci\u00f3n","doi-asserted-by":"publisher","award":["TIN2017-84875-P"],"award-info":[{"award-number":["TIN2017-84875-P"]}],"id":[{"id":"10.13039\/501100011033","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2021,5]]},"DOI":"10.1007\/s11227-020-03445-1","type":"journal-article","created":{"date-parts":[[2020,10,16]],"date-time":"2020-10-16T10:02:51Z","timestamp":1602842571000},"page":"4582-4617","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Analysis of parallel application checkpoint storage for system configuration"],"prefix":"10.1007","volume":"77","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1778-0237","authenticated-orcid":false,"given":"Betzabeth","family":"Le\u00f3n","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0002-7046","authenticated-orcid":false,"given":"Daniel","family":"Franco","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5500-850X","authenticated-orcid":false,"given":"Dolores","family":"Rexachs","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2884-3232","authenticated-orcid":false,"given":"Emilio","family":"Luque","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,10,16]]},"reference":[{"key":"3445_CR1","unstructured":"Le\u00f3n B, Franco D, Rexachs D, Luque E (2018) Characterization of I\/O Patterns generated by Fault Tolerance in HPC environments. International Conference on Parallel and Distributed Processing Techniques and Applications (PDPTA) vol 18, p 28"},{"key":"3445_CR2","doi-asserted-by":"publisher","unstructured":"Lemarinier Bouteiller, Capello Krawezik (2003) Coordinated checkpoint versus message log for fault tolerant MPI, in 2003 Proceedings IEEE International Conference on Cluster Computing, pp. 242\u2013250. https:\/\/doi.org\/10.1109\/CLUSTR.2003.1253321","DOI":"10.1109\/CLUSTR.2003.1253321"},{"issue":"3","key":"3445_CR3","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1109\/TPDS.2018.2866794","volume":"30","author":"F Shahzad","year":"2019","unstructured":"Shahzad F, Thies J, Kreutzer M, Zeiser T, Hager G, Wellein G (2019) CRAFT: a library for easier application-level checkpoint\/restart and automatic fault tolerance. IEEE Trans Parallel Distrib Syst 30(3):501. https:\/\/doi.org\/10.1109\/TPDS.2018.2866794","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"3445_CR4","doi-asserted-by":"publisher","unstructured":"Coti C, Herault T, Lemarinier P, Pilard L, Rezmerita A, Rodriguez E, Cappello F (2006) Blocking vs. Non-Blocking Coordinated Checkpointing for Large-Scale Fault Tolerant MPI, In: SC \u201906: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, pp. 18\u201318. https:\/\/doi.org\/10.1109\/SC.2006.15","DOI":"10.1109\/SC.2006.15"},{"issue":"2","key":"3445_CR5","doi-asserted-by":"publisher","first-page":"930","DOI":"10.1007\/s11227-018-2621-1","volume":"75","author":"JA Mor\u00ed\u00f1igo","year":"2019","unstructured":"Mor\u00ed\u00f1igo JA, Rodr\u00edguez-Pascual M, Mayo-Garc\u00eda R (2019) On the modelling of optimal coordinated checkpoint period in supercomputers. J Supercomput 75(2):930","journal-title":"J Supercomput"},{"key":"3445_CR6","doi-asserted-by":"publisher","unstructured":"Guermouche A, Ropars T, Brunet E, Snir M, Cappello F (2011) Uncoordinated Checkpointing Without Domino Effect for Send-Deterministic MPI Applications, in 2011 IEEE International Parallel Distributed Processing Symposium, pp. 989\u20131000. https:\/\/doi.org\/10.1109\/IPDPS.2011.95","DOI":"10.1109\/IPDPS.2011.95"},{"key":"3445_CR7","first-page":"36","volume":"3","author":"M Kumar","year":"2014","unstructured":"Kumar M, Choudhary A, Kumar V (2014) A comparison between different checkpoint schemes with advantages and disadvantages. Int J Comput Appl Nat Semin Recent Adv Wireless Netw Commun 3:36","journal-title":"Int J Comput Appl Nat Semin Recent Adv Wireless Netw Commun"},{"issue":"3","key":"3445_CR8","doi-asserted-by":"publisher","first-page":"498","DOI":"10.1016\/j.future.2009.07.013","volume":"26","author":"J Kov\u00e1cs","year":"2010","unstructured":"Kov\u00e1cs J, Kacsuk P, Januszewski R, Jankowski G (2010) Application and middleware transparent checkpointing with TCKPT on ClusterGrids. Future Gener Comput Syst 26(3):498","journal-title":"Future Gener Comput Syst"},{"key":"3445_CR9","doi-asserted-by":"publisher","unstructured":"Castro-Le\u00f3n M, Meyer H, Rexachs D, Luque E (2015) Fault tolerance at system level based on RADIC architecture. Journal of Parallel and Distributed Computing 86:98. https:\/\/doi.org\/10.1016\/j.jpdc.2015.08.005. http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0743731515001434","DOI":"10.1016\/j.jpdc.2015.08.005"},{"key":"3445_CR10","doi-asserted-by":"crossref","unstructured":"Subasi O, Zyulkyarov F, Unsal O, Labarta J (2015) Marriage Between Coordinated and Uncoordinated Checkpointing for the Exascale Era, in 2015 IEEE 17th International Conference on High Performance Computing and Communications, 2015 IEEE 7th International Symposium on Cyberspace Safety and Security, and 2015 IEEE 12th International Conference on Embedded Software and Systems, pp. 470\u2013478","DOI":"10.1109\/HPCC-CSS-ICESS.2015.150"},{"key":"3445_CR11","doi-asserted-by":"publisher","unstructured":"Takizawa H, Amrizal MA, Komatsu K, Egawa R (2017) An Application-Level Incremental Checkpointing Mechanism with Automatic Parameter Tuning, In: 2017 Fifth International Symposium on Computing and Networking (CANDAR), pp. 389\u2013394. https:\/\/doi.org\/10.1109\/CANDAR.2017.96","DOI":"10.1109\/CANDAR.2017.96"},{"key":"3445_CR12","doi-asserted-by":"crossref","unstructured":"Li G, Pattabiraman K, Cher C, Bose P (2015) Experience report: An application-specific checkpointing technique for minimizing checkpoint corruption, In: 2015 IEEE 26th International Symposium on Software Reliability Engineering (ISSRE), pp. 141\u2013152","DOI":"10.1109\/ISSRE.2015.7381808"},{"key":"3445_CR13","doi-asserted-by":"publisher","unstructured":"Ansel J, Arya K, Cooperman G (2009) DMTCP: Transparent checkpointing for cluster computations and the desktop, In: 2009 IEEE International Symposium on Parallel Distributed Processing, pp. 1\u201312. https:\/\/doi.org\/10.1109\/IPDPS.2009.5161063","DOI":"10.1109\/IPDPS.2009.5161063"},{"key":"3445_CR14","unstructured":"Kongmunvattana A, Tanchatchawal S, Tzeng Nian-Feng (2000) Coherence-based coordinated checkpointing for software distributed shared memory systems, In: Proceedings 20th IEEE International Conference on Distributed Computing Systems, pp. 556\u2013563"},{"issue":"3","key":"3445_CR15","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1007\/s00354-013-0302-4","volume":"31","author":"I Cores","year":"2013","unstructured":"Cores I, Rodr\u00edguez G, Gonz\u00e1lez P, Osorio RR et al (2013) Improving scalability of application-level checkpoint-recovery by reducing checkpoint sizes. New Gener Comput 31(3):163","journal-title":"New Gener Comput"},{"issue":"4","key":"3445_CR16","first-page":"199","volume":"4","author":"A Kongmunvattana","year":"2015","unstructured":"Kongmunvattana A (2015) Reducing checkpoint creation overhead using data similarity. Int J Comput 4(4):199","journal-title":"Int J Comput"},{"key":"3445_CR17","doi-asserted-by":"publisher","unstructured":"Rusu C, Grecu C, Anghel L (2008) Improving the scalability of checkpoint recovery for networks-on-chip, in 2008 IEEE International Symposium on Circuits and Systems, pp. 2793\u20132796. https:\/\/doi.org\/10.1109\/ISCAS.2008.4542037","DOI":"10.1109\/ISCAS.2008.4542037"},{"key":"3445_CR18","doi-asserted-by":"publisher","unstructured":"Bouabache F, Herault T, Fedak G, Cappello F (2008) Hierarchical Replication Techniques to Ensure Checkpoint Storage Reliability in Grid Environment, In: 2008 Eighth IEEE International Symposium on Cluster Computing and the Grid (CCGRID), pp. 475\u2013483. https:\/\/doi.org\/10.1109\/CCGRID.2008.95","DOI":"10.1109\/CCGRID.2008.95"},{"key":"3445_CR19","doi-asserted-by":"publisher","unstructured":"Al-Kiswany S, Ripeanu M, Vazhkudai SS, Gharaibeh A (2008) stdchk: A Checkpoint Storage System for Desktop Grid Computing, In: 2008 The 28th International Conference on Distributed Computing Systems, pp. 613\u2013624. https:\/\/doi.org\/10.1109\/ICDCS.2008.19","DOI":"10.1109\/ICDCS.2008.19"},{"key":"3445_CR20","doi-asserted-by":"publisher","unstructured":"Shahzad F, Wittmann M, Zeiser T, Hager G, Wellein G, Evaluation An, of Different I, O Techniques for Checkpoint, Restart, in, (2013) IEEE International Symposium on Parallel Distributed Processing. Workshops and Phd Forum 2013:1708\u20131716. https:\/\/doi.org\/10.1109\/IPDPSW.2013.145","DOI":"10.1109\/IPDPSW.2013.145"},{"key":"3445_CR21","doi-asserted-by":"publisher","unstructured":"Wan L, Cao Q, Wang F, Oral S (2017) Optimizing checkpoint data placement with guaranteed burst buffer endurance in large-scale hierarchical storage systems. Journal of Parallel and Distributed Computing 100:16. https:\/\/doi.org\/10.1016\/j.jpdc.2016.10.002. http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0743731516301198","DOI":"10.1016\/j.jpdc.2016.10.002"},{"key":"3445_CR22","doi-asserted-by":"crossref","unstructured":"Parasyris K, Keller K, Bautista-Gomez L, Unsal O, Support Checkpoint Restart, for Heterogeneous HPC Applications, in, (2020) 20th IEEE\/ACM International Symposium on Cluster. Cloud and Internet Computing (CCGRID) 2020:242\u2013251","DOI":"10.1109\/CCGrid49817.2020.00-69"},{"key":"3445_CR23","unstructured":"Garg R, Mohan A, Sullivan M, Cooperman G (2018) In: 2018 IEEE International Conference on Cluster Computing (CLUSTER), pp. 302\u2013313"},{"key":"3445_CR24","doi-asserted-by":"crossref","unstructured":"Amrizal A, Hirasawa S, Komatsu K, Takizawa H, Kobayashi H (2012) Improving the scalability of transparent checkpointing for GPU computing systems, In: TENCON 2012 IEEE Region 10 Conference (IEEE, 2012), pp. 1\u20136","DOI":"10.1109\/TENCON.2012.6412343"},{"key":"3445_CR25","doi-asserted-by":"publisher","first-page":"494","DOI":"10.1088\/1742-6596\/46\/1\/067","volume":"46","author":"PH Hargrove","year":"2006","unstructured":"Hargrove PH, Duell JC (2006) Berkeley lab checkpoint\/restart (blcr) for linux clusters. J Phys Conf Ser 46:494","journal-title":"J Phys Conf Ser"},{"key":"3445_CR26","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.future.2013.04.017","volume":"30","author":"KB Ferreira","year":"2014","unstructured":"Ferreira KB, Riesen R, Bridges P, Arnold D, Brightwell R (2014) Accelerating incremental checkpointing for extreme-scale computing. Future Gener Comput Syst 30:66","journal-title":"Future Gener Comput Syst"},{"key":"3445_CR27","doi-asserted-by":"publisher","unstructured":"Muhammad Abrar Akber S, Chen H, Wang Y, Jin H (2018) Minimizing Overheads of Checkpoints in Distributed Stream Processing Systems, In: 2018 IEEE 7th International Conference on Cloud Networking (CloudNet), pp. 1\u20134. https:\/\/doi.org\/10.1109\/CloudNet.2018.8549548","DOI":"10.1109\/CloudNet.2018.8549548"},{"key":"3445_CR28","doi-asserted-by":"publisher","unstructured":"Dauwe D, Pasricha S, Maciejewski AA, Siegel HJ (2018) An Analysis of Multilevel Checkpoint Performance Models, In: 2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 783\u2013792. https:\/\/doi.org\/10.1109\/IPDPSW.2018.00125","DOI":"10.1109\/IPDPSW.2018.00125"},{"key":"3445_CR29","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1007\/978-3-030-50371-0_14","volume-title":"Computational Science - ICCS 2020","author":"B Le\u00f3n","year":"2020","unstructured":"Le\u00f3n B, Franco D, Rexachs D, Luque E (2020) Analysis of Checkpoint I\/O Behavior. In: Krzhizhanovskaya VV, Z\u00e1vodszky G, Lees MH, Dongarra JJ, Sloot PMA, Brissos S, Teixeira J (eds) Computational Science - ICCS 2020. Springer International Publishing, Cham, pp 191\u2013205"},{"key":"3445_CR30","unstructured":"MPICH (2000) Using the Hydra Process Manager, in https:\/\/wiki.mpich.org\/mpich\/index.php\/Using_the_Hydra_Process_Manage"},{"issue":"8","key":"3445_CR31","doi-asserted-by":"publisher","first-page":"942","DOI":"10.1109\/12.609281","volume":"46","author":"NH Vaidya","year":"1997","unstructured":"Vaidya NH (1997) Impact of checkpoint latency on overhead ratio of a checkpointing scheme. IEEE Trans Comput 46(8):942","journal-title":"IEEE Trans Comput"},{"issue":"3","key":"3445_CR32","doi-asserted-by":"publisher","first-page":"642","DOI":"10.1109\/TPDS.2017.2763148","volume":"29","author":"J Panadero","year":"2018","unstructured":"Panadero J, Wong A, Rexachs D, Luque E (2018) P3S: a methodology to analyze and predict application scalability. IEEE Trans Parallel Distrib Syst 29(3):642. https:\/\/doi.org\/10.1109\/TPDS.2017.2763148","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"3445_CR33","first-page":"140","volume-title":"European MPI users\u2019 group meeting","author":"D Goodell","year":"2011","unstructured":"Goodell D, Gropp W, Zhao X, Thakur R (2011) Scalable memory use in MPI: a case study with MPICH2. European MPI users\u2019 group meeting. Springer, Berlin, pp 140\u2013149"},{"key":"3445_CR34","doi-asserted-by":"crossref","unstructured":"Yoshinaga K, Tsujita Y, Hori A, Sato M, Namiki M, Ishikawa Y (2013) A Delegation Mechanism on Many-Core Oriented Hybrid Parallel Computers for Scalability of Communicators and Communications in MPI, In: 2013 21st Euromicro International Conference on Parallel, Distributed, and Network-Based Processing, pp. 249\u2013253","DOI":"10.1109\/PDP.2013.43"},{"issue":"3","key":"3445_CR35","first-page":"63","volume":"5","author":"DH Bailey","year":"1991","unstructured":"Bailey DH, Barszcz E, Barton JT, Browning DS, Carter RL, Dagum L, Fatoohi RA, Frederickson PO, Lasinski TA, Schreiber RS et al (1991) The NAS parallel benchmarks. Int J Supercomput Appl 5(3):63","journal-title":"Int J Supercomput Appl"},{"key":"3445_CR36","doi-asserted-by":"crossref","unstructured":"Karlin I, Keasler J, Neely J (2013) LULESH 2.0 Updates and Changes, In: 2009 IEEE International Symposium on Parallel Distributed Processing, vol. United States, vol. United States","DOI":"10.2172\/1090032"},{"key":"3445_CR37","doi-asserted-by":"publisher","unstructured":"Hou KY, Shin KG, Turner Y, Singhal S (2013) Tradeoffs in Compressing Virtual Machine Checkpoints, In: Proceedings of the 7th International Workshop on Virtualization Technologies in Distributed Computing (Association for Computing Machinery, New York, NY, USA, 2013), VTDC \u201913, p. 41\u201348. https:\/\/doi.org\/10.1145\/2465829.2465834","DOI":"10.1145\/2465829.2465834"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-020-03445-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-020-03445-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-020-03445-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,16]],"date-time":"2021-10-16T11:29:42Z","timestamp":1634383782000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-020-03445-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,16]]},"references-count":37,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,5]]}},"alternative-id":["3445"],"URL":"https:\/\/doi.org\/10.1007\/s11227-020-03445-1","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2020,10,16]]},"assertion":[{"value":"30 September 2020","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2020","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}