{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T17:58:24Z","timestamp":1775671104114,"version":"3.50.1"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2019,3,1]],"date-time":"2019-03-01T00:00:00Z","timestamp":1551398400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,3,1]],"date-time":"2019-03-01T00:00:00Z","timestamp":1551398400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,3,1]],"date-time":"2019-03-01T00:00:00Z","timestamp":1551398400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001659","name":"German Research Foundation","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2019,3,1]]},"DOI":"10.1109\/tpds.2018.2866794","type":"journal-article","created":{"date-parts":[[2018,8,23]],"date-time":"2018-08-23T18:50:36Z","timestamp":1535050236000},"page":"501-514","source":"Crossref","is-referenced-by-count":40,"title":["CRAFT: A Library for Easier Application-Level Checkpoint\/Restart and Automatic Fault Tolerance"],"prefix":"10.1109","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6766-7622","authenticated-orcid":false,"given":"Faisal","family":"Shahzad","sequence":"first","affiliation":[]},{"given":"Jonas","family":"Thies","sequence":"additional","affiliation":[]},{"given":"Moritz","family":"Kreutzer","sequence":"additional","affiliation":[]},{"given":"Thomas","family":"Zeiser","sequence":"additional","affiliation":[]},{"given":"Georg","family":"Hager","sequence":"additional","affiliation":[]},{"given":"Gerhard","family":"Wellein","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/1037187.1024421"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/1122971.1122977"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1177\/1094342005056139"},{"key":"ref32","year":"2011"},{"key":"ref31","year":"2014"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1137\/15M1026122"},{"key":"ref37","first-page":"213","article-title":"Libckpt: Transparent checkpointing under unix","author":"plank","year":"1995","journal-title":"Proc USENIX Winter Tech Conf"},{"key":"ref36","first-page":"1997","article-title":"Checkpoint and Migration of UNIX processes in the condor distributed processing system","author":"litzkow","year":"1997"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2002.10048"},{"key":"ref34","article-title":"Berkeley lab checkpoint\/restart library","year":"0"},{"key":"ref10","year":"2012"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.1541"},{"key":"ref11","first-page":"3","article-title":"A model for predicting the optimum checkpoint interval for restart dumps","author":"daly","year":"2003","journal-title":"Proc Int Conf Comput Sci"},{"key":"ref12","article-title":"Checkpoint\/Restart and automatic fault tolerance(CRAFT) library","author":"shahzad","year":"2016"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33518-1_36"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.18"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ExaMPI.2014.4"},{"key":"ref16","first-page":"471","article-title":"A fault tolerant implementation of multi-level monte carlo methods","volume":"25","author":"pauli","year":"2014","journal-title":"Parallel Computing Accelerating Computational Science and Engineering (CSE)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2014.132"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-016-1629-7"},{"key":"ref19","first-page":"51:51","article-title":"Toward local failure local recovery resilience model using MPI-ULFM","author":"teranishi","year":"2014","journal-title":"4 European PVM\/MPI Users' Group Meeting"},{"key":"ref28","year":"2013"},{"key":"ref4","article-title":"Coordinated checkpoint\/restart process fault tolerance for MPI applications on HPC systems","author":"hursey","year":"2010"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1137\/140976017"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2013.6575356"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"1213","DOI":"10.1109\/71.737697","article-title":"On coordinated checkpointing in distributed systems","volume":"9","author":"cao","year":"1998","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"ref29","year":"2013"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2004.2"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2006.15"},{"key":"ref7","first-page":"36","article-title":"Article: A comparison between different checkpoint schemes with advantages and disadvantages","author":"kumar","year":"2014","journal-title":"Int J Comput Appl Nat Semin Recent Advances Wireless Netw Commun"},{"key":"ref2","article-title":"Emerging heterogeneous technologies for high performance computing","author":"dongarra","year":"2013"},{"key":"ref9","article-title":"A proposal for user-level failure mitigation in the MPI-3 standard","author":"bland","year":"2012"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342014522573"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-8191(01)00100-4"},{"key":"ref20","year":"2012"},{"key":"ref45","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1007\/3-540-45255-9_47","article-title":"FT-MPI: Fault tolerant MPI, supporting dynamic applications in a dynamic world","author":"fagg","year":"2000","journal-title":"Recent Advances in Parallel Virtual Machine and Message Passing Interface 7th European PVM\/MPI Users&#x2019; Group Meeting Balatonf&#x00FC;red Hungary"},{"key":"ref48","year":"2013"},{"key":"ref22","year":"2017"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1023\/B:CLUS.0000039491.64560.8a"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1177\/1094342013488238"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.78"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1137\/S0895479894270427"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/FTCS.1997.614078"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10766-016-0464-z"},{"key":"ref44","year":"2004"},{"key":"ref26","article-title":"The highly scalable iterative solver library PHIST","author":"basermann","year":"2015","journal-title":"Proc 21th Adv Supercomputing Environment Semin University Tokyo"},{"key":"ref43","first-page":"16","article-title":"A tool for semi-automatic application-level checkpointing","author":"ba","year":"0","journal-title":"Proc Tech Posters Int Conf High Perform Comput Netw Storage Anal"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1002\/gamm.201490038"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/8640283\/08444763.pdf?arnumber=8444763","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,13]],"date-time":"2022-07-13T20:53:48Z","timestamp":1657745628000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8444763\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,3,1]]},"references-count":48,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2018.2866794","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,3,1]]}}}