{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T14:31:18Z","timestamp":1740148278037,"version":"3.37.3"},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["1350766","1618706","1717774"],"award-info":[{"award-number":["1350766","1618706","1717774"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Netw. Serv. Manage."],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1109\/tnsm.2020.3030937","type":"journal-article","created":{"date-parts":[[2020,10,14]],"date-time":"2020-10-14T19:24:21Z","timestamp":1602703461000},"page":"2077-2088","source":"Crossref","is-referenced-by-count":1,"title":["Optimizing Job Reliability Through Contention-Free, Distributed Checkpoint Scheduling"],"prefix":"10.1109","volume":"18","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3894-8756","authenticated-orcid":false,"given":"Yu","family":"Xiang","sequence":"first","affiliation":[]},{"given":"Hang","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3010-8090","authenticated-orcid":false,"given":"Tian","family":"Lan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8588-7680","authenticated-orcid":false,"given":"Howie","family":"Huang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1501-5953","authenticated-orcid":false,"given":"Suresh","family":"Subramaniam","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.2012.6195481"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.2010.5461911"},{"key":"ref33","first-page":"255","article-title":"Optimizing job reliability through contention-free, distributed checkpoint scheduling","author":"xiang","year":"2014","journal-title":"Proc ACM SIGCOMM Workshop Distrib Cloud Comput (DCC)"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2011.2144617"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2006.887322"},{"journal-title":"Reversible Markov Chains and Random Walks on Graphs","year":"2002","author":"aldous","key":"ref30"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.728"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1126\/science.220.4598.671","article-title":"Optimization by simulated annealing","volume":"220","author":"kirkpatrick","year":"1983","journal-title":"Science"},{"key":"ref35","first-page":"111","author":"russell","year":"2020","journal-title":"Artificial Intelligence A Modern Approach"},{"journal-title":"Optimizing Job Reliability Through Contention-Free Distributed Checkpoint Scheduling","year":"2013","author":"xiang","key":"ref34"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/PRDC.2004.1276566"},{"key":"ref40","first-page":"83","author":"ross","year":"1985","journal-title":"Introduction to Probability Models"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/0020-0190(83)90093-5"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1341312.1341341"},{"journal-title":"Fast Lightweight Virtual Machine Checkpointing","year":"2010","author":"sun","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2018.00127"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/NOMS.2010.5488493"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS.2010.125"},{"key":"ref17","first-page":"1","article-title":"An optimal checkpoint\/restart model for a large scale high performance computing system","author":"liu","year":"2008","journal-title":"Proc Parallel Distrib Process (IPDPS)"},{"key":"ref18","first-page":"1","article-title":"ParallaX: Managing storage for a million machines","author":"warfield","year":"2005","journal-title":"Proc HotOS"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS.2007.4447844"},{"journal-title":"Downtime Statistics of Current Cloud Solutions","year":"2012","key":"ref28"},{"key":"ref4","first-page":"1","article-title":"Checkpointing orhestration: Toward a scalable HPC fault-tolerant environment","author":"hui","year":"2012","journal-title":"Proc IEEE\/ACM Int Symp CCGrid"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2009.5198774"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICIMP.2008.12"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/HASE.2005.7"},{"journal-title":"Failure as a Service (faas) A Cloud Service for Large-scale Online Failure Drills","year":"2012","author":"gunawi","key":"ref29"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063429"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/RELDIS.2002.1180181"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/C-M.1975.218955"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2012.6364649"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/12.609281"},{"journal-title":"Amazon We Promise Our EC2 Cloud Will only Crash Once A Week","year":"2008","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2006.22"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2011.5958003"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/DASC.2006.37"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2016.08.010"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1555349.1555372"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.5120\/20435-2768"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ChinaGrid.2009.42"},{"key":"ref26","first-page":"23","article-title":"Throughput modelling and fairness issues in CSMA\/CA based ad-hoc networks","author":"wang","year":"2005","journal-title":"Proc IEEE InfoCom"},{"key":"ref43","first-page":"1","article-title":"Optimizing VM checkpointing for restore performance in VMware ESXi","author":"zhang","year":"2013","journal-title":"Proc USENIX Conf Annu Techn Conf"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2009.2035046"}],"container-title":["IEEE Transactions on Network and Service Management"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/4275028\/9450206\/9223753-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/4275028\/9450206\/09223753.pdf?arnumber=9223753","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:52:42Z","timestamp":1652194362000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9223753\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6]]},"references-count":43,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tnsm.2020.3030937","relation":{},"ISSN":["1932-4537","2373-7379"],"issn-type":[{"type":"electronic","value":"1932-4537"},{"type":"electronic","value":"2373-7379"}],"subject":[],"published":{"date-parts":[[2021,6]]}}}