{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T18:03:44Z","timestamp":1730225024959,"version":"3.28.0"},"reference-count":24,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011,12]]},"DOI":"10.1109\/hipc.2011.6152716","type":"proceedings-article","created":{"date-parts":[[2012,2,22]],"date-time":"2012-02-22T20:27:21Z","timestamp":1329942441000},"page":"1-9","source":"Crossref","is-referenced-by-count":12,"title":["Building algorithmically nonstop fault tolerant MPI programs"],"prefix":"10.1109","author":[{"given":"Rui","family":"Wang","sequence":"first","affiliation":[]},{"given":"Erlin","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Mingyu","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Guangming","family":"Tan","sequence":"additional","affiliation":[]},{"given":"Pavan","family":"Balaji","sequence":"additional","affiliation":[]},{"given":"Darius","family":"Buntinas","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470436"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/1941553.1941600"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1996130.1996142"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TC.1984.1676475"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/12.9736"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2006.1639333"},{"key":"ref16","first-page":"479","article-title":"The LAM\/MPI checkpoint\/restart framework: System-initiated checkpointing","author":"sankaran","year":"2003","journal-title":"Proceedings LACSI Symposium"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2007.370605"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2007.02.002"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45255-9_47"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/78\/1\/012022"},{"journal-title":"The computer failure data repository sites","year":"0","key":"ref3"},{"key":"ref6","article-title":"Analyzing checkpointing trends for applications on petascale systems","author":"naik","year":"2009","journal-title":"the Proceedings of the Workshop on Parallel Programming Models and Systems Software for High-End Computing (P2S2)"},{"key":"ref5","article-title":"Failure tolerance in petascale computers","volume":"3","author":"gibson","year":"2007","journal-title":"CTWatchQuarterly"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2009.42"},{"key":"ref7","first-page":"213","article-title":"Building fault survivable MPI programs with FT-MPI using diskless checkpointing","author":"chen","year":"2005","journal-title":"Proceedings for ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2004.11.016"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/71.730527"},{"key":"ref9","article-title":"Algorithm-based fault tolerance for fail-stop failures","volume":"19","author":"chen","year":"2008","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1177\/1094342004046045"},{"journal-title":"Top 500 Supercomputing Sites","year":"0","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1023\/B:CLUS.0000039491.64560.8a"},{"journal-title":"HPL Benchmark Sites","year":"0","key":"ref24"},{"journal-title":"MPI-Forum fault-tolerance working group","year":"0","key":"ref23"}],"event":{"name":"2011 18th International Conference on High Performance Computing (HiPC)","start":{"date-parts":[[2011,12,18]]},"location":"Bengaluru, India","end":{"date-parts":[[2011,12,21]]}},"container-title":["2011 18th International Conference on High Performance Computing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6146509\/6152423\/06152716.pdf?arnumber=6152716","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,21]],"date-time":"2017-03-21T18:43:12Z","timestamp":1490121792000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6152716\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,12]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/hipc.2011.6152716","relation":{},"subject":[],"published":{"date-parts":[[2011,12]]}}}