{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T14:27:19Z","timestamp":1725892039751},"publisher-location":"Berlin, Heidelberg","reference-count":14,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642156458"},{"type":"electronic","value":"9783642156465"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-15646-5_22","type":"book-chapter","created":{"date-parts":[[2010,9,6]],"date-time":"2010-09-06T07:43:52Z","timestamp":1283759032000},"page":"208-218","source":"Crossref","is-referenced-by-count":7,"title":["Transparent Redundant Computing with MPI"],"prefix":"10.1007","author":[{"given":"Ron","family":"Brightwell","sequence":"first","affiliation":[]},{"given":"Kurt","family":"Ferreira","sequence":"additional","affiliation":[]},{"given":"Rolf","family":"Riesen","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"unstructured":"Ferreira, K., Riesen, R., Oldfield, R., Stearley, J., Laros, J., Pedretti, K., Brightwell, R., Kordenbrock, T.: Increasing fault resiliency in a message-passing environment. Technical report SAND2009-6753, Sandia National Laboratories (2009)","key":"22_CR1"},{"doi-asserted-by":"crossref","unstructured":"Riesen, R., Ferreira, K., Stearley, J.: See applications run and throughput jump: The case for redundant computing in HPC. In: 1st International Workshop on Fault-Tolerance for HPC at Extreme Scale, FTXS 2010 (2010)","key":"22_CR2","DOI":"10.1109\/DSNW.2010.5542625"},{"unstructured":"Network-Based Computing Laboratory, Ohio State University: OSU MPI benchmarks, OMB (2010), http:\/\/mvapich.cse.ohio-state.edu\/benchmarks\/","key":"22_CR3"},{"issue":"1","key":"22_CR4","first-page":"188","volume":"78","author":"B. Schroeder","year":"2007","unstructured":"Schroeder, B., Gibson, G.A.: Understanding failures in petascale computers. Journal of Physics: Conference Series\u00a078(1), 188\u2013198 (2007)","journal-title":"Journal of Physics: Conference Series"},{"doi-asserted-by":"crossref","unstructured":"Zheng, Z., Lan, Z.: Reliability-aware scalability models for high performance computing, In: Proceedings of the IEEE conference on Cluster Computing (2009)","key":"22_CR5","DOI":"10.1109\/CLUSTR.2009.5289177"},{"issue":"12","key":"22_CR6","doi-asserted-by":"publisher","first-page":"961","DOI":"10.1016\/j.jpdc.2009.08.004","volume":"69","author":"X. He","year":"2009","unstructured":"He, X., Ou, L., Engelmann, C., Chen, X., Scott, S.L.: Symmetric active\/active metadata service for high availability parallel file systems. Journal of Parallel and Distributed Computing (JPDC)\u00a069(12), 961\u2013973 (2009)","journal-title":"Journal of Parallel and Distributed Computing (JPDC)"},{"doi-asserted-by":"crossref","unstructured":"Fagg, G.E., Dongarra, J.: FT-MPI: Fault tolerant MPI, supporting dynamic applications in a dynamic world. In: Proceedings of the 7th European PVM\/MPI Users\u2019 Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface, pp. 346\u2013353 (2000)","key":"22_CR7","DOI":"10.1007\/3-540-45255-9_47"},{"doi-asserted-by":"crossref","unstructured":"Gropp, W., Lusk, E.: Fault tolerance in message passing interface programs. International Journal of High Performance Computing Applications\u00a018(3) (2004)","key":"22_CR8","DOI":"10.1177\/1094342004046045"},{"doi-asserted-by":"crossref","unstructured":"Bouteiller, A., Cappello, F., Herault, T., Krawezik, G., Lemarinier, P., Magniette, F.: MPICH-V2: a fault tolerant MPI for volatile nodes based on pessimistic sender based message logging. In: Proceedings of the ACM\/IEEE International Conference on High Performance Computing and Networking (2003)","key":"22_CR9","DOI":"10.1145\/1048935.1050176"},{"doi-asserted-by":"crossref","unstructured":"Hursey, J., Squyres, J., Mattox, T., Lumsdaine, A.: The design and implementation of checkpoint\/restart process fault tolerance for Open MPI. In: Proceedings of the IEEE International Parallel and Distributed Processing Symposium (2007)","key":"22_CR10","DOI":"10.1109\/IPDPS.2007.370605"},{"key":"22_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1007\/978-3-540-85451-7_7","volume-title":"Euro-Par 2008 Parallel Processing","author":"G. Santos","year":"2008","unstructured":"Santos, G., Duarte, A., Rexachs, D., Luque, E.: Providing non-stop service for message-passing based parallel applications with RADIC. In: Luque, E., Margalef, T., Ben\u00edtez, D. (eds.) Euro-Par 2008. LNCS, vol.\u00a05168, pp. 58\u201367. Springer, Heidelberg (2008)"},{"issue":"1","key":"22_CR12","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/s10723-006-9056-2","volume":"5","author":"S. Genaud","year":"2007","unstructured":"Genaud, S., Rattanapoka, C.: P2P-MPI: A peer-to-peer framework for robust execution of message passing parallel programs on grids. J. Grid Comput.\u00a05(1), 27\u201342 (2007)","journal-title":"J. Grid Comput."},{"issue":"5","key":"22_CR13","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1007\/s10766-009-0115-8","volume":"37","author":"S. Genaud","year":"2009","unstructured":"Genaud, S., Jeannot, E., Rattanapoka, C.: Fault-management in P2P-MPI. Int. J. Parallel Program.\u00a037(5), 433\u2013461 (2009)","journal-title":"Int. J. Parallel Program."},{"doi-asserted-by":"crossref","unstructured":"Farreras, M., Cortes, T., Labarta, J., Almasi, G.: Scaling MPI to short-memory MPPs such as BG\/L. In: Proceeding of the International Conference on Supercomputing, pp. 209\u2013218 (2006)","key":"22_CR14","DOI":"10.1145\/1183401.1183432"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in the Message Passing Interface"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-15646-5_22.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,24]],"date-time":"2020-11-24T03:11:03Z","timestamp":1606187463000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-15646-5_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642156458","9783642156465"],"references-count":14,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-15646-5_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2010]]}}}