{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T04:46:39Z","timestamp":1725943599237},"publisher-location":"Berlin, Heidelberg","reference-count":20,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642297397"},{"type":"electronic","value":"9783642297403"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-29740-3_28","type":"book-chapter","created":{"date-parts":[[2012,4,13]],"date-time":"2012-04-13T17:54:04Z","timestamp":1334339644000},"page":"241-250","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Cooperative Application\/OS DRAM Fault Recovery"],"prefix":"10.1007","author":[{"given":"Patrick G.","family":"Bridges","sequence":"first","affiliation":[]},{"given":"Mark","family":"Hoemmen","sequence":"additional","affiliation":[]},{"given":"Kurt B.","family":"Ferreira","sequence":"additional","affiliation":[]},{"given":"Michael A.","family":"Heroux","sequence":"additional","affiliation":[]},{"given":"Philip","family":"Soltero","sequence":"additional","affiliation":[]},{"given":"Ron","family":"Brightwell","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"28_CR1","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1145\/1375527.1375552","volume-title":"Proceedings of the 22nd Annual International Conference on Supercomputing, ICS 2008","author":"G. Bronevetsky","year":"2008","unstructured":"Bronevetsky, G., de Supinski, B.: Soft error vulnerability of iterative linear algebra methods. In: Proceedings of the 22nd Annual International Conference on Supercomputing, ICS 2008, pp. 155\u2013164. ACM, New York (2008)"},{"key":"28_CR2","unstructured":"Buttari, A., Dongarra, J., Kurzak, J., Luszczek, P., Tomov, S.: Computations to enhance the performance while achieving the 64-bit accuracy. Tech. Rep. UT-CS-06-584, University of Tennessee Knoxville, lAPACK Working Note #180 (November 2006)"},{"key":"28_CR3","unstructured":"Chen, Z., Dongarra, J.: Algorithm-based checkpoint-free fault tolerance for parallel matrix computations on volatile resources. In: 20th International Parallel and Distributed Processing Symposium, IPDPS 2006 (April 2006)"},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Davis, T.A., Hu, Y.: The University of Florida Sparse Matrix Collection. ACM Trans. Math. Softw. (2011) (to appear), \n                    \n                      http:\/\/www.cise.ufl.edu\/research\/sparse\/matrices","DOI":"10.1145\/2049662.2049663"},{"key":"28_CR5","unstructured":"Dopson, D.: SoftECC: A System for Software Memory Integrity Checking. Master\u2019s thesis, Massachusetts Institute of Technology (September 2005)"},{"issue":"3","key":"28_CR6","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1145\/568522.568525","volume":"34","author":"E.N. Elnozahy","year":"2002","unstructured":"Elnozahy, E.N., Alvisi, L., Wang, Y.M., Johnson, D.B.: A survey of rollback-recovery protocols in message-passing systems. ACM Computing Surveys\u00a034(3), 375\u2013408 (2002)","journal-title":"ACM Computing Surveys"},{"issue":"1","key":"28_CR7","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1137\/S0895479802403459","volume":"26","author":"J. Eshof van den","year":"2004","unstructured":"van den Eshof, J., Sleijpen, G.L.G.: Inexact Krylov subspace methods for linear systems. SIAM J. Matrix Anal. Appl.\u00a026(1), 125\u2013153 (2004)","journal-title":"SIAM J. Matrix Anal. Appl."},{"key":"28_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"272","DOI":"10.1007\/978-3-642-24449-0_31","volume-title":"Recent Advances in the Message Passing Interface","author":"K.B. Ferreira","year":"2011","unstructured":"Ferreira, K.B., Riesen, R., Brighwell, R., Bridges, P., Arnold, D.: libhashckpt: Hash-Based Incremental Checkpointing Using GPU\u2019s. In: Cotronis, Y., Danalis, A., Nikolopoulos, D.S., Dongarra, J. (eds.) EuroMPI 2011. LNCS, vol.\u00a06960, pp. 272\u2013281. Springer, Heidelberg (2011)"},{"issue":"3","key":"28_CR9","doi-asserted-by":"publisher","first-page":"397","DOI":"10.1145\/1089014.1089021","volume":"31","author":"M.A. Heroux","year":"2005","unstructured":"Heroux, M.A., Bartlett, R.A., Howle, V.E., Hoekstra, R.J., Hu, J.J., Kolda, T.G., Lehoucq, R.B., Long, K.R., Pawlowski, R.P., Phipps, E.T., Salinger, A.G., Thornquist, H.K., Tuminaro, R.S., Willenbring, J.M., Williams, A., Stanley, K.S.: An overview of the Trilinos project. ACM Trans. Math. Softw.\u00a031(3), 397\u2013423 (2005)","journal-title":"ACM Trans. Math. Softw."},{"key":"28_CR10","unstructured":"Heroux, M.A., Hoemmen, M.: Fault-tolerant iterative methods via selective reliability. Tech. Rep. SAND2011-3915 C, Sandia National Laboratories (2011), \n                    \n                      http:\/\/www.sandia.gov\/~maherou\/"},{"key":"28_CR11","unstructured":"Howle, V.E.: Soft errors in linear solvers as integrated components of a simulation. Presented at the Copper Mountain Conference on Iterative Methods, Copper Mountain, CO, April 9 (2010)"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Huang, K.H., Abraham, J.A.: Algorithm-based fault tolerance for matrix operations. IEEE Transactions on Computers C-33(6) (June 1984)","DOI":"10.1109\/TC.1984.1676475"},{"key":"28_CR13","unstructured":"Kleen, A.: mcelog: memory error handling in user space. In: Proceedings of Linux Kongress 2010, Nuremburg, Germany (September 2010)"},{"key":"28_CR14","unstructured":"Li, X., Huang, M.C., Shen, K., Chu, L.: A realistic evaluation of memory hardware errors and software system susceptibility. In: Proceedings of the 2010 USENIX Annual Technical Conference (USENIX 2010), Boston, MA (June 2010)"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Maruyama, N., Nukada, A., Matsuoka, S.: A high-performance fault-tolerant software framework for memory on commodity GPUs. In: 2010 IEEE International Symposium on Parallel Distributed Processing (IPDPS), pp. 1\u201312 (April 2010)","DOI":"10.1109\/IPDPS.2010.5470473"},{"key":"28_CR16","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1137\/0914028","volume":"14","author":"Y. Saad","year":"1993","unstructured":"Saad, Y.: A flexible inner-outer preconditioned GMRES algorithm. SIAM J. Sci. Comput.\u00a014, 461\u2013469 (1993)","journal-title":"SIAM J. Sci. Comput."},{"key":"28_CR17","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898718003","volume-title":"Iterative Methods for Sparse Linear Systems","author":"Y. Saad","year":"2003","unstructured":"Saad, Y.: Iterative Methods for Sparse Linear Systems, 2nd edn. SIAM, Philadelphia (2003)","edition":"2"},{"key":"28_CR18","doi-asserted-by":"publisher","first-page":"856","DOI":"10.1137\/0907058","volume":"7","author":"Y. Saad","year":"1986","unstructured":"Saad, Y., Schultz, M.H.: GMRES: A generalized minimal residual algorithm for solving nonsymmetric linear systems. SIAM J. Sci. Statist. Comput.\u00a07, 856\u2013869 (1986)","journal-title":"SIAM J. Sci. Statist. Comput."},{"key":"28_CR19","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1145\/1897816.1897844","volume":"54","author":"B. Schroeder","year":"2011","unstructured":"Schroeder, B., Pinheiro, E., Weber, W.D.: DRAM errors in the wild: a large-scale field study. Communications of the ACM\u00a054, 100\u2013107 (2011)","journal-title":"Communications of the ACM"},{"issue":"2","key":"28_CR20","doi-asserted-by":"publisher","first-page":"454","DOI":"10.1137\/S1064827502406415","volume":"25","author":"V. Simonici","year":"2003","unstructured":"Simonici, V., Szyld, D.B.: Theory of inexact Krylov subspace methods and applications to scientific computing. SIAM J. Sci. Comput.\u00a025(2), 454\u2013477 (2003)","journal-title":"SIAM J. Sci. Comput."}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2011: Parallel Processing Workshops"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-29740-3_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T00:52:52Z","timestamp":1587343972000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-29740-3_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642297397","9783642297403"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-29740-3_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}