{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,22]],"date-time":"2025-03-22T11:18:42Z","timestamp":1742642322922,"version":"3.37.3"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2020,9,9]],"date-time":"2020-09-09T00:00:00Z","timestamp":1599609600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,9,9]],"date-time":"2020-09-09T00:00:00Z","timestamp":1599609600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-SC-0016564","DE-AC00-07CH11358"],"award-info":[{"award-number":["DE-SC-0016564","DE-AC00-07CH11358"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2021,2]]},"DOI":"10.1007\/s10766-020-00676-w","type":"journal-article","created":{"date-parts":[[2020,9,10]],"date-time":"2020-09-10T12:53:09Z","timestamp":1599742389000},"page":"51-80","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Fault Recovery Methods for Asynchronous Linear Solvers"],"prefix":"10.1007","volume":"49","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3399-1117","authenticated-orcid":false,"given":"Evan","family":"Coleman","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Erik J.","family":"Jensen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masha","family":"Sosonkina","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,9,9]]},"reference":[{"key":"676_CR1","doi-asserted-by":"publisher","first-page":"3175","DOI":"10.1155\/IJMMS.2005.3175","volume":"19","author":"A Addou","year":"2005","unstructured":"Addou, A., Benahmed, A.: Parallel synchronous algorithm for nonlinear fixed point problems. Int. J. Math. Math. Sci. 19, 3175\u20133183 (2005)","journal-title":"Int. J. Math. Math. Sci."},{"key":"676_CR2","unstructured":"Agullo, E., Cools, S., Fatih-Yetkin, E., Giraud, L., Vanroose, W.: On soft errors in the conjugate gradient method: sensitivity and robust numerical detection. Research Report 9226, Inria Bordeaux Sud-Ouest (2018)"},{"key":"676_CR3","unstructured":"Anzt, H., Dongarra, J., Quintana-Ort\u00ed, E.S.: Fine-grained bit-flip protection for relaxation methods. J. Comput. Sci. (2016)"},{"issue":"6","key":"676_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2814566","volume":"62","author":"H Avron","year":"2015","unstructured":"Avron, H., Druinsky, A., Gupta, A.: Revisiting asynchronous linear solvers: Provable convergence rate through randomization. J. ACM (JACM) 62(6), 1\u201327 (2015)","journal-title":"J. ACM (JACM)"},{"key":"676_CR5","doi-asserted-by":"publisher","DOI":"10.1201\/9781584888093","volume-title":"Parallel Iterative Algorithms: From Sequential to Grid Computing","author":"JM Bahi","year":"2007","unstructured":"Bahi, J.M., Contassot-Vivier, S., Couturier, R.: Parallel Iterative Algorithms: From Sequential to Grid Computing. Chapman and Hall\/CRC, Boca Raton (2007)"},{"issue":"2","key":"676_CR6","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1145\/322063.322067","volume":"25","author":"GM Baudet","year":"1978","unstructured":"Baudet, G.M.: Asynchronous iterative methods for multiprocessors. J. ACM (JACM) 25(2), 226\u2013244 (1978)","journal-title":"J. ACM (JACM)"},{"key":"676_CR7","doi-asserted-by":"crossref","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Convergence rate and termination of asynchronous iterative algorithms. In: Proceedings of the 3rd International Conference on Supercomputing, ACM, pp 461\u2013470 (1989a)","DOI":"10.1145\/318789.318894"},{"key":"676_CR8","volume-title":"Parallel and Distributed Computation: Numerical Methods","author":"DP Bertsekas","year":"1989","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Parallel and Distributed Computation: Numerical Methods, vol. 23. Prentice hall Englewood Cliffs, Upper Saddle River (1989b)"},{"issue":"1","key":"676_CR9","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1177\/1094342013493123","volume":"28","author":"I Bethune","year":"2014","unstructured":"Bethune, I., Bull, J.M., Dingle, N.J., Higham, N.J.: Performance analysis of asynchronous Jacobi\u2019s method implemented in MPI, SHMEM and OpenMP. Int. J. High Performance Comput. Appl. 28(1), 97\u2013111 (2014)","journal-title":"Int. J. High Performance Comput. Appl."},{"key":"676_CR10","unstructured":"Bridges, P.G., Ferreira, K.B., Heroux, M.A., Hoemmen, M.: Fault-tolerant linear solvers via selective reliability (2012). arXiv:1206.1390"},{"key":"676_CR11","doi-asserted-by":"crossref","unstructured":"Bronevetsky, G., de\u00a0Supinski, B.: Soft error vulnerability of iterative linear algebra methods. In: Proceedings of the 22nd annual international conference on Supercomputing, ACM, pp 155\u2013164 (2008)","DOI":"10.1145\/1375527.1375552"},{"issue":"1","key":"676_CR12","first-page":"5","volume":"1","author":"F Cappello","year":"2014","unstructured":"Cappello, F., Geist, A., Gropp, W., Kale, S., Kramer, B., Snir, M.: Toward exascale resilience: 2014 update. Supercomput. Front. Innovat. 1(1), 5\u201328 (2014)","journal-title":"Supercomput. Front. Innovat."},{"issue":"2","key":"676_CR13","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1016\/0024-3795(69)90028-7","volume":"2","author":"D Chazan","year":"1969","unstructured":"Chazan, D., Miranker, W.: Chaotic relaxation. Linear Algebra Appl. 2(2), 199\u2013222 (1969)","journal-title":"Linear Algebra Appl."},{"key":"676_CR14","first-page":"167","volume":"48","author":"Z Chen","year":"2013","unstructured":"Chen, Z.: Online-abft: an online algorithm based fault tolerance scheme for soft error detection in iterative methods. ACM SIGPLAN Notices ACM 48, 167\u2013176 (2013)","journal-title":"ACM SIGPLAN Notices ACM"},{"issue":"2","key":"676_CR15","doi-asserted-by":"publisher","first-page":"C169","DOI":"10.1137\/140968896","volume":"37","author":"E Chow","year":"2015","unstructured":"Chow, E., Patel, A.: Fine-grained parallel incomplete LU factorization. SIAM J. Sci. Comput. 37(2), C169\u2013C193 (2015)","journal-title":"SIAM J. Sci. Comput."},{"key":"676_CR16","first-page":"291","volume":"19","author":"E Coleman","year":"2018","unstructured":"Coleman, E., Sosonkina, M.: Self-stabilizing fine-grained parallel incomplete LU factorization. Sustain. Comput. Inf. Syst. 19, 291\u2013304 (2018)","journal-title":"Sustain. Comput. Inf. Syst."},{"key":"676_CR17","doi-asserted-by":"crossref","unstructured":"Coleman, E., Jensen, E.J., Sosonkina, M.: Impacts of three soft-fault models on hybrid parallel asynchronous iterative methods. In: 2018 30th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD), IEEE, pp. 458\u2013465 (2018)","DOI":"10.1109\/CAHPC.2018.8645942"},{"key":"676_CR18","doi-asserted-by":"crossref","unstructured":"Elliott, J., Hoemmen, M., Mueller, F.: Evaluating the impact of SDC on the GMRES iterative solver. In: Parallel and Distributed Processing Symposium, 2014 IEEE 28th International, IEEE, pp. 1193\u20131202 (2014a)","DOI":"10.1109\/IPDPS.2014.123"},{"key":"676_CR19","unstructured":"Elliott, J., Hoemmen, M., Mueller, F.: Resilience in numerical methods: a position on fault models and methodologies (2014b). arXiv:1401.3013"},{"key":"676_CR20","doi-asserted-by":"crossref","unstructured":"Elliott, J., Hoemmen, M., Mueller, F.: A Numerical Soft Fault Model for Iterative Linear Solvers. In: Proceedings of the 24nd International Symposium on High-Performance Parallel and Distributed Computing (2015)","DOI":"10.1145\/2749246.2749254"},{"issue":"1","key":"676_CR21","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1016\/S0377-0427(00)00409-X","volume":"123","author":"A Frommer","year":"2000","unstructured":"Frommer, A., Szyld, D.B.: On asynchronous iterations. J. Comput. Appl. Math. 123(1), 201\u2013216 (2000)","journal-title":"J. Comput. Appl. Math."},{"issue":"8","key":"676_CR22","doi-asserted-by":"publisher","first-page":"5084","DOI":"10.1007\/s11227-019-02784-y","volume":"75","author":"EJ Jensen","year":"2019","unstructured":"Jensen, E.J., Coleman, E., Sosonkina, M.: Predictive modeling of the performance of asynchronous iterative methods. J. Supercomput. 75(8), 5084\u20135105 (2019)","journal-title":"J. Supercomput."},{"issue":"3","key":"676_CR23","doi-asserted-by":"publisher","first-page":"1517","DOI":"10.1007\/s11227-011-0563-y","volume":"59","author":"F Jezequel","year":"2012","unstructured":"Jezequel, F., Couturier, R., Denis, C.: Solving large sparse linear systems in a grid environment: the gremlins code versus the petsc library. J. Supercomput. 59(3), 1517\u20131532 (2012)","journal-title":"J. Supercomput."},{"issue":"4","key":"676_CR24","doi-asserted-by":"publisher","first-page":"819","DOI":"10.1109\/TPDS.2017.2780856","volume":"29","author":"F Magoul\u00e8s","year":"2017","unstructured":"Magoul\u00e8s, F., Gbikpi-Benissan, G.: Distributed convergence detection based on global residual error under asynchronous iterations. IEEE Trans. Parallel Distributed Syst. 29(4), 819\u2013829 (2017)","journal-title":"IEEE Trans. Parallel Distributed Syst."},{"key":"676_CR25","unstructured":"Magoules, F., Szyld, D.B., Venet, C.: Asynchronous optimized Schwarz methods with and without overlap. Numerische Mathematik pp 1\u201329 (2015)"},{"issue":"2","key":"676_CR26","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1016\/j.cam.2007.09.019","volume":"219","author":"J Miellou","year":"2008","unstructured":"Miellou, J., Spiteri, P., El Baz, D.: A new stopping criterion for linear perturbed asynchronous iterations. J. Comput. Appl. Math. 219(2), 471\u2013483 (2008)","journal-title":"J. Comput. Appl. Math."},{"issue":"3","key":"676_CR27","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1093\/imanum\/dri005","volume":"25","author":"JC Miellou","year":"2005","unstructured":"Miellou, J.C., Spiteri, P., El Baz, D.: Stopping criteria, forward and backward errors for perturbed asynchronous linear fixed point methods in finite precision. IMA J. Numer. Anal. 25(3), 429\u2013442 (2005)","journal-title":"IMA J. Numer. Anal."},{"key":"676_CR28","unstructured":"Recht, B., Re, C., Wright, S., Niu, F.: Hogwild: A lock-free approach to parallelizing stochastic gradient descent. In: Advances in neural information processing systems, pp. 693\u2013701 (2011)"},{"key":"676_CR29","doi-asserted-by":"crossref","unstructured":"Sao, P., Vuduc, R.: Self-stabilizing iterative solvers. In: Proceedings of the Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems, pp. 1\u20138 (2013)","DOI":"10.1145\/2530268.2530272"},{"issue":"1","key":"676_CR30","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1016\/0167-8191(95)00059-3","volume":"22","author":"SA Savar\u00ed","year":"1996","unstructured":"Savar\u00ed, S.A., Bertsekas, D.P.: Finite termination of asynchronous iterative algorithms. Parallel Comput. 22(1), 39\u201356 (1996)","journal-title":"Parallel Comput."},{"key":"676_CR31","doi-asserted-by":"crossref","unstructured":"Shantharam, M., Srinivasmurthy, S., Raghavan, P.: Characterizing the impact of soft errors on iterative methods in scientific computing. In: Proceedings of the International Conference on Supercomputing, ACM, pp. 152\u2013161 (2011)","DOI":"10.1145\/1995896.1995922"},{"key":"676_CR32","doi-asserted-by":"crossref","unstructured":"Shantharam, M., Srinivasmurthy, S., Raghavan, P.: Fault tolerant preconditioned conjugate gradient for sparse linear system solution. In: Proceedings of the 26th ACM International Conference on Supercomputing, ACM, pp. 69\u201378 (2012)","DOI":"10.1145\/2304576.2304588"},{"key":"676_CR33","doi-asserted-by":"crossref","unstructured":"Sloan, J., Kumar, R., Bronevetsky, G.: Algorithmic approaches to low overhead fault detection for sparse linear algebra. In: Dependable Systems and Networks (DSN), 2012 42nd Annual IEEE\/IFIP International Conference on, IEEE, pp. 1\u201312 (2012)","DOI":"10.1109\/DSN.2012.6263938"},{"key":"676_CR34","first-page":"38","volume":"13","author":"P Spiteri","year":"2002","unstructured":"Spiteri, P., Miellou, J.C., El Baz, D.: Perturbation of parallel asynchronous linear iterations by floating point errors. Electron. Trans. Numer. Anal. 13, 38\u201355 (2002)","journal-title":"Electron. Trans. Numer. Anal."},{"issue":"5","key":"676_CR35","doi-asserted-by":"publisher","first-page":"C532","DOI":"10.1137\/140991406","volume":"37","author":"M Stoyanov","year":"2015","unstructured":"Stoyanov, M., Webster, C.: Numerical analysis of fixed point algorithms in the presence of hardware faults. SIAM J. Sci. Comput. 37(5), C532\u2013C553 (2015)","journal-title":"SIAM J. Sci. Comput."},{"key":"676_CR36","doi-asserted-by":"crossref","unstructured":"Wolfson-Pou, J., Chow, E.: Distributed southwell: an iterative method with low communication costs. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201313 (2017)","DOI":"10.1145\/3126908.3126966"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-020-00676-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10766-020-00676-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-020-00676-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,8]],"date-time":"2021-09-08T23:50:02Z","timestamp":1631145002000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10766-020-00676-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,9]]},"references-count":36,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2021,2]]}},"alternative-id":["676"],"URL":"https:\/\/doi.org\/10.1007\/s10766-020-00676-w","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"type":"print","value":"0885-7458"},{"type":"electronic","value":"1573-7640"}],"subject":[],"published":{"date-parts":[[2020,9,9]]},"assertion":[{"value":"25 June 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 August 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 September 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}