{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T22:40:07Z","timestamp":1756334407365,"version":"3.44.0"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030856649"},{"type":"electronic","value":"9783030856656"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-85665-6_28","type":"book-chapter","created":{"date-parts":[[2021,8,28]],"date-time":"2021-08-28T03:06:52Z","timestamp":1630120012000},"page":"451-465","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards High Performance Resilience Using Performance Portable Abstractions"],"prefix":"10.1007","author":[{"given":"Nicolas","family":"Morales","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Keita","family":"Teranishi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bogdan","family":"Nicolae","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christian","family":"Trott","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Franck","family":"Cappello","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,8,25]]},"reference":[{"key":"28_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"826","DOI":"10.1007\/978-3-030-10549-5_64","volume-title":"Euro-Par 2018: Parallel Processing Workshops","author":"M Baird","year":"2019","unstructured":"Baird, M., Fensch, C., Scholz, S.-B., \u0160inkarovs, A.: A lightweight approach to GPU resilience. In: Mencagli, G., et al. (eds.) Euro-Par 2018. LNCS, vol. 11339, pp. 826\u2013838. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-10549-5_64"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Bautista-Gomez, L., Tsuboi, S., Komatitsch, D., Cappello, F., Maruyama, N., Matsuoka, S.: FTI: high performance fault tolerance interface for hybrid systems. In: SC 2011: The 2011 ACM\/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis, Seattle, USA, pp. 32:1\u201332:32 (2011)","DOI":"10.1145\/2063384.2063427"},{"key":"28_CR3","doi-asserted-by":"crossref","unstructured":"De Kruijf, M., Nomura, S., Sankaralingam, K.: Relax: an architectural framework for software recovery of hardware faults. In: ACM SIGARCH Computer Architecture News, vol. 38, pp. 497\u2013508. ACM (2010)","DOI":"10.1145\/1816038.1816026"},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Di Martino, C., Kalbarczyk, Z., Iyer, R.K., Baccanico, F., Fullop, J., Kramer, W.: Lessons learned from the analysis of system failures at petascale: the case of blue waters. In: 2014 44th Annual IEEE\/IFIP International Conference on Dependable Systems and Networks, pp. 610\u2013621. IEEE (2014)","DOI":"10.1109\/DSN.2014.62"},{"key":"28_CR5","doi-asserted-by":"crossref","unstructured":"Di Martino, C., Kramer, W., Kalbarczyk, Z., Iyer, R.: Measuring and understanding extreme-scale application resilience: a field study of 5,000,000 HPC application runs. In: 2015 45th Annual IEEE\/IFIP International Conference on Dependable Systems and Networks, pp. 25\u201336. IEEE (2015)","DOI":"10.1109\/DSN.2015.50"},{"issue":"2","key":"28_CR6","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1007\/s10586-011-0162-y","volume":"15","author":"C Docan","year":"2012","unstructured":"Docan, C., Parashar, M., Klasky, S.: Dataspaces: an interaction and coordination framework for coupled simulation workflows. Cluster Comput. 15(2), 163\u2013181 (2012)","journal-title":"Cluster Comput."},{"key":"28_CR7","doi-asserted-by":"crossref","unstructured":"Duan, S., et al.: Scalable data resilience for in-memory data staging. In: 2018 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 105\u2013115. IEEE (2018)","DOI":"10.1109\/IPDPS.2018.00021"},{"issue":"12","key":"28_CR8","doi-asserted-by":"publisher","first-page":"3202","DOI":"10.1016\/j.jpdc.2014.07.003","volume":"74","author":"HC Edwards","year":"2014","unstructured":"Edwards, H.C., Trott, C.R., Sunderland, D.: Kokkos: enabling manycore performance portability through polymorphic memory access patterns. J. Parallel Distrib. Comput. 74(12), 3202\u20133216 (2014)","journal-title":"J. Parallel Distrib. Comput."},{"key":"28_CR9","doi-asserted-by":"crossref","unstructured":"Gamell, M., Katz, D.S., Kolla, H., Chen, J., Klasky, S., Parashar, M.: Exploring automatic, online failure recovery for scientific applications at extreme scales. In: SC 2014: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 895\u2013906. IEEE (2014)","DOI":"10.1109\/SC.2014.78"},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Gamell, M., et al.: Evaluating online global recovery with fenix using application-aware in-memory checkpointing techniques. In: 2016 45th International Conference on Parallel Processing Workshops (ICPPW), pp. 346\u2013355. IEEE (2016)","DOI":"10.1109\/ICPPW.2016.56"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Gamell, M., et al.: Local recovery and failure masking for stencil-based applications at extreme scales. In: SC 2015: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201312. IEEE (2015)","DOI":"10.1145\/2807591.2807672"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Gamell, M., Van der Wijngaart, R.F., Teranishi, K., Parashar, M.: Specification of fenix MPI fault tolerance library version 1.0. Tech. rep., Technical Report SAND2016-9171, Sandia National Laboratories, Livermore, CA (2016)","DOI":"10.2172\/1561495"},{"key":"28_CR13","doi-asserted-by":"crossref","unstructured":"Hornung, R.D., Keasler, J.A.: The RAJA portability layer: overview and status. Tech. rep., Lawrence Livermore National Lab. (LLNL), Livermore, CA (United States) (2014)","DOI":"10.2172\/1169830"},{"key":"28_CR14","doi-asserted-by":"crossref","unstructured":"Hukerikar, S., Engelmann, C.: Resilience design patterns: a structured approach to resilience at extreme scale. Tech. Rep. ORNL\/TM-2016\/767, Oak Ridge National Laboratory, Oak Ridge, TN, USA (2016)","DOI":"10.2172\/1338552"},{"issue":"3","key":"28_CR15","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1177\/1094342015623623","volume":"30","author":"I Laguna","year":"2016","unstructured":"Laguna, I., et al.: Evaluating and extending user-level fault tolerance in MPI applications. Int. J. High Perform. Comput. Appl. 30(3), 305\u2013319 (2016)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"28_CR16","doi-asserted-by":"publisher","first-page":"450","DOI":"10.1016\/j.future.2018.09.041","volume":"91","author":"N Losada","year":"2019","unstructured":"Losada, N., Bosilca, G., Bouteiller, A., Gonz\u00e1lez, P., Mart\u00edn, M.J.: Local rollback for resilient MPI applications with application-level checkpointing and message logging. Future Gener. Comput. Syst. 91, 450\u2013464 (2019)","journal-title":"Future Gener. Comput. Syst."},{"key":"28_CR17","doi-asserted-by":"crossref","unstructured":"Martsinkevich, T., Subasi, O., Unsal, O., Cappello, F., Labarta, J.: Fault-tolerant protocol for hybrid task-parallel message-passing applications. In: 2015 IEEE International Conference on Cluster Computing, pp. 563\u2013570. IEEE (2015)","DOI":"10.1109\/CLUSTER.2015.104"},{"key":"28_CR18","doi-asserted-by":"crossref","unstructured":"Moody, A., Bronevetsky, G., Mohror, K., De Supinski, B.R.: Design, modeling, and evaluation of a scalable multi-level checkpointing system. In: SC 2010: Proceedings of the 2010 ACM\/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201311. IEEE (2010)","DOI":"10.1109\/SC.2010.18"},{"key":"28_CR19","doi-asserted-by":"crossref","unstructured":"Nicolae, B., Moody, A., Gonsiorowski, E., Mohror, K., Cappello, F.: VeloC: towards high performance adaptive asynchronous checkpointing at large scale (2019)","DOI":"10.1109\/IPDPS.2019.00099"},{"key":"28_CR20","unstructured":"Silveira, A., \u00c1vila, R.B., Barreto, M.E., Navaux, P.O.A.: DPC++: object-oriented programming applied to cluster computing. In: Arabnia, H.R. (ed.) Proceedings of the International Conference on Parallel and Distributed Processing Techniques and Applications, PDPTA 2000, 24\u201329 June, 2000, Las Vegas, Nevada, USA. CSREA Press (2000)"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Subasi, O., Arias, J., Unsal, O., Labarta, J., Cristal, A.: Nanocheckpoints: a task-based asynchronous dataflow framework for efficient and scalable checkpoint\/restart. In: 2015 23rd Euromicro International Conference on Parallel, Distributed, and Network-Based Processing, pp. 99\u2013102. IEEE (2015)","DOI":"10.1109\/PDP.2015.17"},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Teranishi, K., Heroux, M.A.: Toward local failure local recovery resilience model using MPI-ULFM. In: Proceedings of the 21st European Mpi Users\u2019 Group Meeting, p. 51. ACM (2014)","DOI":"10.1145\/2642769.2642774"},{"key":"28_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/978-3-030-29400-7_4","volume-title":"Euro-Par 2019: Parallel Processing","author":"S-M Tseng","year":"2019","unstructured":"Tseng, S.-M., Nicolae, B., Bosilca, G., Jeannot, E., Chandramowlishwaran, A., Cappello, F.: Towards portable online prediction of network utilization using MPI-level monitoring. In: Yahyapour, R. (ed.) Euro-Par 2019. LNCS, vol. 11725, pp. 47\u201360. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-29400-7_4"},{"key":"28_CR24","unstructured":"Van Der Wijngaart, R.I., Gamell, M.R.U., Teranishi, K., Valenzuela, E., Heroux, M.A., Parashaar, M.R.U.: Fenix; a portable flexible fault tolerance programming framework for MPI applications. Tech. rep., Sandia National Lab. (SNL-NM), Albuquerque, NM (United States) (2016)"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2021: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-85665-6_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T22:03:06Z","timestamp":1756332186000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-85665-6_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030856649","9783030856656"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-85665-6_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"25 August 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lisbon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2021.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"136","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"38","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}