{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T17:59:23Z","timestamp":1743098363661,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030483395"},{"type":"electronic","value":"9783030483401"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-48340-1_53","type":"book-chapter","created":{"date-parts":[[2020,5,28]],"date-time":"2020-05-28T23:07:41Z","timestamp":1590707261000},"page":"694-706","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Checkpointing Kernel Executions of MPI+CUDA Applications"],"prefix":"10.1007","author":[{"given":"Max","family":"Baird","sequence":"first","affiliation":[]},{"given":"Sven-Bodo","family":"Scholz","sequence":"additional","affiliation":[]},{"given":"Artjoms","family":"\u0160inkarovs","sequence":"additional","affiliation":[]},{"given":"Leonardo","family":"Bautista-Gomez","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,5,29]]},"reference":[{"key":"53_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"826","DOI":"10.1007\/978-3-030-10549-5_64","volume-title":"Euro-Par 2018: Parallel Processing Workshops","author":"M Baird","year":"2019","unstructured":"Baird, M., Fensch, C., Scholz, S.-B., \u0160inkarovs, A.: A lightweight approach to gpu resilience. In: Mencagli, G., et al. (eds.) Euro-Par 2018. LNCS, vol. 11339, pp. 826\u2013838. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-10549-5_64"},{"key":"53_CR2","doi-asserted-by":"publisher","unstructured":"Bautista-Gomez, L., Tsuboi, S., et al.: FTI: high performance fault tolerance interface for hybrid systems. In: SC 2011, pp. 1\u201312 (2011). https:\/\/doi.org\/10.1145\/2063384.2063427","DOI":"10.1145\/2063384.2063427"},{"key":"53_CR3","doi-asserted-by":"publisher","unstructured":"Duato, J., Pe\u00f1a, A.J., et al.: rCUDA: reducing the number of GPU-based accelerators in high performance clusters. In: 2010 International Conference on High Performance Computing Simulation, pp. 224\u2013231 (2010). https:\/\/doi.org\/10.1109\/HPCS.2010.5547126","DOI":"10.1109\/HPCS.2010.5547126"},{"key":"53_CR4","doi-asserted-by":"publisher","unstructured":"Garg, R., Mohan, A., et al.: CRUM: checkpoint-restart support for CUDA\u2019s unified memory. In: CLUSTER 2018, pp. 302\u2013313 (2018). https:\/\/doi.org\/10.1109\/CLUSTER.2018.00047","DOI":"10.1109\/CLUSTER.2018.00047"},{"key":"53_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1007\/978-3-642-15277-1_37","volume-title":"Euro-Par 2010 - Parallel Processing","author":"G Giunta","year":"2010","unstructured":"Giunta, G., Montella, R., Agrillo, G., Coviello, G.: A GPGPU transparent virtualization component for high performance computing clouds. In: D\u2019Ambra, P., Guarracino, M., Talia, D. (eds.) Euro-Par 2010, Part I. LNCS, vol. 6271, pp. 379\u2013391. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15277-1_37"},{"key":"53_CR6","doi-asserted-by":"publisher","unstructured":"Gupta, V., Gavrilovska, A., et al.: GViM: GPU-accelerated virtual machines. In: ACM Workshop on System-level Virtualization for High Performance Computing, pp. 17\u201324. ACM (2009), https:\/\/doi.org\/10.1145\/1519138.1519141","DOI":"10.1145\/1519138.1519141"},{"key":"53_CR7","doi-asserted-by":"publisher","first-page":"494","DOI":"10.1088\/1742-6596\/46\/1\/067","volume":"46","author":"PH Hargrove","year":"2006","unstructured":"Hargrove, P.H., Duell, J.C.: Berkeley lab checkpoint\/restart (BLCR) for linux clusters. J. Phys. Conf. Ser. 46, 494\u2013499 (2006). https:\/\/doi.org\/10.1088\/1742-6596\/46\/1\/067","journal-title":"J. Phys. Conf. Ser."},{"key":"53_CR8","doi-asserted-by":"publisher","unstructured":"Kannan, S., Farooqui, N., et al.: HeteroCheckpoint: efficient checkpointing for accelerator-based systems. In: 2014 44th Annual IEEE\/IFIP International Conference on Dependable Systems and Networks, pp. 738\u2013743 (2014). https:\/\/doi.org\/10.1109\/DSN.2014.76","DOI":"10.1109\/DSN.2014.76"},{"key":"53_CR9","doi-asserted-by":"crossref","unstructured":"Karlin, I., et al.: LULESH Programming Model and Performance Ports Overview. Technical report. LLNL-TR-608824, December 2012. https:\/\/computing.llnl.gov\/projects\/co-design\/lulesh_ports1.pdf","DOI":"10.2172\/1059462"},{"key":"53_CR10","doi-asserted-by":"publisher","unstructured":"Lagar-Cavilla, H.A., et al.: VMM-independent graphics acceleration. In: Proceedings of the 3rd International Conference on Virtual Execution Environments, pp. 33\u201343. ACM (2007). https:\/\/doi.org\/10.1145\/1254810.1254816","DOI":"10.1145\/1254810.1254816"},{"key":"53_CR11","doi-asserted-by":"publisher","unstructured":"Nukada, A., Takizawa, H., et al.: NVCR: a transparent checkpoint-restart library for NVIDIA CUDA. In: 2011 IEEE International Symposium on Parallel and Distributed Processing Workshops and PhD Forum, pp. 104\u2013113 (2011). https:\/\/doi.org\/10.1109\/IPDPS.2011.131","DOI":"10.1109\/IPDPS.2011.131"},{"key":"53_CR12","unstructured":"NVIDIA Corporation: NVIDIA CUDA Compute Unified Device Architecture Programming Guide version 10.1.105 (2019). https:\/\/bit.ly\/2EcQ4hN"},{"key":"53_CR13","doi-asserted-by":"publisher","unstructured":"Oikawa, M., Kawai, A., et al.: DS-CUDA: a middleware to use many GPUs in the cloud environment. In: 2012 SC Companion: High Performance Computing, Networking Storage and Analysis, pp. 1207\u20131214 (2012). https:\/\/doi.org\/10.1109\/SC.Companion.2012.146","DOI":"10.1109\/SC.Companion.2012.146"},{"key":"53_CR14","doi-asserted-by":"publisher","unstructured":"Pe\u00f1a, A.J., Bland, W., et al.: VOCL-FT: introducing techniques for efficient soft error coprocessor recovery. In: SC 2015: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201312 (2015). https:\/\/doi.org\/10.1145\/2807591.2807640","DOI":"10.1145\/2807591.2807640"},{"key":"53_CR15","doi-asserted-by":"publisher","unstructured":"Pourghassemi, B., Chandramowlishwaran, A.: cudaCR: an In-Kernel application-level checkpoint\/restart scheme for CUDA-enabled GPUs. In: CLUSTER 2017, pp. 725\u2013732 (2017). https:\/\/doi.org\/10.1109\/CLUSTER.2017.100","DOI":"10.1109\/CLUSTER.2017.100"},{"issue":"6","key":"53_CR16","doi-asserted-by":"publisher","first-page":"804","DOI":"10.1109\/TC.2011.112","volume":"61","author":"L Shi","year":"2012","unstructured":"Shi, L., Chen, H., Sun, J., et al.: vCUDA: GPU-accelerated high-performance computing in virtual machines. IEEE Trans. Comput. 61(6), 804\u2013816 (2012). https:\/\/doi.org\/10.1109\/TC.2011.112","journal-title":"IEEE Trans. Comput."},{"key":"53_CR17","unstructured":"Suzuki, T., Akira Nukada, S.M.: Transparent Checkpoint and Restart Technology for CUDA applications (2016). https:\/\/bit.ly\/2DzHGbO. Accessed 25 April 2019"},{"key":"53_CR18","doi-asserted-by":"publisher","unstructured":"Takizawa, H., Sato, K., et al.: CheCUDA: a checkpoint\/restart tool for CUDA applications. In: 2009 International Conference on Parallel and Distributed Computing, Applications and Technologies, pp. 408\u2013413 (2009). https:\/\/doi.org\/10.1109\/PDCAT.2009.78","DOI":"10.1109\/PDCAT.2009.78"},{"key":"53_CR19","doi-asserted-by":"publisher","unstructured":"Takizawa, H., et al.: CheCL: transparent checkpointing and process migration of OpenCL applications. In: 2011 IEEE International, IPDPS. IEEE (2011). https:\/\/doi.org\/10.1109\/IPDPS.2011.85","DOI":"10.1109\/IPDPS.2011.85"},{"key":"53_CR20","doi-asserted-by":"publisher","unstructured":"Xu, X., et al.: HiAL-Ckpt: a hierarchical application-level checkpointing for CPU-GPU hybrid systems, pp. 1895\u20131899 (2010). https:\/\/doi.org\/10.1109\/ICCSE.2010.5593819","DOI":"10.1109\/ICCSE.2010.5593819"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2019: Parallel Processing Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-48340-1_53","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,29]],"date-time":"2024-05-29T00:14:09Z","timestamp":1716941649000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-48340-1_53"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030483395","9783030483401"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-48340-1_53","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"29 May 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"G\u00f6ttingen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/europar.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"142","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,94","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4,27","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"double blind review in two cases","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}