{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T21:14:28Z","timestamp":1769202868611,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T00:00:00Z","timestamp":1691366400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2106634, 2106635"],"award-info":[{"award-number":["2106634, 2106635"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"DOE U.S. Department of Energy","doi-asserted-by":"publisher","award":["DEAC02-06CH11357"],"award-info":[{"award-number":["DEAC02-06CH11357"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006224","name":"Argonne National Laboratory","doi-asserted-by":"publisher","award":["0F-60169"],"award-info":[{"award-number":["0F-60169"]}],"id":[{"id":"10.13039\/100006224","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,7]]},"DOI":"10.1145\/3588195.3592987","type":"proceedings-article","created":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T20:47:00Z","timestamp":1691441220000},"page":"73-85","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["GPU-Enabled Asynchronous Multi-level Checkpoint Caching and Prefetching"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8200-0148","authenticated-orcid":false,"given":"Avinash","family":"Maurya","sequence":"first","affiliation":[{"name":"Rochester Institute of Technology, Rochester, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5034-2880","authenticated-orcid":false,"given":"M. Mustafa","family":"Rafique","sequence":"additional","affiliation":[{"name":"Rochester Institute of Technology, Rochester, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2214-1173","authenticated-orcid":false,"given":"Thierry","family":"Tonellot","sequence":"additional","affiliation":[{"name":"Exploration and Petroleum Engineering Advanced Research Center, Saudi Aramco, Dhahran, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0993-3304","authenticated-orcid":false,"given":"Hussain J.","family":"AlSalem","sequence":"additional","affiliation":[{"name":"Exploration and Petroleum Engineering Advanced Research Center, Saudi Aramco, Dhahran, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7890-3934","authenticated-orcid":false,"given":"Franck","family":"Cappello","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0661-7509","authenticated-orcid":false,"given":"Bogdan","family":"Nicolae","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,8,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00023"},{"key":"e_1_3_2_1_2_1","volume-title":"Euro-Par 2020: Parallel Processing","author":"Alturhkestani Tariq","unstructured":"Tariq Alturhkestani, Hatem Ltaief, and David Keyes. 2020. Maximizing I\/O Bandwidth for Reverse Time Migration on Heterogeneous Large-Scale Systems. In Euro-Par 2020: Parallel Processing, Maciej Malawski and Krzysztof Rzadca (Eds.). Springer International Publishing, Cham, 263--278."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2019.00046"},{"key":"e_1_3_2_1_4_1","volume-title":"n.d.. Theta GPU. https:\/\/www.alcf.anl.gov\/alcf-resources\/theta. Accessed","author":"Computing Facility Argonne Leadership","year":"2023","unstructured":"Argonne Leadership Computing Facility. n.d.. Theta GPU. https:\/\/www.alcf.anl.gov\/alcf-resources\/theta. Accessed: May 9, 2023."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00046"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00017"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2016.30"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476181"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.37"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322224"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.softx.2020.100561"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1088\/1742--6596"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.50"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2012.40"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.3997\/1365--2397.fb2020015"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545090"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","author":"Lofstead Jay","year":"2016","unstructured":"Jay Lofstead, Ivo Jimenez, Carlos Maltzahn, Quincey Koziol, John Bent, and Eric Barton. 2016. DAOS and Friends: A Proposal for an Exascale Storage System. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (Salt Lake City, Utah) (SC '16). IEEE Press, Article 50, 12 pages."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC56025.2022.00043"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS53633.2021.9614284"},{"key":"e_1_3_2_1_20_1","volume-title":"Fine-Grained DNN Checkpointing. In 19th USENIX Conference on File and Storage Technologies (FAST 21)","author":"Mohan Jayashree","year":"2021","unstructured":"Jayashree Mohan, Amar Phanishayee, and Vijay Chidambaram. 2021. CheckFreq: Frequent, Fine-Grained DNN Checkpointing. In 19th USENIX Conference on File and Storage Technologies (FAST 21). USENIX Association, 203--216. https:\/\/www.usenix.org\/conference\/fast21\/presentation\/mohan"},{"key":"e_1_3_2_1_21_1","unstructured":"Dmitriy Monozov and Zarija Lukie. 2016. Henson v1.0. https:\/\/www.osti.gov\/servlets\/purl\/1312559. https:\/\/www.osti.gov\/biblio\/1312559 HENSON; 004707WKSTN00."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.18"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/LDAV.2016.7874307"},{"key":"e_1_3_2_1_24_1","volume-title":"Computational Science -- ICCS","author":"Krishna Narayanan Sri Hari","year":"2022","unstructured":"Sri Hari Krishna Narayanan, Thomas Propson, Marcelo Bongarti, Jan H\u00fcckelheim, and Paul Hovland. 2022. Reducing Memory Requirements of\u00a0Quantum Optimal Control. In Computational Science -- ICCS 2022, Derek Groen, Cl\u00e9lia de Mulatier, Maciej Paszynski, Valeria V. Krzhizhanovskaya, Jack J. Dongarra, and Peter M. A. Sloot (Eds.). Springer International Publishing, Cham, 129--142."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00099"},{"key":"e_1_3_2_1_26_1","volume-title":"VELOC: VEry Low Overhead Checkpointing in the Age of Exascale. CoRR","author":"Nicolae Bogdan","year":"2021","unstructured":"Bogdan Nicolae, Adam Moody, Gregory Kosinovsky, Kathryn Mohror, and Franck Cappello. 2021. VELOC: VEry Low Overhead Checkpointing in the Age of Exascale. CoRR, Vol. abs\/2103.02131 (2021)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.131"},{"key":"e_1_3_2_1_28_1","volume-title":"n.d.. NVIDIA DGX A100 System Architecture. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/dgx-a100\/dgxa100-system-architecture-white-paper.pdf. Accessed","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. n.d.. NVIDIA DGX A100 System Architecture. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/dgx-a100\/dgxa100-system-architecture-white-paper.pdf. Accessed: May 6, 2023."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid49817.2020.00--69"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503224"},{"key":"e_1_3_2_1_31_1","volume-title":"Automatic Differentiation in PyTorch. In NIPS Workshop on Autodiff. USA.","author":"Paszke Adam","year":"2017","unstructured":"Adam Paszke, Sam Gross, Soumith Chintala, Gregory Chanan, Edward Yang, Zachary DeVito, Zeming Lin, Alban Desmaison, Luca Antiga, and Adam Lerer. 2017. Automatic Differentiation in PyTorch. In NIPS Workshop on Autodiff. USA."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1365--246X.2006.02978.x"},{"key":"e_1_3_2_1_33_1","volume-title":"WORKS'22: 17th Workshop on Workflows in Support of Large-Scale Science (in conjunction with SC'22)","author":"Pouchard Line C.","year":"2022","unstructured":"Line C. Pouchard, Tanzima Z. Islam, Bogdan Nicolae, and Robert Ross. 2022. A (meta)data framework for reproducing hybrid workflows with FAIR. In WORKS'22: 17th Workshop on Workflows in Support of Large-Scale Science (in conjunction with SC'22). Dallas, USA."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2021.03.004"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the 2003 Linux symposium","volume":"2003","author":"Philip","unstructured":"Philip Schwan et al. 2003. Lustre: Building a file system for 1000-node clusters. In Proceedings of the 2003 Linux symposium, Vol. 2003. 380--386."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2983632"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080214"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611974693.13"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRIS.2019.00128"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/QCS56647.2022.00016"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2012.26"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.earscirev.2018.02.008"},{"key":"e_1_3_2_1_44_1","volume-title":"Importance of data loading pipeline in training DNNs. arXiv preprint arXiv:2005.02130","author":"Zolnouri Mahdi","year":"2020","unstructured":"Mahdi Zolnouri, Xinlin Li, and Vahid Partovi Nia. 2020. Importance of data loading pipeline in training DNNs. arXiv preprint arXiv:2005.02130 (2020)."}],"event":{"name":"HPDC '23: The 32nd International Symposium on High-Performance Parallel and Distributed Computing","location":"Orlando FL USA","acronym":"HPDC '23","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 32nd International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3588195.3592987","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3588195.3592987","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3588195.3592987","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:25Z","timestamp":1750178845000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3588195.3592987"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,7]]},"references-count":43,"alternative-id":["10.1145\/3588195.3592987","10.1145\/3588195"],"URL":"https:\/\/doi.org\/10.1145\/3588195.3592987","relation":{},"subject":[],"published":{"date-parts":[[2023,8,7]]},"assertion":[{"value":"2023-08-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}