{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T21:06:39Z","timestamp":1773349599469,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T00:00:00Z","timestamp":1699747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3624062.3624255","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T13:53:39Z","timestamp":1699624419000},"page":"1738-1747","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Implementation-Oblivious Transparent Checkpoint-Restart for MPI"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7014-411X","authenticated-orcid":false,"given":"Yao","family":"Xu","sequence":"first","affiliation":[{"name":"Northeastern University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3256-3341","authenticated-orcid":false,"given":"Leonid","family":"Belyaev","sequence":"additional","affiliation":[{"name":"Northeastern University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9576-7391","authenticated-orcid":false,"given":"Twinkle","family":"Jain","sequence":"additional","affiliation":[{"name":"Northeastern University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8438-5144","authenticated-orcid":false,"given":"Derek","family":"Schafer","sequence":"additional","affiliation":[{"name":"University of New Mexico, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5252-6600","authenticated-orcid":false,"given":"Anthony","family":"Skjellum","sequence":"additional","affiliation":[{"name":"Tennessee Tech University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2175-3848","authenticated-orcid":false,"given":"Gene","family":"Cooperman","sequence":"additional","affiliation":[{"name":"Northeastern University, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,11,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5161063"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"crossref","unstructured":"Deborah Bard Cory Snavely Lisa Gerhardt Jason Lee Becci Totzke Katie Antypas William Arndt Johannes Blaschke Suren Byna Ravi Cheema 2022. The LBNL superfacility project report. Technical Report. U.S. Department of Energy Office of Scientific and Technical Information (OSTI); and Lawrence Bekeley National Laboratory (LBNL).","DOI":"10.2172\/1875256"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063427"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/0010-4655(95)00042-E"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI.2015.22"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342013488238"},{"key":"e_1_3_2_2_7_1","unstructured":"Johannes\u00a0P Blaschke Aaron\u00a0S Brewster Daniel\u00a0W Paley Derek Mendez Asmit Bhowmick Nicholas\u00a0K Sauter Wilko Kr\u00f6ger Murali Shankar Bjoern Enders and Deborah Bard. 2021. Real-time XFEL data analysis at SLAC and NERSC: a trial run of nascent exascale experimental data analysis. Technical Report."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1080\/08940886.2023.2245700"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342006067469"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/781498.781513"},{"key":"e_1_3_2_2_11_1","volume-title":"System-level Scalable Checkpoint-Restart for Petascale Computing. In 22nd IEEE Int. Conf. on Parallel and Distributed Systems (ICPADS\u201916)","author":"Cao Jiajun","year":"2016","unstructured":"Jiajun Cao, Kapil Arya, Rohan Garg, Shawn Matott, Dhabaleswar\u00a0K. Panda, Hari Subramoni, J\u00e9\u00f4me Vienne, and Gene Cooperman. 2016. System-level Scalable Checkpoint-Restart for Petascale Computing. In 22nd IEEE Int. Conf. on Parallel and Distributed Systems (ICPADS\u201916). IEEE Press, 932\u2013941."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2600212.2600219"},{"key":"e_1_3_2_2_13_1","volume-title":"First International Symposium on Checkpointing for Supercomputing (SuperCheck\u201921)","author":"Chouhan Prashant\u00a0Singh","year":"2021","unstructured":"Prashant\u00a0Singh Chouhan, Harsh Khetawat, Neil Resnik, Twinkle Jain, Rohan Garg, Gene Cooperman, Rebecca Hartman\u2013Baker, and Zhengji Zhao. 2021. Improving scalability and reliability of MPI-agnostic transparent checkpointing for production workloads at NERSC (extended abstract). In First International Symposium on Checkpointing for Supercomputing (SuperCheck\u201921). Berkeley, CA, 1\u20133. https:\/\/arxiv.org\/abs\/2103.08546; from https:\/\/supercheck.lbl.gov\/resources."},{"key":"e_1_3_2_2_14_1","unstructured":"Cray. 2014. Understanding Communication and MPI on Cray XC40. https:\/\/www.hpc.kaust.edu.sa\/sites\/default\/files\/files\/public\/\/KSL\/150607-Cray_training\/3.05_cray_mpi.pdf"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00039"},{"key":"e_1_3_2_2_16_1","volume-title":"A New Metric for Ranking High-performance Computing Systems. National Science Review","author":"Dongarra Jack","year":"2016","unstructured":"Jack Dongarra, Michael\u00a0A Heroux, and Piotr Luszczek. 2016. A New Metric for Ranking High-performance Computing Systems. National Science Review (2016), 30\u201335. (benchmark at https:\/\/www.hpcg-benchmark.org\/)."},{"key":"e_1_3_2_2_17_1","volume-title":"Automation of NERSC Application Usage Report. In 2020 IEEE\/ACM International Workshop on HPC User Support Tools (HUST) and Workshop on Programming and Performance Visualization Tools (ProTools)","author":"Driscoll Benjamin","unstructured":"Benjamin Driscoll and Zhengji Zhao. 2020. Automation of NERSC Application Usage Report. In 2020 IEEE\/ACM International Workshop on HPC User Support Tools (HUST) and Workshop on Programming and Performance Visualization Tools (ProTools). IEEE, 10\u201318."},{"key":"e_1_3_2_2_18_1","volume-title":"Int. Conf. on Parallel Processing (ICPP\u201906)","author":"Gao Qi","year":"2006","unstructured":"Qi Gao, Weikuan Yu, Wei Huang, and Dhabaleswar\u00a0K. Panda. 2006. Application-Transparent Checkpoint\/Restart for MPI Programs over InfiniBand. In Int. Conf. on Parallel Processing (ICPP\u201906). 471\u2013478."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307681.3325962"},{"key":"e_1_3_2_2_20_1","volume-title":"Experiences with cross-facility real-time light source data analysis workflows. In 2021 IEEE\/ACM HPC for Urgent Decision Making (UrgentHPC)","author":"Giannakou Anna","unstructured":"Anna Giannakou, Johannes\u00a0P Blaschke, Deborah Bard, and Lavanya Ramakrishnan. 2021. Experiences with cross-facility real-time light source data analysis workflows. In 2021 IEEE\/ACM HPC for Urgent Decision Making (UrgentHPC). IEEE, 45\u201353."},{"key":"e_1_3_2_2_21_1","volume-title":"A Comparison of Application Performance Using Open MPI and Cray MPI","author":"Graham L","year":"2007","unstructured":"Richard\u00a0L Graham, George Bosilca, and Jelena Pje\u0161ivac-Grbovic. 2007. A Comparison of Application Performance Using Open MPI and Cray MPI. Cray Users Group (CUG\u201907) (2007), 10\u00a0pages."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/11752578_29"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"crossref","unstructured":"William Gropp and Ewing Lusk. 1996. User\u2019s guide for MPICH a portable implementation of MPI.","DOI":"10.2172\/378911"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.21057"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/46\/1\/067"},{"key":"e_1_3_2_2_26_1","unstructured":"Hewlett Packard Enterprise. 2017. Aries High-Speed Network. https:\/\/pubs.cray.com\/bundle\/Urika-GX_Hardware_Guide_H-6142_Rev_C_Urika-GX_HW_Guide_DITAval\/page\/Aries_High_Speed_Network_Urika-GX.html"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1551609.1551619"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2007.370605"},{"key":"e_1_3_2_2_29_1","volume-title":"Technical Report.","author":"Karlin Ian","unstructured":"Ian Karlin, Jeff Keasler, and J\u00a0Robert Neely. 2013. Lulesh 2.0 updates and changes. Technical Report. Lawrence Livermore National Lab.(LLNL), Livermore, CA (United States)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356176"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342015623623"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2020.01.026"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.3390\/electronics11091369"},{"key":"e_1_3_2_2_34_1","unstructured":"Mellanox Technologies. 2015. RDMA Aware Networks Programming User Manual (Rev\u00a01.7). https:\/\/www.mellanox.com\/related-docs\/prod_software\/RDMA_Aware_Programming_user_manual.pdf"},{"key":"e_1_3_2_2_35_1","volume-title":"Co-design for molecular dynamics: An exascale proxy application. LA-UR 13-20839","author":"Mohd-Yusof Jamaludin","year":"2013","unstructured":"Jamaludin Mohd-Yusof, Sriram Swaminarayan, and Timothy\u00a0C Germann. 2013. Co-design for molecular dynamics: An exascale proxy application. LA-UR 13-20839 (2013), 88\u201389."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.18"},{"key":"e_1_3_2_2_37_1","unstructured":"NERSC [n. d.]. NERSC the primary scientific computing facility for the Office of Science in the U.S. Department of Energy. https:\/\/nersc.gov\/."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00099"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jocs.2020.101208"},{"key":"e_1_3_2_2_40_1","volume-title":"Workshop on Sustainable Software for Science: Practice and Experiences, held in conjunction with Int\u2019l Conference on Supercomputing (WSSPE). 5\u00a0pages.","author":"Panda K","year":"2013","unstructured":"Dhabaleswar\u00a0K Panda, Karen Tomko, Karl Schulz, and Amitava Majumdar. 2013. The MVAPICH project: Evolution and sustainability of an open source production quality MPI library for HPC. In Workshop on Sustainable Software for Science: Practice and Experiences, held in conjunction with Int\u2019l Conference on Supercomputing (WSSPE). 5\u00a0pages."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevC.64.024612"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcp.2015.07.023"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2004.29"},{"key":"e_1_3_2_2_44_1","volume-title":"High Performance Computing: 6th Latin American Conference, CARLA","author":"Skjellum Anthony","year":"2019","unstructured":"Anthony Skjellum, Martin R\u00fcfenacht, Nawrin Sultana, Derek Schafer, Ignacio Laguna, and Kathryn Mohror. 2020. ExaMPI: A modern design and implementation to accelerate Message Passing Interface innovation. In High Performance Computing: 6th Latin American Conference, CARLA 2019, Turrialba, Costa Rica, September 25\u201327, 2019, Revised Selected Papers 6. Springer, 153\u2013169."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2021.108171"},{"key":"e_1_3_2_2_46_1","volume-title":"https:\/\/www.top500.org\/lists\/top500\/2021\/06\/. [Online","author":"June Supercomputers","year":"2021","unstructured":"Top500 2021. Top500 Supercomputers (June, 2021). https:\/\/www.top500.org\/lists\/top500\/2021\/06\/. [Online; accessed Aug., 2021]."},{"key":"e_1_3_2_2_47_1","volume-title":"Int. Symp. on Checkpointing for Supercomputing (SuperCheck\u2019SC-21)","author":"Xu Yao","year":"2021","unstructured":"Yao Xu, Zhengji Zhao, Rohan Garg, Harsh Khetawat, Rebecca Hartman-Baker, and Gene Cooperman. 2021. MANA-2.0: A future-Proof design for transparent checkpointing of MPI at scale. https:\/\/ieeexplore.ieee.org\/document\/9721343; technical report at https:\/\/arxiv.org\/abs\/2112.05858. In Int. Symp. on Checkpointing for Supercomputing (SuperCheck\u2019SC-21), 2021 SC Workshops Supplementary Proceedings (St. Louis, MO). IEEE, 68\u201378."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2642769.2642777"}],"event":{"name":"SC-W 2023: Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis","location":"Denver CO USA","acronym":"SC-W 2023"},"container-title":["Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624255","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624255","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T03:00:55Z","timestamp":1755745255000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624255"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,12]]},"references-count":48,"alternative-id":["10.1145\/3624062.3624255","10.1145\/3624062"],"URL":"https:\/\/doi.org\/10.1145\/3624062.3624255","relation":{},"subject":[],"published":{"date-parts":[[2023,11,12]]},"assertion":[{"value":"2023-11-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}