{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T06:16:31Z","timestamp":1778134591720,"version":"3.51.4"},"reference-count":22,"publisher":"SAGE Publications","issue":"3","license":[{"start":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T00:00:00Z","timestamp":1777075200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"},{"start":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T00:00:00Z","timestamp":1777075200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/journals.sagepub.com\/page\/policies\/text-and-data-mining-license"}],"funder":[{"DOI":"10.13039\/100006206","name":"Biological and Environmental Research","doi-asserted-by":"publisher","award":["DE-AC02-06CH11357"],"award-info":[{"award-number":["DE-AC02-06CH11357"]}],"id":[{"id":"10.13039\/100006206","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Work at Argonne National Laboratory","award":["DE-AC02-06CH11357"],"award-info":[{"award-number":["DE-AC02-06CH11357"]}]},{"name":"Work at Lawrence Livermore National","award":["DE-AC52-07NA27344"],"award-info":[{"award-number":["DE-AC52-07NA27344"]}]},{"name":"Work at Oak Ridge National Laboratory","award":["DE-AC05-00OR22725"],"award-info":[{"award-number":["DE-AC05-00OR22725"]}]}],"content-domain":{"domain":["journals.sagepub.com"],"crossmark-restriction":true},"short-container-title":["The International Journal of High Performance Computing Applications"],"published-print":{"date-parts":[[2026,5]]},"abstract":"<jats:p>\n                    We report on our experiences replicating 7.3 petabytes (PB) of Earth System Grid Federation (ESGF) computational simulation data from Lawrence Livermore National Laboratory (LLNL) in California to Argonne National Laboratory (ANL) in Illinois and Oak Ridge National Laboratory (ORNL) in Tennessee\u2014a task motivated by a need for increased reliability, capacity, and performance. This task presented significant challenges: the need to move 29 million files twice under time pressure from aging storage hardware; a source file system bottleneck limiting throughput to 1.5\u00a0GB\/s; frequent site maintenance windows; and the need for complete reliability at scale. We addressed these challenges using a simple replication tool that invoked Globus to transfer large bundles of files while tracking progress in a database, dynamically rerouting transfers to work around maintenance periods and file system limitations. Under the covers, Globus organized transfers to make efficient use of the high-speed Energy Sciences network (ESnet) and the data transfer nodes deployed at participating sites, and also addressed security, integrity checking, and recovery from a variety of transient failures. This success demonstrates the considerable benefits that can accrue from the adoption of performant data replication infrastructure. The replication tool is available at\n                    <jats:ext-link xmlns:xlink=\"http:\/\/www.w3.org\/1999\/xlink\" ext-link-type=\"uri\" xlink:href=\"https:\/\/github.com\/esgf2-us\/data-replication-tools\">https:\/\/github.com\/esgf2-us\/data-replication-tools<\/jats:ext-link>\n                    .\n                  <\/jats:p>","DOI":"10.1177\/10943420261441742","type":"journal-article","created":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T09:49:56Z","timestamp":1777110596000},"page":"421-433","update-policy":"https:\/\/doi.org\/10.1177\/sage-journals-update-policy","source":"Crossref","is-referenced-by-count":0,"title":["Automated, reliable, and efficient continental-scale replication of 7.3 petabytes of computational simulation data: A case study"],"prefix":"10.1177","volume":"40","author":[{"given":"Lukasz","family":"Lacinski","sequence":"first","affiliation":[{"name":"The University of Chicago"},{"name":"Argonne National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lee","family":"Liming","sequence":"additional","affiliation":[{"name":"The University of Chicago"},{"name":"Argonne National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Steven","family":"Turoscy","sequence":"additional","affiliation":[{"name":"The University of Chicago"},{"name":"Argonne National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cameron","family":"Harr","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kyle","family":"Chard","sequence":"additional","affiliation":[{"name":"The University of Chicago"},{"name":"Argonne National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eli","family":"Dart","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Paul J.","family":"Durack","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sasha","family":"Ames","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Forrest M.","family":"Hoffman","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2129-5269","authenticated-orcid":false,"given":"Ian T.","family":"Foster","sequence":"additional","affiliation":[{"name":"The University of Chicago"},{"name":"Argonne National Laboratory"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"179","published-online":{"date-parts":[[2026,4,25]]},"reference":[{"key":"e_1_3_5_2_1","first-page":"54","volume-title":"November","author":"Allcock W","year":"2005","unstructured":"Allcock W, Bresnahan J, Kettimuthu R, et al. (2005) The globus striped GridFTP framework and server ACM\/IEEE Conference on Supercomputing. November. IEEE Computer Society, 11-14, 54."},{"key":"e_1_3_5_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s41781-019-0026-3"},{"key":"e_1_3_5_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.842745"},{"key":"e_1_3_5_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2949550.2949554"},{"key":"e_1_3_5_6_1","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.144"},{"key":"e_1_3_5_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2023.01.010"},{"key":"e_1_3_5_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2013.07.002"},{"key":"e_1_3_5_9_1","unstructured":"Dart E Allcock W Bhimji W et al. (2021) \u201cThe petascale DTN project: high performance data transfer for HPC facilities\u201d. In: Preprint arXiv:2105. 12880."},{"key":"e_1_3_5_10_1","doi-asserted-by":"crossref","unstructured":"Dart E Rotman L Tierney B et al. (2013) The science DMZ: a network design pattern for data-intensive science In: International Conference on High Performance Computing Networking Storage and Analysis November 17-21 pp. 1\u201310. ACM. https:\/\/doi.org\/10.1145\/2503210.2503245","DOI":"10.1145\/2503210.2503245"},{"key":"e_1_3_5_11_1","volume-title":"ESnet Requirements Review Program Through the IRI Lens: A Meta-Analysis of Workflow Patterns Across DOE Office of Science Programs. Tech. Rep","author":"Dart E","year":"2023","unstructured":"Dart E, Zurawski J, Hawk C, et al. (2023) ESnet Requirements Review Program Through the IRI Lens: A Meta-Analysis of Workflow Patterns Across DOE Office of Science Programs. Tech. Rep. Lawrence Berkeley National Laboratory (LBNL). Available at: https:\/\/escholarship.org\/uc\/item\/9fg8k5xh."},{"key":"e_1_3_5_12_1","doi-asserted-by":"publisher","DOI":"10.2172\/1463030"},{"key":"e_1_3_5_13_1","doi-asserted-by":"publisher","DOI":"10.5194\/gmd-9-1937-2016"},{"key":"e_1_3_5_14_1","doi-asserted-by":"publisher","DOI":"10.2172\/1643759"},{"key":"e_1_3_5_15_1","doi-asserted-by":"crossref","unstructured":"Liu Z Kettimuthu R Foster I et al. (2018). \u201cCross-geography scientific data transferring trends and behavior\u201d. In: 27th International Symposium on High-Performance Parallel and Distributed Computing July 15-18 pp. 267\u2013278. ACM. https:\/\/www.osti.gov\/servlets\/purl\/1468117.Tempe Arizona:ACM. https:\/\/doi.org\/10.1145\/3208040.3208053. isbn: 9781450357852.","DOI":"10.1145\/3208040.3208053"},{"key":"e_1_3_5_16_1","article-title":"Integrated Research Infrastructure architecture blueprint activity (final report 2023)","author":"Miller W","year":"2023","unstructured":"Miller W, Bard D, Boehnlein A, et al. (2023) Integrated Research Infrastructure architecture blueprint activity (final report 2023). Tech. rep. US Department of Energy, Office of Science. Available at: https:\/\/doi.org\/10.2172\/1984466","journal-title":"Tech. rep. US Department of Energy, Office of Science."},{"key":"e_1_3_5_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672262"},{"key":"e_1_3_5_18_1","doi-asserted-by":"publisher","DOI":"10.5194\/gmd-14-629-2021"},{"key":"e_1_3_5_19_1","unstructured":"Schmuck F Haskin R (2002) GPFS: a shared-disk file system for large computing clusters In: Conference on File and Storage Technologies January 28-30. USENIX."},{"key":"e_1_3_5_20_1","doi-asserted-by":"publisher","DOI":"10.1175\/BAMS-D-11-00094.1"},{"key":"e_1_3_5_21_1","doi-asserted-by":"publisher","DOI":"10.1175\/BAMS-D-12-00204.1"},{"key":"e_1_3_5_22_1","doi-asserted-by":"publisher","DOI":"10.1175\/2008BAMS2459.1"},{"key":"e_1_3_5_23_1","volume-title":"Biological and Environmental Research Network Requirements Review Final Report. Tech. Rep","author":"Zurawski J","year":"2023","unstructured":"Zurawski J, Dart E, Harlan Z, et al. (2023) Biological and Environmental Research Network Requirements Review Final Report. Tech. Rep. Lawrence Berkeley National Laboratory (LBNL). Available at: https:\/\/escholarship.org\/uc\/item\/3mz7h3mm."}],"container-title":["The International Journal of High Performance Computing Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/journals.sagepub.com\/doi\/pdf\/10.1177\/10943420261441742","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/journals.sagepub.com\/doi\/full-xml\/10.1177\/10943420261441742","content-type":"application\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/journals.sagepub.com\/doi\/pdf\/10.1177\/10943420261441742","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T05:46:18Z","timestamp":1778132778000},"score":1,"resource":{"primary":{"URL":"https:\/\/journals.sagepub.com\/doi\/10.1177\/10943420261441742"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,25]]},"references-count":22,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,5]]}},"alternative-id":["10.1177\/10943420261441742"],"URL":"https:\/\/doi.org\/10.1177\/10943420261441742","relation":{},"ISSN":["1094-3420","1741-2846"],"issn-type":[{"value":"1094-3420","type":"print"},{"value":"1741-2846","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,4,25]]}}}