{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T08:03:23Z","timestamp":1776931403629,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":16,"publisher":"ACM","funder":[{"name":"U.S. Department of Energy, Office of Science, Office of Advanced Scientific Computing","award":["DE-SC-0012704"],"award-info":[{"award-number":["DE-SC-0012704"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3731599.3767461","type":"proceedings-article","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T16:13:44Z","timestamp":1762532024000},"page":"968-976","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Error Analysis of Globally Distributed Workflow Management System"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-5691-6405","authenticated-orcid":false,"given":"Sankha","family":"Dutta","sequence":"first","affiliation":[{"name":"Brookhaven National Laboratory, Upton, New York, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2129-408X","authenticated-orcid":false,"given":"Ozgur","family":"Kilic","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Brooklyn, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5792-8182","authenticated-orcid":false,"given":"Tatiana","family":"Korchuganova","sequence":"additional","affiliation":[{"name":"University of Pittsburgh, pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6848-7463","authenticated-orcid":false,"given":"Paul","family":"Nilsson","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1847-3976","authenticated-orcid":false,"given":"Sairam","family":"Sri Vatsavai","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4285-9588","authenticated-orcid":false,"given":"Kuan-Chieh","family":"Hsu","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9725-0193","authenticated-orcid":false,"given":"David K.","family":"Park","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9314-5860","authenticated-orcid":false,"given":"Joseph","family":"Boudreau","sequence":"additional","affiliation":[{"name":"University of Pittsburgh, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2681-8105","authenticated-orcid":false,"given":"Tasnuva","family":"Chowdhury","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2464-7226","authenticated-orcid":false,"given":"Feng","family":"Shengyu","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8074-6156","authenticated-orcid":false,"given":"Raees","family":"Khan","sequence":"additional","affiliation":[{"name":"University of Pittsburgh, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2622-5087","authenticated-orcid":false,"given":"Jaehyung","family":"Kim","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3559-5772","authenticated-orcid":false,"given":"Scott","family":"Klasky","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0901-1817","authenticated-orcid":false,"given":"Tadashi","family":"Maeno","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7102-6388","authenticated-orcid":false,"given":"Verena Ingrid Martinez","family":"Outschoorn","sequence":"additional","affiliation":[{"name":"University of Massachusetts Amherst, Amherst, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9647-542X","authenticated-orcid":false,"given":"Norbert","family":"Podhorszki","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5750-6964","authenticated-orcid":false,"given":"Yihui","family":"Ren","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1902-1955","authenticated-orcid":false,"given":"Fr\u00e9d\u00e9ric","family":"Suter","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory, Oak Ridge, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7733-8568","authenticated-orcid":false,"given":"Wei","family":"Yang","sequence":"additional","affiliation":[{"name":"SLAC National Accelerator Lab, Menlo Park, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7438-978X","authenticated-orcid":false,"given":"Yiming","family":"Yang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4378-6448","authenticated-orcid":false,"given":"Shinjae","family":"Yoo","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2748-4829","authenticated-orcid":false,"given":"Alexei","family":"Klimentov","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7812-2946","authenticated-orcid":false,"given":"Adolfy","family":"Hoisie","sequence":"additional","affiliation":[{"name":"Brookhaven National Laboratory, Upton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"[n. d.]. Pilot error codes. https:\/\/github.com\/REDWOOD24\/data_files\/blob\/main\/pilot_error_codes_and_descriptions.json. Accessed 15 Aug 2025.."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"crossref","unstructured":"Zulfiqar Ahmad Ali\u00a0Imran Jehangiri Nader Mohamed Mohamed Othman and Arif\u00a0Iqbal Umar. 2022. Fault tolerant and data oriented scientific workflows management and scheduling system in cloud computing. IEEE Access 10 (2022) 77614\u201377632.","DOI":"10.1109\/ACCESS.2022.3193151"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Martin Barisits Thomas Beermann Frank Berghaus Brian Bockelman Joaquin Bogado David Cameron Dimitrios Christidis Diego Ciangottini Gancho Dimitrov Markus Elsing et\u00a0al. 2019. Rucio: Scientific data management. Computing and Software for Big Science 3 (2019) 1\u201319.","DOI":"10.1007\/s41781-019-0026-3"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Ramon Canal Carles Hernandez Rafa Tornero Alessandro Cilardo Giuseppe Massari Federico Reghenzani William Fornaciari Marina Zapater David Atienza Ariel Oleksiak et\u00a0al. 2020. Predictive reliability and fault management in exascale systems: State of the art and perspectives. ACM Computing Surveys (CSUR) 53 5 (2020) 1\u201332.","DOI":"10.1145\/3403956"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Franck Cappello Geist Al William Gropp Sanjay Kale Bill Kramer and Marc Snir. 2014. Toward exascale resilience: 2014 update. Supercomputing Frontiers and Innovations: an International Journal 1 1 (2014) 5\u201328.","DOI":"10.14529\/jsfi140101"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/E-SCIENCE.2006.261077"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59051-2_4"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Sucharitha Isukapalli and Satish\u00a0Narayana Srirama. 2024. A systematic survey on fault-tolerant solutions for distributed data analytics: Taxonomy comparison and future directions. Computer Science Review 53 (2024) 100660.","DOI":"10.1016\/j.cosrev.2024.100660"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Ozgur\u00a0O Kilic David\u00a0K Park Yihui Ren Tatiana Korchuganova Sairam\u00a0Sri Vatsavai Joseph Boudreau Tasnuva Chowdhury Shengyu Feng Raees Khan Jaehyung Kim et\u00a0al. 2025. Towards an Introspective Dynamic Model of Globally Distributed Computing Infrastructures. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2506.19578 (2025).","DOI":"10.1051\/epjconf\/202533701082"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Feng Li and Fengguang Song. 2023. Efficient in-situ workflow planning for geographically distributed heterogeneous environments. Future Generation Computer Systems 149 (2023) 105\u2013121.","DOI":"10.1016\/j.future.2023.07.010"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"FH\u00a0Barreiro Megino K De A Klimentov Tadashi Maeno P Nilsson D Oleynik S Padolski S Panitkin T Wenaus ATLAS Collaboration et\u00a0al. 2017. PanDA for ATLAS distributed computing in the next decade. 898 5 (2017) 052002.","DOI":"10.1088\/1742-6596\/898\/5\/052002"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Alberto Mulone Doriana Medi\u0107 and Marco Aldinucci. 2024. A fault tolerance mechanism for hybrid scientific workflows. (2024) 141\u2013153.","DOI":"10.1007\/978-3-031-90203-1_13"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/SCW63240.2024.00018"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Bianca Schroeder and Garth\u00a0A Gibson. 2009. A large-scale study of failures in high-performance computing systems. IEEE transactions on Dependable and Secure Computing 7 4 (2009) 337\u2013350.","DOI":"10.1109\/TDSC.2009.4"},{"key":"e_1_3_3_1_16_2","unstructured":"Zhaozhong Shi. 2024. Heavy Flavor Physics at the sPHENIX Experiment. arxiv:https:\/\/arXiv.org\/abs\/2401.11036\u00a0[nucl-ex] https:\/\/arxiv.org\/abs\/2401.11036"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","unstructured":"Svirin Pavlo De Kaushik Forti Alessandra Klimentov Alexei Larsen Rasmus Love Peter Maeno Tadashi Mashinistov Ruslan Mukherjee Swagato Nomerotski Andrei Oleynik Danila Panitkin Sergey Park Hye Yun Sheldon Erin Slosar Anze Wells Jack and Wenaus Torre. 2019. BigPanDA: PanDA Workload Management System and its Applications beyond ATLAS. EPJ Web Conf. 214 (2019) 03050. 10.1051\/epjconf\/201921403050","DOI":"10.1051\/epjconf\/201921403050"}],"event":{"name":"SC Workshops '25: Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St Louis MO USA","acronym":"SC Workshops '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the SC '25 Workshops of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731599.3767461","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T19:28:25Z","timestamp":1767986905000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731599.3767461"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":16,"alternative-id":["10.1145\/3731599.3767461","10.1145\/3731599"],"URL":"https:\/\/doi.org\/10.1145\/3731599.3767461","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}