{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T03:59:12Z","timestamp":1754193552266,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2317698","CNS-2317751","CCF-2318937"],"award-info":[{"award-number":["CNS-2317698","CNS-2317751","CCF-2318937"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,4]]},"DOI":"10.1145\/3694715.3695979","type":"proceedings-article","created":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T19:28:18Z","timestamp":1731698898000},"page":"46-62","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Efficient Reproduction of Fault-Induced Failures in Distributed Systems with Feedback-Driven Fault Injection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-5010-870X","authenticated-orcid":false,"given":"Jia","family":"Pan","sequence":"first","affiliation":[{"name":"Johns Hopkins University, Baltimore, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7114-1523","authenticated-orcid":false,"given":"Haoze","family":"Wu","sequence":"additional","affiliation":[{"name":"Johns Hopkins University, Baltimore, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9842-0403","authenticated-orcid":false,"given":"Tanakorn","family":"Leesatapornwongsa","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7813-9756","authenticated-orcid":false,"given":"Suman","family":"Nath","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6315-0848","authenticated-orcid":false,"given":"Peng","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, United States"}]}],"member":"320","published-online":{"date-parts":[[2024,11,15]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Jepsen: a framework for distributed systems verification with fault injection. https:\/\/github.com\/jepsen-io\/jepsen."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1629575.1629594"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2723711"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the 6th Symposium on Operating Systems Design and Implementation, OSDI '04","author":"Barham Paul","year":"2004","unstructured":"Paul Barham, Austin Donnelly, Rebecca Isaacs, and Richard Mortier. Using magpie for request extraction and workload modelling. In Proceedings of the 6th Symposium on Operating Systems Design and Implementation, OSDI '04, San Francisco, CA, December 2004. USENIX Association."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.5555\/2486788.2486836"},{"key":"e_1_3_2_1_6_1","unstructured":"Cory Bennett and Ariel Tseitlin. Chaos monkey released into the wild. http:\/\/techblog.netflix.com\/2012\/07\/chaos-monkey-released-into-wild.html 2009."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2976749.2978428"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2976749.2978428"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2018.02.002"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3324884.3416548"},{"key":"e_1_3_2_1_11_1","first-page":"198","volume-title":"Star: Stack trace based automatic crash reproduction via symbolic execution","author":"Chen Ning","year":"2015","unstructured":"Ning Chen and Sunghun Kim. Star: Stack trace based automatic crash reproduction via symbolic execution. pages 198--220, 2015."},{"key":"e_1_3_2_1_12_1","first-page":"1701","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Chen Yinfang","year":"2023","unstructured":"Yinfang Chen, Xudong Sun, Suman Nath, Ze Yang, and Tianyin Xu. {Push-Button} reliability testing for {Cloud-Backed} applications with rainmaker. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23), pages 1701--1716, 2023."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1060289.1060309"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/3358807.3358809"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/24039.24041"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2018.00040"},{"key":"e_1_3_2_1_17_1","first-page":"149","volume-title":"Proceedings of the 15th Usenix Conference on File and Storage Technologies, FAST '17","author":"Ganesan Aishwarya","year":"2017","unstructured":"Aishwarya Ganesan, Ramnatthan Alagappan, Andrea C. Arpaci-Dusseau, and Remzi H. Arpaci-Dusseau. Redundancy does not imply fault tolerance: Analysis of distributed storage reactions to single errors and corruptions. In Proceedings of the 15th Usenix Conference on File and Storage Technologies, FAST '17, page 149--165, USA, 2017. USENIX Association."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3542929.3563482"},{"key":"e_1_3_2_1_19_1","first-page":"238","volume-title":"Proceedings of the 8th USENIX Conference on Networked Systems Design and Implementation, NSDI'11","author":"Gunawi Haryadi S.","year":"2011","unstructured":"Haryadi S. Gunawi, Thanh Do, Pallavi Joshi, Peter Alvaro, Joseph M. Hellerstein, Andrea C. Arpaci-Dusseau, Remzi H. Arpaci-Dusseau, Koushik Sen, and Dhruba Borthakur. FATE and DESTINI: A framework for cloud recovery testing. In Proceedings of the 8th USENIX Conference on Networked Systems Design and Implementation, NSDI'11, pages 238--252, Berkeley, CA, USA, 2011. USENIX Association."},{"key":"e_1_3_2_1_20_1","first-page":"193","volume-title":"Proceedings of the 8th USENIX Conference on Operating Systems Design and Implementation, OSDI '08","author":"Guo Zhenyu","year":"2008","unstructured":"Zhenyu Guo, Xi Wang, Jian Tang, Xuezheng Liu, Zhilei Xu, Ming Wu, M. Frans Kaashoek, and Zheng Zhang. R2: An application-level kernel for record and replay. In Proceedings of the 8th USENIX Conference on Operating Systems Design and Implementation, OSDI '08, page 193--208, USA, 2008. USENIX Association."},{"key":"e_1_3_2_1_21_1","volume-title":"Shutdown of WAL stuck at waitforsafepoint. https:\/\/issues.apache.org\/jira\/browse\/HBASE-25905","author":"Base Apache","year":"2021","unstructured":"Apache HBase. Shutdown of WAL stuck at waitforsafepoint. https:\/\/issues.apache.org\/jira\/browse\/HBASE-25905, 2021."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/2.585157"},{"key":"e_1_3_2_1_23_1","first-page":"141","volume-title":"Proceedings of the 34th ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI '13","author":"Huang Jeff","year":"2013","unstructured":"Jeff Huang, Charles Zhang, and Julian Dolby. Clap: recording local executions to reproduce concurrency failures. In Proceedings of the 34th ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI '13, page 141--152, New York, NY, USA, 2013. Association for Computing Machinery."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3102980.3103005"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/2337223.2337279"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2048066.2048082"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the 4th USENIX Symposium on Networked Systems Design & Implementation, NSDI '07","author":"Killian Charles","year":"2007","unstructured":"Charles Killian, James W. Anderson, Ranjit Jhala, and Amin Vahdat. Life, death, and the critical transition: Finding liveness bugs in systems code. In Proceedings of the 4th USENIX Symposium on Networked Systems Design & Implementation, NSDI '07. USENIX Association, April 2007."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1736020.1736031"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3558956"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3317550.3321438"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3177161"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3192366.3192380"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2043556.2043587"},{"key":"e_1_3_2_1_34_1","first-page":"559","volume-title":"17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20)","author":"Lou Chang","year":"2020","unstructured":"Chang Lou, Peng Huang, and Scott Smith. Understanding, detecting and localizing partial failures in large system software. In 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20), pages 559--574, Santa Clara, CA, February 2020. USENIX Association."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359645"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2009.5270313"},{"key":"e_1_3_2_1_37_1","first-page":"379","volume-title":"2009 IEEE\/IFIP International Conference on Dependable Systems Networks, DSN '09","author":"Paul","year":"2009","unstructured":"Paul D. Marinescu and George Candea. LFI: A practical and general library-level fault injector. In 2009 IEEE\/IFIP International Conference on Dependable Systems Networks, DSN '09, pages 379--388, June 2009."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037751"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3487005"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/1394608.1382146"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.5555\/1855741.1855760"},{"key":"e_1_3_2_1_42_1","volume-title":"An o (nd) difference algorithm and its variations. Algorithmica, 1(1--4):251--266","author":"Myers Eugene W","year":"1986","unstructured":"Eugene W Myers. An o (nd) difference algorithm and its variations. Algorithmica, 1(1--4):251--266, 1986."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2019.00069"},{"volume-title":"The discovery of apache zookeeper's poison packet. https:\/\/www.pagerduty.com\/blog\/the-discovery-of-apache-zookeepers-poison-packet","year":"2015","key":"e_1_3_2_1_44_1","unstructured":"PagerDuty. The discovery of apache zookeeper's poison packet. https:\/\/www.pagerduty.com\/blog\/the-discovery-of-apache-zookeepers-poison-packet, 2015."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/1629575.1629593"},{"key":"e_1_3_2_1_46_1","first-page":"216","volume-title":"Proceedings of the 44th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO-44","author":"Pokam Gilles","year":"2011","unstructured":"Gilles Pokam, Cristiano Pereira, Shiliang Hu, Ali-Reza Adl-Tabatabai, Justin Gottschlich, Jungwoo Ha, and Youfeng Wu. CoreRacer: A practical memory race recorder for multicore x86 tso processors. In Proceedings of the 44th Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO-44, page 216--225, New York, NY, USA, 2011. Association for Computing Machinery."},{"key":"e_1_3_2_1_47_1","first-page":"9","volume-title":"Proceedings of the 3rd Conference on Networked Systems Design & Implementation -","volume":"3","author":"Reynolds Patrick","year":"2006","unstructured":"Patrick Reynolds, Charles Killian, Janet L. Wiener, Jeffrey C. Mogul, Mehul A. Shah, and Amin Vahdat. Pip: Detecting the unexpected in distributed systems. In Proceedings of the 3rd Conference on Networked Systems Design & Implementation - Volume 3, NSDI '06, pages 9--9, Berkeley, CA, USA, 2006. USENIX Association."},{"key":"e_1_3_2_1_48_1","first-page":"143","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Sun Xudong","year":"2022","unstructured":"Xudong Sun, Wenqing Luo, Jiawei Tyler Gu, Aishwarya Ganesan, Ramnatthan Alagappan, Michael Gasch, Lalith Suresh, and Tianyin Xu. Automatic reliability testing for cluster management controllers. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22), pages 143--159, Carlsbad, CA, July 2022. USENIX Association."},{"key":"e_1_3_2_1_49_1","first-page":"143","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Sun Xudong","year":"2022","unstructured":"Xudong Sun, Wenqing Luo, Jiawei Tyler Gu, Aishwarya Ganesan, Ramnatthan Alagappan, Michael Gasch, Lalith Suresh, and Tianyin Xu. Automatic reliability testing for cluster management controllers. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22), pages 143--159, 2022."},{"key":"e_1_3_2_1_50_1","first-page":"13","volume-title":"Proceedings of the 1999 Conference of the Centre for Advanced Studies on Collaborative Research, CASCON '99","author":"Vall\u00e9e-Rai Raja","unstructured":"Raja Vall\u00e9e-Rai, Phong Co, Etienne Gagnon, Laurie Hendren, Patrick Lam, and Vijay Sundaresan. Soot - a java bytecode optimization framework. In Proceedings of the 1999 Conference of the Centre for Advanced Studies on Collaborative Research, CASCON '99, page 13. IBM Press, 1999."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/1950365.1950370"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.5555\/800078.802557"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.5555\/3691825.3691895"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/1736020.1736038"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/1755913.1755946"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/1755913.1755946"},{"key":"e_1_3_2_1_57_1","volume-title":"The fuzzing book","author":"Zeller Andreas","year":"2019","unstructured":"Andreas Zeller, Rahul Gopinath, Marcel B\u00f6hme, Gordon Fraser, and Christian Holler. The fuzzing book, 2019."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2851237"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132747.3132768"},{"key":"e_1_3_2_1_60_1","volume-title":"Fuzzing: a survey for roadmap. ACM Computing Surveys (CSUR), 54(11s):1--36","author":"Zhu Xiaogang","year":"2022","unstructured":"Xiaogang Zhu, Sheng Wen, Seyit Camtepe, and Yang Xiang. Fuzzing: a survey for roadmap. ACM Computing Surveys (CSUR), 54(11s):1--36, 2022."}],"event":{"name":"SOSP '24: ACM SIGOPS 30th Symposium on Operating Systems Principles","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","USENIX"],"location":"Austin TX USA","acronym":"SOSP '24"},"container-title":["Proceedings of the ACM SIGOPS 30th Symposium on Operating Systems Principles"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3694715.3695979","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3694715.3695979","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3694715.3695979","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:05:48Z","timestamp":1750291548000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3694715.3695979"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,4]]},"references-count":60,"alternative-id":["10.1145\/3694715.3695979","10.1145\/3694715"],"URL":"https:\/\/doi.org\/10.1145\/3694715.3695979","relation":{},"subject":[],"published":{"date-parts":[[2024,11,4]]},"assertion":[{"value":"2024-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}