{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T00:47:59Z","timestamp":1759366079325,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":75,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFE0204100"],"award-info":[{"award-number":["2024YFE0204100"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"US National Science Foundation","award":["CNS-2403254","CNS-2330831","CNS-210683"],"award-info":[{"award-number":["CNS-2403254","CNS-2330831","CNS-210683"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,13]]},"DOI":"10.1145\/3731569.3764832","type":"proceedings-article","created":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T12:43:24Z","timestamp":1759322604000},"page":"286-304","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Orthrus: Efficient and Timely Detection of Silent User Data Corruption in the Cloud with Resource-Adaptive Computation Validation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8591-0402","authenticated-orcid":false,"given":"Chenxiao","family":"Liu","sequence":"first","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1076-2091","authenticated-orcid":false,"given":"Zhenting","family":"Zhu","sequence":"additional","affiliation":[{"name":"UCLA, Los Angeles, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1191-8675","authenticated-orcid":false,"given":"Quanxi","family":"Li","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6111-7901","authenticated-orcid":false,"given":"Yanwen","family":"Xia","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3651-6973","authenticated-orcid":false,"given":"Yifan","family":"Qiao","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8472-6157","authenticated-orcid":false,"given":"Xiangyun","family":"Deng","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6214-5390","authenticated-orcid":false,"given":"Youyou","family":"Lu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6731-216X","authenticated-orcid":false,"given":"Tao","family":"Xie","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2491-7679","authenticated-orcid":false,"given":"Huimin","family":"Cui","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7603-4210","authenticated-orcid":false,"given":"Zidong","family":"Du","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4737-2146","authenticated-orcid":false,"given":"Harry","family":"Xu","sequence":"additional","affiliation":[{"name":"UCLA, Los Angeles, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1451-3101","authenticated-orcid":false,"given":"Chenxi","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Meta CacheLib. https:\/\/cachelib.org."},{"key":"e_1_3_2_2_2_1","volume-title":"http:\/\/memcached.org","author":"Memcached","year":"2020","unstructured":"Memcached - a distributed memory object caching system. http:\/\/memcached.org, 2020."},{"key":"e_1_3_2_2_3_1","volume-title":"https:\/\/github.com\/brianfrankcooper\/YCSB\/","author":"YCSB","year":"2021","unstructured":"YCSB - Yahoo! cloud serving benchmark. https:\/\/github.com\/brianfrankcooper\/YCSB\/, 2021."},{"key":"e_1_3_2_2_4_1","volume-title":"multi-core key-value store. https:\/\/github.com\/kohler\/masstree-beta","author":"A","year":"2023","unstructured":"A fast, multi-core key-value store. https:\/\/github.com\/kohler\/masstree-beta, 2023."},{"key":"e_1_3_2_2_5_1","volume-title":"persistent key-value store for fast storage. https:\/\/github.com\/facebook\/rocksdb\/","author":"DB","year":"2023","unstructured":"RocksDB - a library that provides an embeddable, persistent key-value store for fast storage. https:\/\/github.com\/facebook\/rocksdb\/, 2023."},{"key":"e_1_3_2_2_6_1","volume-title":"https:\/\/llvm.org\/docs\/LangRef.html","author":"The LLVM","year":"2024","unstructured":"The LLVM compiler infrastructure. https:\/\/llvm.org\/docs\/LangRef.html, 2024."},{"key":"e_1_3_2_2_7_1","volume-title":"https:\/\/github.com\/redis","author":"Redis","year":"2024","unstructured":"Redis - an in-memory data structures server. https:\/\/github.com\/redis, 2024."},{"key":"e_1_3_2_2_8_1","volume-title":"https:\/\/github.com\/memcached\/memcached","author":"Memcached The","year":"2025","unstructured":"The GitHub repo of an open-source Memcached. https:\/\/github.com\/memcached\/memcached, 2025."},{"key":"e_1_3_2_2_9_1","volume-title":"Checking object integrity in Amazon S3. https:\/\/docs.aws.amazon.com\/AmazonS3\/latest\/userguide\/checking-object-integrity.html","year":"2025","unstructured":"Amazon. Checking object integrity in Amazon S3. https:\/\/docs.aws.amazon.com\/AmazonS3\/latest\/userguide\/checking-object-integrity.html, 2025."},{"key":"e_1_3_2_2_10_1","volume-title":"Proc. ACM Program. Lang., 8(PLDI)","author":"Anand Aditya","year":"2024","unstructured":"Aditya Anand, Solai Adithya, Swapnil Rustagi, Priyam Seth, Vijay Sundaresan, Daryl Maier, V. Krishna Nandivada, and Manas Thakur. Optimistic stack allocation and dynamic heapification for managed runtimes. Proc. ACM Program. Lang., 8(PLDI), June 2024."},{"key":"e_1_3_2_2_11_1","volume-title":"Personal communication with Alibaba Cloud","author":"Anonymous","year":"2025","unstructured":"Anonymous. Personal communication with Alibaba Cloud, 2025."},{"key":"e_1_3_2_2_12_1","volume-title":"https:\/\/cassandra.apache.org","author":"Cassandra Apache","year":"2021","unstructured":"Apache. Apache Cassandra. https:\/\/cassandra.apache.org, 2021."},{"key":"e_1_3_2_2_13_1","first-page":"620","volume-title":"Proceedings of the 12th USENIX Conference on Networked Systems Design and Implementation, NSDI'15","author":"Behrens Diogo","unstructured":"Diogo Behrens, Marco Serafini, Sergei Arnautov, Flavio P. Junqueira, and Christof Fetzer. Scalable error isolation for distributed systems. In Proceedings of the 12th USENIX Conference on Networked Systems Design and Implementation, NSDI'15, pages 605\u2013620."},{"key":"e_1_3_2_2_14_1","first-page":"768","volume-title":"Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI'20","author":"Berg Benjamin","year":"2020","unstructured":"Benjamin Berg, Daniel S. Berger, Sara McAllister, Isaac Grosof, Sathya Gunasekar, Jimmy Lu, Michael Uhlar, Jim Carrig, Nathan Beckmann, Mor Harchol-Balter, and Gregory R. Ganger. The CacheLib caching engine: Design and experiences at scale. In Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI'20, pages 753\u2013768, November 2020."},{"key":"e_1_3_2_2_15_1","volume-title":"Proceedings of the 8th USENIX Symposium on Networked Systems Design and Implementation, NSDI'11","author":"Bolosky William J.","year":"2011","unstructured":"William J. Bolosky, Dexter Bradshaw, Randolph B. Haagens, Norbert P. Kusters, and Peng Li. Paxos replicated state machines as the basis of a High-Performance data store. In Proceedings of the 8th USENIX Symposium on Networked Systems Design and Implementation, NSDI'11, March 2011."},{"key":"e_1_3_2_2_16_1","first-page":"98","volume-title":"Proceedings of the 21st International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS'16","author":"Bornholt James","year":"2016","unstructured":"James Bornholt, Antoine Kaufmann, Jialin Li, Arvind Krishnamurthy, Emina Torlak, and Xi Wang. Specifying and checking file system crash-consistency models. In Proceedings of the 21st International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS'16, pages 83\u201398, 2016."},{"key":"e_1_3_2_2_17_1","first-page":"350","volume-title":"Proceedings of the 7th Symposium on Operating Systems Design and Implementation, OSDI'06","author":"Burrows Mike","year":"2006","unstructured":"Mike Burrows. The chubby lock service for loosely-coupled distributed systems. In Proceedings of the 7th Symposium on Operating Systems Design and Implementation, OSDI'06, page 335\u2013350, 2006."},{"key":"e_1_3_2_2_18_1","first-page":"286","volume-title":"Proceedings of the 26th Symposium on Operating Systems Principles, SOSP'17","author":"Chen Haogang","year":"2017","unstructured":"Haogang Chen, Tej Chajed, Alex Konradi, Stephanie Wang, Atalay undefinedleri, Adam Chlipala, M. Frans Kaashoek, and Nickolai Zeldovich. Verifying a high-performance crash-safe file system using a tree specification. In Proceedings of the 26th Symposium on Operating Systems Principles, SOSP'17, pages 270\u2013286, 2017."},{"key":"e_1_3_2_2_19_1","first-page":"37","volume-title":"Proceedings of the 25th Symposium on Operating Systems Principles, SOSP'15","author":"Chen Haogang","year":"2015","unstructured":"Haogang Chen, Daniel Ziegler, Tej Chajed, Adam Chlipala, M. Frans Kaashoek, and Nickolai Zeldovich. Using crash hoare logic for certifying the FSCQ file system. In Proceedings of the 25th Symposium on Operating Systems Principles, SOSP'15, pages 18\u201337, 2015."},{"key":"e_1_3_2_2_20_1","first-page":"56","volume-title":"Proceedings of the 1st ACM\/USENIX International Conference on Virtual Execution Environments, VEE'05","author":"Click Cliff","year":"2005","unstructured":"Cliff Click, Gil Tene, and Michael Wolf. The pauseless gc algorithm. In Proceedings of the 1st ACM\/USENIX International Conference on Virtual Execution Environments, VEE'05, page 46\u201356, 2005."},{"key":"e_1_3_2_2_21_1","volume-title":"December","author":"Collins George E.","year":"1960","unstructured":"George E. Collins. A method for overlapping and erasure of lists. Commun. ACM, (12):655\u2013657, December 1960."},{"key":"e_1_3_2_2_22_1","first-page":"264","volume-title":"Proceedings of the 10th USENIX Symposium on Operating Systems Design and Implementation, OSDI'12","author":"Corbett James C.","year":"2012","unstructured":"James C. Corbett, Jeffrey Dean, Michael Epstein, Andrew Fikes, Christopher Frost, JJ Furman, Sanjay Ghemawat, Andrey Gubarev, Christopher Heiser, Peter Hochschild, Wilson Hsieh, Sebastian Kanthak, Eugene Kogan, Hongyi Li, Alexander Lloyd, Sergey Melnik, David Mwaura, David Nagle, Sean Quinlan, Rajesh Rao, Lindsay Rolig, Yasushi Saito, Michal Szymaniak, Christopher Taylor, Ruth Wang, and Dale Woodford. Spanner: Google's Globally-Distributed database. In Proceedings of the 10th USENIX Symposium on Operating Systems Design and Implementation, OSDI'12, pages 261\u2013264, October 2012."},{"key":"e_1_3_2_2_23_1","first-page":"41","volume-title":"Proceedings of the 2012 USENIX Conference on Annual Technical Conference, ATC'12","author":"Correia Miguel","year":"2012","unstructured":"Miguel Correia, Daniel G\u00f3mez Ferro, Flavio P. Junqueira, and Marco Serafini. Practical hardening of crash-tolerant systems. In Proceedings of the 2012 USENIX Conference on Annual Technical Conference, ATC'12, page 41, 2012."},{"key":"e_1_3_2_2_24_1","volume-title":"Reliable Computer Systems: Design and Evaluation","author":"Swarzd Robert S.","year":"1998","unstructured":"Robert S. Swarzd Daniel P. Siewiorek. Reliable Computer Systems: Design and Evaluation. 1998."},{"key":"e_1_3_2_2_25_1","volume-title":"Proceedings of the 6th Symposium on Operating Systems Design & Implementation, OSDI'04","author":"Dean Jeffrey","year":"2004","unstructured":"Jeffrey Dean and Sanjay Ghemawat. MapReduce: Simplified data processing on large clusters. In Proceedings of the 6th Symposium on Operating Systems Design & Implementation, OSDI'04, December 2004."},{"key":"e_1_3_2_2_26_1","unstructured":"Dependable Systems Lab at UBC. LLFI : an LLVM based fault injection tool. https:\/\/github.com\/DependableSystemsLab\/LLFI."},{"key":"e_1_3_2_2_27_1","first-page":"48","volume-title":"Proceedings of the 4th International Symposium on Memory Management, ISMM'04","author":"Detlefs David","year":"2004","unstructured":"David Detlefs, Christine Flood, Steve Heller, and Tony Printezis. Garbage-first garbage collection. In Proceedings of the 4th International Symposium on Memory Management, ISMM'04, page 37\u201348, 2004."},{"key":"e_1_3_2_2_28_1","volume-title":"Proc. ACM Program. Lang., 8(OOPSLA2)","author":"Ding Boyao","year":"2024","unstructured":"Boyao Ding, Qingwei Li, Yu Zhang, Fugen Tang, and Jinbao Chen. MEA2: A lightweight field-sensitive escape analysis with points-to calculation for Golang. Proc. ACM Program. Lang., 8(OOPSLA2), 2024."},{"key":"e_1_3_2_2_29_1","first-page":"984","volume-title":"Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data, SIGMOD'20","author":"Ding Jialin","year":"2020","unstructured":"Jialin Ding, Umar Farooq Minhas, Jia Yu, Chi Wang, Jaeyoung Do, Yinan Li, Hantian Zhang, Badrish Chandramouli, Johannes Gehrke, Donald Kossmann, David Lomet, and Tim Kraska. ALEX: An updatable adaptive learned index. In Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data, SIGMOD'20, page 969\u2013984, 2020."},{"key":"e_1_3_2_2_30_1","volume-title":"February","author":"Dixit Harish Dattatraya","year":"2021","unstructured":"Harish Dattatraya Dixit, Sneha Pendharkar, Matt Beadon, Chris Mason, Tejasvi Chakravarthy, Bharath Muthiah, and Sriram Sankar. Silent Data Corruptions at Scale, February 2021. arXiv.2102.11245."},{"key":"e_1_3_2_2_31_1","first-page":"626","volume-title":"Proceedings of the 32nd IEEE International Conference on Distributed Computing Systems, ICDCS'12","author":"Elliott James","year":"2012","unstructured":"James Elliott, Kishor Kharbas, David Fiala, Frank Mueller, Kurt Ferreira, and Christian Engelmann. Combining partial redundancy and checkpointing for HPC. In Proceedings of the 32nd IEEE International Conference on Distributed Computing Systems, ICDCS'12, pages 615\u2013626, 2012."},{"key":"e_1_3_2_2_32_1","volume-title":"Model-based fault injection experiments for the safety analysis of exoskeleton system. arXiv preprint arXiv:2101.01283","author":"Fabarisov Tagir","year":"2021","unstructured":"Tagir Fabarisov, Ilshat Mamaev, Andrey Morozov, and Klaus Janschek. Model-based fault injection experiments for the safety analysis of exoskeleton system. arXiv preprint arXiv:2101.01283, 2021."},{"key":"e_1_3_2_2_33_1","first-page":"76","volume-title":"Proceedings of the 46th Annual IEEE\/IFIP International Conference on Dependable Systems and Networks Workshop, DSN-W","author":"Fang Bo","year":"2016","unstructured":"Bo Fang, Panruo Wu, Qiang Guan, Nathan DeBardeleben, Laura Monroe, Sean Blanchard, Zhizong Chen, Karthik Pattabiraman, and Matei Ripeanu. SDC is in the eye of the beholder: A survey and preliminary study. In Proceedings of the 46th Annual IEEE\/IFIP International Conference on Dependable Systems and Networks Workshop, DSN-W, pages 72\u201376, 2016."},{"key":"e_1_3_2_2_34_1","first-page":"12","volume-title":"Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, SC'12","author":"Fiala David","year":"2012","unstructured":"David Fiala, Frank Mueller, Christian Engelmann, Rolf Riesen, Kurt Ferreira, and Ron Brightwell. Detection and correction of silent data corruption for large-scale high-performance computing. In Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, SC'12, pages 1\u201312, 2012."},{"key":"e_1_3_2_2_35_1","volume-title":"Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation, OSDI'20","author":"Fried Joshua","year":"2020","unstructured":"Joshua Fried, Zhenyuan Ruan, Amy Ousterhout, and Adam Belay. Caladan: mitigating interference at microsecond timescales. In Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation, OSDI'20, 2020."},{"key":"e_1_3_2_2_36_1","first-page":"14","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC'17","author":"Georgakoudis Giorgis","year":"2017","unstructured":"Giorgis Georgakoudis, Ignacio Laguna, Dimitrios S Nikolopoulos, and Martin Schulz. Refine: Realistic fault injection via compiler-based instrumentation for accuracy, portability and speed. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC'17, pages 1\u201314, 2017."},{"key":"e_1_3_2_2_37_1","first-page":"30","volume-title":"Proceedings of the 10th USENIX Symposium on Operating Systems Design and Implementation, OSDI'12","author":"Gonzalez Joseph E.","year":"2012","unstructured":"Joseph E. Gonzalez, Yucheng Low, Haijie Gu, Danny Bickson, and Carlos Guestrin. PowerGraph: Distributed Graph-Parallel computation on natural graphs. In Proceedings of the 10th USENIX Symposium on Operating Systems Design and Implementation, OSDI'12, pages 17\u201330, October 2012."},{"key":"e_1_3_2_2_38_1","unstructured":"Google. CPU Check - CPU torture test designed to find CPU hardware faults. https:\/\/github.com\/google\/cpu-check."},{"key":"e_1_3_2_2_39_1","unstructured":"Google. Silent data corruption - industrywide hardware issue impacting computer CPUs. https:\/\/support.google.com\/cloud\/answer\/10759085."},{"key":"e_1_3_2_2_40_1","volume-title":"Verifying data integrity on Google Cloud. https:\/\/cloud.google.com\/kms\/docs\/data-integrity-guidelines","year":"2025","unstructured":"Google. Verifying data integrity on Google Cloud. https:\/\/cloud.google.com\/kms\/docs\/data-integrity-guidelines, 2025."},{"key":"e_1_3_2_2_41_1","first-page":"428","volume-title":"Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS'20","author":"Gorjiara Hamed","year":"2021","unstructured":"Hamed Gorjiara, Guoqing Harry Xu, and Brian Demsky. Jaaru: efficiently model checking persistent memory programs. In Proceedings of the 26th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS'20, pages 415\u2013428, 2021."},{"key":"e_1_3_2_2_42_1","volume-title":"Proceedings of the 18th Workshop on Hot Topics in Operating Systems, HotOS'21","author":"Hochschild Peter H.","year":"2021","unstructured":"Peter H. Hochschild, Paul Jack Turner, Jeffrey C. Mogul, Rama Krishna Govindaraju, Parthasarathy Ranganathan, David E. Culler, and Amin Vahdat. Cores that don't count. In Proceedings of the 18th Workshop on Hot Topics in Operating Systems, HotOS'21, 2021."},{"key":"e_1_3_2_2_43_1","first-page":"10","volume-title":"Proceedings of the 2010 IEEE International Test Conference","author":"Hong Ted","year":"2010","unstructured":"Ted Hong, Yanjing Li, Sung-Boem Park, Diana Mui, David Lin, Ziyad Abdel Kaleq, Nagib Hakim, Helia Naeimi, Donald S. Gardner, and Subhasish Mitra. QED: Quick error detection tests for effective post-silicon validation. In Proceedings of the 2010 IEEE International Test Conference, pages 1\u201310, 2010."},{"key":"e_1_3_2_2_44_1","first-page":"14","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC'22","author":"Huang Yafan","year":"2022","unstructured":"Yafan Huang, Shengjian Guo, Sheng Di, Guanpeng Li, and Franck Cappello. Mitigating silent data corruptions in HPC applications across multiple program inputs. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC'22, pages 1\u201314, 2022."},{"key":"e_1_3_2_2_45_1","volume-title":"Xeon scalable processors, products formerly Cascade Lake. https:\/\/www.intel.com\/content\/www\/us\/en\/products\/platforms\/details\/cascade-lake.html","year":"2023","unstructured":"Intel. Xeon scalable processors, products formerly Cascade Lake. https:\/\/www.intel.com\/content\/www\/us\/en\/products\/platforms\/details\/cascade-lake.html, 2023."},{"key":"e_1_3_2_2_46_1","first-page":"46","volume-title":"Proceedings of the 9th Conference on Machine Translation","author":"Kocmi Tom","year":"2024","unstructured":"Tom Kocmi, Eleftherios Avramidis, Rachel Bawden, Ond\u0159ej Bojar, Anton Dvorkovich, Christian Federmann, Mark Fishel, Markus Freitag, Thamme Gowda, Roman Grundkiewicz, Barry Haddow, Marzena Karpinska, Philipp Koehn, Benjamin Marie, Christof Monz, Kenton Murray, Masaaki Nagata, Martin Popel, Maja Popovi\u0107, Mariya Shmatova, Steinth\u00f3r Steingr\u00edmsson, and Vil\u00e9m Zouhar. Findings of the WMT24 general machine translation shared task: The LLM era is here but MT is not solved yet. In Barry Haddow, Tom Kocmi, Philipp Koehn, and Christof Monz, editors, Proceedings of the 9th Conference on Machine Translation, pages 1\u201346, November 2024."},{"key":"e_1_3_2_2_47_1","first-page":"51","volume":"121","author":"Lamport Leslie","year":"2001","unstructured":"Leslie Lamport. Paxos made simple. ACM SIGACT News (Distributed Computing Column) 32, 4 (Whole Number 121, December 2001), pages 51\u201358, December 2001.","journal-title":"Whole Number"},{"key":"e_1_3_2_2_48_1","volume-title":"Managing data corruption in the cloud. https:\/\/www.mongodb.com\/blog\/post\/managing-data-corruption-in-the-cloud","author":"Liles Bob","year":"2024","unstructured":"Bob Liles. Managing data corruption in the cloud. https:\/\/www.mongodb.com\/blog\/post\/managing-data-corruption-in-the-cloud, 2024."},{"key":"e_1_3_2_2_49_1","first-page":"16","volume-title":"Proceedings of the 2015 IEEE International Conference on Software Quality, Reliability and Security","author":"Lu Qining","year":"2015","unstructured":"Qining Lu, Mostafa Farahani, Jiesheng Wei, Anna Thomas, and Karthik Pattabiraman. LLFI: An intermediate code-level fault injection tool for hardware faults. In Proceedings of the 2015 IEEE International Conference on Software Quality, Reliability and Security, pages 11\u201316, 2015."},{"key":"e_1_3_2_2_50_1","first-page":"107","volume-title":"Proceedings of the 43rd ACM SIGPLAN International Conference on Programming Language Design and Implementation, PLDI'22","author":"Ma Haoran","year":"2022","unstructured":"Haoran Ma, Shi Liu, Chenxi Wang, Yifan Qiao, Michael D. Bond, Stephen M. Blackburn, Miryung Kim, and Guoqing Harry Xu. Mako: A low-pause, high-throughput evacuating collector for memory-disaggregated datacenters. In Proceedings of the 43rd ACM SIGPLAN International Conference on Programming Language Design and Implementation, PLDI'22, pages 92\u2013107, 2022."},{"key":"e_1_3_2_2_51_1","first-page":"18","volume-title":"Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation, NSDI'22","author":"McClure Sarah","year":"2022","unstructured":"Sarah McClure, Amy Ousterhout, Scott Shenker, and Sylvia Ratnasamy. Efficient scheduling policies for Microsecond-Scale tasks. In Proceedings of the 19th USENIX Symposium on Networked Systems Design and Implementation, NSDI'22, pages 1\u201318, April 2022."},{"key":"e_1_3_2_2_52_1","first-page":"619","volume-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC'14","author":"Michalak Sarah E.","year":"2014","unstructured":"Sarah E. Michalak, William N. Rust, John T. Daly, Rew J. Dubois, and David H. Dubois. Correctness field testing of production and decommissioned high performance computing platforms at Los Alamos National Laboratory. In Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC'14, pages 609\u2013619, 2014."},{"key":"e_1_3_2_2_53_1","volume-title":"financial, and ML analysis. https:\/\/github.com\/hosseinmoein\/DataFrame","author":"Moein Hossein","year":"2020","unstructured":"Hossein Moein. C++ dataframe for statistical, financial, and ML analysis. https:\/\/github.com\/hosseinmoein\/DataFrame, 2020."},{"key":"e_1_3_2_2_54_1","first-page":"110","volume-title":"Proceedings of the 29th Annual International Symposium on Computer Architecture, ISCA'02","author":"Mukherjee S.S.","year":"2002","unstructured":"S.S. Mukherjee, M. Kontz, and S.K. Reinhardt. Detailed design and evaluation of redundant multi-threading alternatives. In Proceedings of the 29th Annual International Symposium on Computer Architecture, ISCA'02, pages 99\u2013110, 2002."},{"key":"e_1_3_2_2_55_1","first-page":"553","volume-title":"Proceedings of the 27th ACM Symposium on Operating Systems Principles, SOSP'19","author":"Navasca Christian","year":"2019","unstructured":"Christian Navasca, Cheng Cai, Khanh Nguyen, Brian Demsky, Shan Lu, Miryung Kim, and Guoqing Harry Xu. Gerenuk: thin computation over big native data using speculative program transformation. In Proceedings of the 27th ACM Symposium on Operating Systems Principles, SOSP'19, page 538\u2013553, 2019."},{"key":"e_1_3_2_2_56_1","first-page":"598","volume-title":"Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI'20","author":"Ngo Khiem","year":"2020","unstructured":"Khiem Ngo, Siddhartha Sen, and Wyatt Lloyd. Tolerating slowdowns in replicated state machines using copilots. In Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI'20, pages 583\u2013598, November 2020."},{"key":"e_1_3_2_2_57_1","first-page":"365","volume-title":"Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation, OSDI'16","author":"Nguyen Khanh","year":"2016","unstructured":"Khanh Nguyen, Lu Fang, Guoqing Xu, Brian Demsky, Shan Lu, Sanazsadat Alamian, and Onur Mutlu. Yak: a high-performance big-data-friendly garbage collector. In Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation, OSDI'16, page 349\u2013365, 2016."},{"key":"e_1_3_2_2_58_1","first-page":"690","volume-title":"ASPLOS'15","author":"Nguyen Khanh","year":"2015","unstructured":"Khanh Nguyen, Kai Wang, Yingyi Bu, Lu Fang, Jianfei Hu, and Guoqing Xu. FACADE: A compiler and runtime for (almost) object-bounded Big Data applications. ASPLOS'15, pages 675\u2013690, 2015."},{"key":"e_1_3_2_2_59_1","first-page":"319","volume-title":"2014 USENIX Annual Technical Conference, ATC'14","author":"Ongaro Diego","year":"2014","unstructured":"Diego Ongaro and John Ousterhout. In search of an understandable consensus algorithm. In 2014 USENIX Annual Technical Conference, ATC'14, pages 305\u2013319, June 2014."},{"key":"e_1_3_2_2_60_1","first-page":"377","volume-title":"Proceedings of the 16th USENIX Conference on Networked Systems Design and Implementation, NSDI'19","author":"Ousterhout Amy","year":"2019","unstructured":"Amy Ousterhout, Joshua Fried, Jonathan Behrens, Adam Belay, and Hari Balakrishnan. Shenango: achieving high cpu efficiency for latency-sensitive datacenter workloads. In Proceedings of the 16th USENIX Conference on Networked Systems Design and Implementation, NSDI'19, page 361\u2013377, 2019."},{"issue":"4","key":"e_1_3_2_2_61_1","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1007\/s002360050048","volume":"33","author":"O'Neil Patrick","year":"1996","unstructured":"Patrick O'Neil, Edward Cheng, Dieter Gawlick, and Elizabeth O'Neil. The log-structured merge-tree (LSM-tree). Acta Inf., 33(4):351\u2013385, June 1996.","journal-title":"Acta Inf."},{"key":"e_1_3_2_2_62_1","volume-title":"Proceedings of the 13th IEEE International Symposium on High Performance Computer Architecture, HPCA'07","author":"Ranger Colby","year":"2007","unstructured":"Colby Ranger, Ramanan Raghuraman, Arun Penmetsa, Gary Bradski, and Christos Kozyrakis. Evaluating mapreduce for multi-core and multiprocessor systems. In Proceedings of the 13th IEEE International Symposium on High Performance Computer Architecture, HPCA'07, 2007."},{"key":"e_1_3_2_2_63_1","first-page":"36","volume-title":"Proceedings of the 27th International Symposium on Computer Architecture (IEEE Cat. No.RS00201)","author":"Reinhardt S.K.","year":"2000","unstructured":"S.K. Reinhardt and S.S. Mukherjee. Transient fault detection via simultaneous multithreading. In Proceedings of the 27th International Symposium on Computer Architecture (IEEE Cat. No.RS00201), pages 25\u201336, 2000."},{"key":"e_1_3_2_2_64_1","first-page":"91","volume-title":"Digest of Papers. Twenty-Ninth Annual International Symposium on Fault-Tolerant Computing (Cat. No.99CB36352)","author":"Rotenberg E.","year":"1999","unstructured":"E. Rotenberg. AR-SMT: a microarchitectural approach to fault tolerance in microprocessors. In Digest of Papers. Twenty-Ninth Annual International Symposium on Fault-Tolerant Computing (Cat. No.99CB36352), pages 84\u201391, 1999."},{"key":"e_1_3_2_2_65_1","first-page":"2177","volume-title":"Proceedings of the 1998 IEEE International Conference on Systems, Man, and Cybernetics (Cat. No.98CH36218), SMC'98","volume":"3","author":"Saxena N.R.","year":"1998","unstructured":"N.R. Saxena and E.J. McCluskey. Dependable adaptive computing systems-the ROAR project. In Proceedings of the 1998 IEEE International Conference on Systems, Man, and Cybernetics (Cat. No.98CH36218), SMC'98, pages 2172\u20132177 vol.3, 1998."},{"key":"e_1_3_2_2_66_1","volume-title":"Torsten Hoefler. Evaluating the Cost of Atomic Operations on Modern Architectures. In Proceedings of the 24th International Conference on Parallel Architectures and Compilation, PACT'15, 10","author":"Schweizer H.","year":"2015","unstructured":"H. Schweizer, Maciej Besta, and Torsten Hoefler. Evaluating the Cost of Atomic Operations on Modern Architectures. In Proceedings of the 24th International Conference on Parallel Architectures and Compilation, PACT'15, 10 2015."},{"issue":"1","key":"e_1_3_2_2_67_1","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1147\/rd.201.0020","volume":"20","author":"Sklaroff J. R.","year":"1976","unstructured":"J. R. Sklaroff. Redundancy management technique for space shuttle computers. IBM Journal of Research and Development, 20(1):20\u201328, 1976.","journal-title":"IBM Journal of Research and Development"},{"key":"e_1_3_2_2_68_1","first-page":"1052","volume-title":"Proceedings of the 2017 ACM International Conference on Management of Data, SIGMOD'17","author":"Verbitski Alexandre","year":"2017","unstructured":"Alexandre Verbitski, Anurag Gupta, Debanjan Saha, Murali Brahmadesam, Kamal Gupta, Raman Mittal, Sailesh Krishnamurthy, Sandor Maurice, Tengiz Kharatishvili, and Xiaofeng Bao. Amazon Aurora: Design considerations for high throughput cloud-native relational databases. In Proceedings of the 2017 ACM International Conference on Management of Data, SIGMOD'17, page 1041\u20131052, 2017."},{"key":"e_1_3_2_2_69_1","first-page":"362","volume-title":"Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI'19","author":"Wang Chenxi","year":"2019","unstructured":"Chenxi Wang, Huimin Cui, Ting Cao, John Zigman, Haris Volos, Onur Mutlu, Fang Lv, Xiaobing Feng, and Guoqing Harry Xu. Panthera: Holistic memory management for big data processing over hybrid memories. In Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI'19, pages 347\u2013362, 2019."},{"key":"e_1_3_2_2_70_1","first-page":"404","volume-title":"Proceedings of the 22nd International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS'17","author":"Wang Kai","year":"2017","unstructured":"Kai Wang, Aftab Hussain, Zhiqiang Zuo, Guoqing Xu, and Ardalan Amiri Sani. Graspan: A single-machine disk-based graph system for interprocedural static analyses of large-scale systems code. In Proceedings of the 22nd International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS'17, pages 389\u2013404, 2017."},{"key":"e_1_3_2_2_71_1","volume-title":"Proceedings of the 19th ACM Symposium on Operating Systems Principles, SOSP'23","author":"Wang Shaobu","year":"2023","unstructured":"Shaobu Wang, Guangyan Zhang, Junyu Wei, Yang Wang, Jiesheng Wu, and Qingchao Luo. Understanding silent data corruptions in a large production CPU population. In Proceedings of the 19th ACM Symposium on Operating Systems Principles, SOSP'23, 2023."},{"key":"e_1_3_2_2_72_1","first-page":"10","volume-title":"Proceedings of the 2nd USENIX Conference on Hot Topics in Cloud Computing, HotCloud'10","author":"Zaharia Matei","year":"2010","unstructured":"Matei Zaharia, Mosharaf Chowdhury, Michael J. Franklin, Scott Shenker, and Ion Stoica. Spark: cluster computing with working sets. In Proceedings of the 2nd USENIX Conference on Hot Topics in Cloud Computing, HotCloud'10, page 10, 2010."},{"key":"e_1_3_2_2_73_1","first-page":"649","volume-title":"Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI'20","author":"Zhang Yunhao","year":"2020","unstructured":"Yunhao Zhang, Srinath Setty, Qi Chen, Lidong Zhou, and Lorenzo Alvisi. Byzantine ordered consensus without byzantine oligarchy. In Proceedings of the 14th USENIX Symposium on Operating Systems Design and Implementation, OSDI'20, pages 633\u2013649, November 2020."},{"key":"e_1_3_2_2_74_1","first-page":"91","volume-title":"Proceedings of the 43rd ACM SIGPLAN International Conference on Programming Language Design and Implementation, PLDI'22","author":"Zhao Wenyu","year":"2022","unstructured":"Wenyu Zhao, Stephen M. Blackburn, and Kathryn S. McKinley. Lowlatency, high-throughput garbage collection. In Proceedings of the 43rd ACM SIGPLAN International Conference on Programming Language Design and Implementation, PLDI'22, page 76\u201391, 2022."},{"key":"e_1_3_2_2_75_1","first-page":"929","volume-title":"Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation, PLDI'21","author":"Zuo Zhiqiang","year":"2021","unstructured":"Zhiqiang Zuo, Yiyu Zhang, Qiuhong Pan, Shenming Lu, Yue Li, Linzhang Wang, Xuandong Li, and Guoqing Harry Xu. Chianina: an evolving graph system for flow- and context-sensitive analyses of million lines of c code. In Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation, PLDI'21, pages 914\u2013929, 2021."}],"event":{"name":"SOSP '25: ACM SIGOPS 31st Symposium on Operating Systems Principles","location":"Lotte Hotel World Seoul Republic of Korea","acronym":"SOSP '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","USENIX"]},"container-title":["Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles"],"original-title":[],"deposited":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T12:57:02Z","timestamp":1759323422000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731569.3764832"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":75,"alternative-id":["10.1145\/3731569.3764832","10.1145\/3731569"],"URL":"https:\/\/doi.org\/10.1145\/3731569.3764832","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"2025-10-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}