{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:43:41Z","timestamp":1772120621640,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Qiyuan Lab Innovation Fund","award":["S20210201079"],"award-info":[{"award-number":["S20210201079"]}]},{"name":"ByteDance University Research Project"},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072006, 92167104"],"award-info":[{"award-number":["62072006, 92167104"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Foundation of Shuanghu Laboratory","award":["2024JK15"],"award-info":[{"award-number":["2024JK15"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3672051","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:54:55Z","timestamp":1724561695000},"page":"141-152","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["FaultInsight: Interpreting Hyperscale Data Center Host Faults"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0366-0410","authenticated-orcid":false,"given":"Tingzhu","family":"Bi","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3965-2949","authenticated-orcid":false,"given":"Zhang","family":"Yang","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4139-1477","authenticated-orcid":false,"given":"Yicheng","family":"Pan","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4334-5159","authenticated-orcid":false,"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1963-2513","authenticated-orcid":false,"given":"Meng","family":"Ma","sequence":"additional","affiliation":[{"name":"Peking University &amp; Shuanghu Laboratory, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1591-0480","authenticated-orcid":false,"given":"Xinrui","family":"Jiang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4714-2634","authenticated-orcid":false,"given":"Linlin","family":"Han","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3981-8163","authenticated-orcid":false,"given":"Feng","family":"Wang","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7332-0449","authenticated-orcid":false,"given":"Xian","family":"Liu","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8854-2079","authenticated-orcid":false,"given":"Ping","family":"Wang","sequence":"additional","affiliation":[{"name":"Peking University &amp; Shuanghu Laboratory, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18)","author":"Arzani Behnaz","year":"2018","unstructured":"Behnaz Arzani, Selim Ciraci, Luiz Chamon, Yibo Zhu, Hongqiang Harry Liu, Jitu Padhye, Boon Thau Loo, and Geoff Outhred. 2018. 007: Democratically finding the cause of packet drops. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18). 419--435."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107638"},{"key":"e_1_3_2_2_3_1","volume-title":"An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271","author":"Bai Shaojie","year":"2018","unstructured":"Shaojie Bai, J Zico Kolter, and Vladlen Koltun. 2018. An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271 (2018)."},{"key":"e_1_3_2_2_4_1","volume-title":"Site reliability engineering: How Google runs production systems. \" O'Reilly Media","author":"Beyer Betsy","unstructured":"Betsy Beyer, Chris Jones, Jennifer Petoff, and Niall Richard Murphy. 2016. Site reliability engineering: How Google runs production systems. \" O'Reilly Media, Inc.\"."},{"key":"e_1_3_2_2_5_1","volume-title":"David K Rensin, Kent Kawahara, and Stephen Thorne.","author":"Beyer Betsy","year":"2018","unstructured":"Betsy Beyer, Niall Richard Murphy, David K Rensin, Kent Kawahara, and Stephen Thorne. 2018. The site reliability workbook: practical ways to implement SRE. \" O'Reilly Media, Inc.\"."},{"key":"e_1_3_2_2_6_1","volume-title":"The anatomy of a large-scale hypertextual web search engine. Computer networks and ISDN systems","author":"Brin Sergey","year":"1998","unstructured":"Sergey Brin and Lawrence Page. 1998. The anatomy of a large-scale hypertextual web search engine. Computer networks and ISDN systems, Vol. 30, 1--7 (1998), 107--117."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2014.6848128"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-Companion.2019.00023"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICAC.2019.00032"},{"key":"e_1_3_2_2_10_1","volume-title":"Graph drawing by force-directed placement. Software: Practice and experience","author":"Fruchterman Thomas MJ","year":"1991","unstructured":"Thomas MJ Fruchterman and Edward M Reingold. 1991. Graph drawing by force-directed placement. Software: Practice and experience, Vol. 21, 11 (1991), 1129--1164."},{"key":"e_1_3_2_2_11_1","volume-title":"Investigating causal relations by econometric models and cross-spectral methods. Econometrica: journal of the Econometric Society","author":"Granger Clive WJ","year":"1969","unstructured":"Clive WJ Granger. 1969. Investigating causal relations by econometric models and cross-spectral methods. Econometrica: journal of the Econometric Society (1969), 424--438."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1002\/for.3980030207"},{"key":"e_1_3_2_2_13_1","first-page":"31158","article-title":"Root Cause Analysis of Failures in Microservices through Causal Discovery","volume":"35","author":"Ikram Azam","year":"2022","unstructured":"Azam Ikram, Sarthak Chakraborty, Subrata Mitra, Shiv Saini, Saurabh Bagchi, and Murat Kocaoglu. 2022. Root Cause Analysis of Failures in Microservices through Causal Discovery. Advances in Neural Information Processing Systems, Vol. 35 (2022), 31158--31170.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_14_1","volume-title":"Economy statistical recurrent units for inferring nonlinear granger causality. arXiv preprint arXiv:1911.09879","author":"Khanna Saurabh","year":"2019","unstructured":"Saurabh Khanna and Vincent YF Tan. 2019. Economy statistical recurrent units for inferring nonlinear granger causality. arXiv preprint arXiv:1911.09879 (2019)."},{"key":"e_1_3_2_2_15_1","volume-title":"Root cause detection in a service-oriented architecture. ACM SIGMETRICS Performance Evaluation Review","author":"Kim Myunghwan","year":"2013","unstructured":"Myunghwan Kim, Roshan Sumbaly, and Sam Shah. 2013. Root cause detection in a service-oriented architecture. ACM SIGMETRICS Performance Evaluation Review (2013), 93--104."},{"key":"e_1_3_2_2_16_1","series-title":"SIAM review","volume-title":"A survey of eigenvector methods for web information retrieval","author":"Langville Amy N","year":"2005","unstructured":"Amy N Langville and Carl D Meyer. 2005. A survey of eigenvector methods for web information retrieval. SIAM review, Vol. 47, 1 (2005), 135--161."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3040980"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539041"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549092"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-03596-9_1"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-SEIP52600.2021.00043"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSRE.2019.00014"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623374"},{"key":"e_1_3_2_2_24_1","volume-title":"ServiceRank: Root Cause Identification of Anomaly in Large-Scale Microservice Architecture","author":"Ma Meng","year":"2021","unstructured":"Meng Ma, Weilan Lin, Disheng Pan, and Ping Wang. 2021. ServiceRank: Root Cause Identification of Anomaly in Large-Scale Microservice Architecture. IEEE Transactions on Dependable and Secure Computing (2021)."},{"key":"e_1_3_2_2_25_1","volume-title":"AutoMAP: Diagnose Your Microservice-based Web Applications Automatically. In WWW '20: The Web Conference 2020","author":"Ma Meng","year":"2020","unstructured":"Meng Ma, Jingmin Xu, Yuan Wang, Pengfei Chen, Zonghua Zhang, and Ping Wang. 2020. AutoMAP: Diagnose Your Microservice-based Web Applications Automatically. In WWW '20: The Web Conference 2020, Taipei, Taiwan, April 20--24, 2020. ACM \/ IW3C2, 246--258."},{"key":"e_1_3_2_2_26_1","volume-title":"Interpretable Models for Granger Causality Using Self-explaining Neural Networks. In International Conference on Learning Representations.","author":"Marcinkevivcs Rivcards","year":"2020","unstructured":"Rivcards Marcinkevivcs and Julia E Vogt. 2020. Interpretable Models for Granger Causality Using Self-explaining Neural Networks. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICST.2018.00034"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.5555\/343374"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.3390\/make1010019"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460319.3464805"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1631\/FITEE.2000153"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.1981.234519"},{"key":"e_1_3_2_2_34_1","volume-title":"Detecting and quantifying causal associations in large nonlinear time series datasets. Science advances","author":"Runge Jakob","year":"2019","unstructured":"Jakob Runge, Peer Nowack, Marlene Kretschmer, Seth Flaxman, and Dino Sejdinovic. 2019. Detecting and quantifying causal associations in large nonlinear time series datasets. Science advances, Vol. 5, 11 (2019), eaau4996."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1530-9290.2008.00015.x"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-92bf1922-011"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313653"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-statistics-040120-010930"},{"key":"e_1_3_2_2_39_1","volume-title":"prediction, and search","author":"Spirtes Peter","unstructured":"Peter Spirtes, Clark N Glymour, Richard Scheines, and David Heckerman. 2000. Causation, prediction, and search. MIT press."},{"key":"e_1_3_2_2_40_1","volume-title":"Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research, Vol. 15, 1 (2014), 1929--1958."},{"key":"e_1_3_2_2_41_1","first-page":"4267","article-title":"Neural granger causality","volume":"44","author":"Tank Alex","year":"2021","unstructured":"Alex Tank, Ian Covert, Nicholas Foti, Ali Shojaie, and Emily B Fox. 2021. Neural granger causality. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 8 (2021), 4267--4279.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3135974.3135977"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.2202\/1557-4679.1008"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2553070.2553079"},{"key":"e_1_3_2_2_45_1","volume-title":"CloudRanger: Root Cause Identification for Cloud Native Systems. In 18th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing, CCGRID 2018","author":"Wang Ping","year":"2018","unstructured":"Ping Wang, Jingmin Xu, Meng Ma, Weilan Lin, Disheng Pan, Yuan Wang, and Pengfei Chen. 2018. CloudRanger: Root Cause Identification for Cloud Native Systems. In 18th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing, CCGRID 2018, Washington, DC, USA, May 1--4, 2018. IEEE Computer Society, 492--502."},{"key":"e_1_3_2_2_46_1","volume-title":"MicroRCA: Root Cause Localization of Performance Issues in Microservices. In NOMS 2020 - IEEE\/IFIP Network Operations and Management Symposium","author":"Wu Li","year":"2020","unstructured":"Li Wu, Johan Tordsson, Erik Elmroth, and Odej Kao. 2020. MicroRCA: Root Cause Localization of Performance Issues in Microservices. In NOMS 2020 - IEEE\/IFIP Network Operations and Management Symposium, Budapest, Hungary, April 20--24, 2020. IEEE, 1--9."},{"key":"e_1_3_2_2_47_1","volume-title":"Real-Time Diagnosis of Configuration Errors for Software of AI Server Infrastructure","author":"Xu Guangquan","year":"2023","unstructured":"Guangquan Xu, Xinru Ding, Sihan Xu, Yan Jia, Shaoying Liu, Shicheng Feng, and Xi Zheng. 2023. Real-Time Diagnosis of Configuration Errors for Software of AI Server Infrastructure. IEEE Transactions on Dependable and Secure Computing (2023)."},{"key":"e_1_3_2_2_48_1","volume-title":"8th USENIX Symposium on Networked Systems Design and Implementation (NSDI 11)","author":"Yu Minlan","year":"2011","unstructured":"Minlan Yu, Albert Greenberg, Dave Maltz, Jennifer Rexford, Lihua Yuan, Srikanth Kandula, and Changhoon Kim. 2011. Profiling network performance for multi-tier data center applications. In 8th USENIX Symposium on Networked Systems Design and Implementation (NSDI 11)."},{"key":"e_1_3_2_2_49_1","volume-title":"Proceedings, Part I 13","author":"Zeiler Matthew D","year":"2014","unstructured":"Matthew D Zeiler and Rob Fergus. 2014. Visualizing and understanding convolutional networks. In Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6--12, 2014, Proceedings, Part I 13. Springer, 818--833."}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3672051","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3672051","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:04:23Z","timestamp":1750291463000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3672051"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":48,"alternative-id":["10.1145\/3637528.3672051","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3672051","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}