{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:39:07Z","timestamp":1772908747019,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":85,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T00:00:00Z","timestamp":1701302400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,30]]},"DOI":"10.1145\/3611643.3616316","type":"proceedings-article","created":{"date-parts":[[2023,11,30]],"date-time":"2023-11-30T23:14:38Z","timestamp":1701386078000},"page":"682-694","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Outage-Watch: Early Prediction of Outages using Extreme Event Regularizer"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3290-6328","authenticated-orcid":false,"given":"Shubham","family":"Agarwal","sequence":"first","affiliation":[{"name":"Adobe Research, Bangalore, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2051-7424","authenticated-orcid":false,"given":"Sarthak","family":"Chakraborty","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Champaign, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4375-9776","authenticated-orcid":false,"given":"Shaddy","family":"Garg","sequence":"additional","affiliation":[{"name":"Adobe, Bangalore, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4890-6675","authenticated-orcid":false,"given":"Sumit","family":"Bisht","sequence":"additional","affiliation":[{"name":"Amazon, Bangalore, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3663-4331","authenticated-orcid":false,"given":"Chahat","family":"Jain","sequence":"additional","affiliation":[{"name":"Traceable.ai, Bangalore, India"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2170-0493","authenticated-orcid":false,"given":"Ashritha","family":"Gonuguntla","sequence":"additional","affiliation":[{"name":"Cisco, Bangalore, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6568-7104","authenticated-orcid":false,"given":"Shiv","family":"Saini","sequence":"additional","affiliation":[{"name":"Adobe Research, Bangalore, India"}]}],"member":"320","published-online":{"date-parts":[[2023,11,30]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"2016. Anatomy Of An IT Outage: Prediction and Detection. https:\/\/blog.opsramp.com\/it-outage-prediction-and-detection"},{"key":"e_1_3_2_2_2_1","unstructured":"2019. Metrics That Matter - ACM Queue. https:\/\/queue.acm.org\/detail.cfm?id=3309571"},{"key":"e_1_3_2_2_3_1","unstructured":"2021. 7 Biggest Cloud Outages of the Past Year. https:\/\/techgenix.com\/7-biggest-cloud-outages-services-2021\/"},{"key":"e_1_3_2_2_4_1","volume-title":"Cloud Adoption Statistics for","year":"2022","unstructured":"2022. Cloud Adoption Statistics for 2022.. https:\/\/webtribunal.net\/blog\/cloud-adoption-statistics\/"},{"key":"e_1_3_2_2_5_1","unstructured":"2023. Grafana.. https:\/\/grafana.com\/"},{"key":"e_1_3_2_2_6_1","unstructured":"2023. Metrics collected by the CloudWatch agent - Amazon CloudWatch. https:\/\/docs.aws.amazon.com\/AmazonCloudWatch\/latest\/monitoring\/metrics-collected-by-CloudWatch-agent.html"},{"key":"e_1_3_2_2_7_1","unstructured":"2023. New Relic.. https:\/\/newrelic.com\/"},{"key":"e_1_3_2_2_8_1","unstructured":"2023. Splunk.. https:\/\/www.splunk.com\/"},{"key":"e_1_3_2_2_9_1","volume-title":"Tensorflow: Large-scale machine learning on heterogeneous distributed systems. arXiv preprint arXiv:1603.04467.","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Ashish Agarwal, Paul Barham, Eugene Brevdo, Zhifeng Chen, Craig Citro, Greg S Corrado, Andy Davis, Jeffrey Dean, and Matthieu Devin. 2016. Tensorflow: Large-scale machine learning on heterogeneous distributed systems. arXiv preprint arXiv:1603.04467."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","unstructured":"Ratnadip Adhikari and R. K. Agrawal. 2013. An Introductory Study on Time Series Modeling and Forecasting. https:\/\/doi.org\/10.48550\/arXiv.1302.6613 arxiv:1302.6613. 10.48550\/arXiv.1302.6613","DOI":"10.48550\/arXiv.1302.6613"},{"key":"e_1_3_2_2_11_1","volume-title":"Extreme events in nature and society","author":"Albeverio Sergio","unstructured":"Sergio Albeverio, Volker Jentsch, and Holger Kantz. 2006. Extreme events in nature and society. Springer Science & Business Media."},{"key":"e_1_3_2_2_12_1","unstructured":"Amazon. 2023. Post-Event Summaries. Retrieved from. https:\/\/aws.amazon.com\/cn\/premiumsupport\/technology\/pes\/"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigDataCongress.2018.00044"},{"key":"e_1_3_2_2_14_1","unstructured":"Atlassian. 2023. Atlassian incident management. Retrieved from. https:\/\/www.atlassian.com\/incident-management\/incident-response\/incident-commander"},{"key":"e_1_3_2_2_15_1","unstructured":"Azure. 2023. Azure status history | Microsoft Azure. Retrieved from. https:\/\/status.azure.com\/en-us\/status\/history\/"},{"key":"e_1_3_2_2_16_1","volume-title":"Site Reliability Engineering: How Google Runs Production Systems","author":"Beyer Betsy","year":"1929","unstructured":"Betsy Beyer, Chris Jones, Jennifer Petoff, and Niall Richard Murphy. 2016. Site Reliability Engineering: How Google Runs Production Systems (1st ed.). O\u2019Reilly Media, Inc.. isbn:149192912X","edition":"1"},{"key":"e_1_3_2_2_17_1","volume-title":"Multitask learning. Machine learning, 28, 1","author":"Caruana Rich","year":"1997","unstructured":"Rich Caruana. 1997. Multitask learning. Machine learning, 28, 1 (1997), 41\u201375."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2309.07230"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583274"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1201\/9781420036206"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2016.2607739"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3409768"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313501"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313501"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417055"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510085"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1"},{"key":"e_1_3_2_2_28_1","volume-title":"An introduction to statistical modeling of extreme values. 208","author":"Coles Stuart","unstructured":"Stuart Coles, Joanna Bawa, Lesley Trenner, and Pat Dorazio. 2001. An introduction to statistical modeling of extreme values. 208, Springer."},{"key":"e_1_3_2_2_29_1","unstructured":"crn. 2023. 15-biggest-cloud-outages. Retrieved from. https:\/\/www.crn.com\/news\/cloud\/the-15-biggest-cloud-outages-of-2022"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","unstructured":"Zhiyong Cui Ruimin Ke Ziyuan Pu and Yinhai Wang. 2018. Deep bidirectional and unidirectional LSTM recurrent neural network for network-wide traffic speed prediction. arXiv preprint arXiv:1801.02143 https:\/\/doi.org\/10.48550\/arXiv.2005.11627 10.48550\/arXiv.2005.11627","DOI":"10.48550\/arXiv.2005.11627"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-Companion.2019.00023"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10806"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330896"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/BRACIS.2017.72"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1002\/env.2176"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.5194\/npg-18-295-2011"},{"key":"e_1_3_2_2_37_1","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics. 315\u2013323","author":"Glorot Xavier","year":"2011","unstructured":"Xavier Glorot, Antoine Bordes, and Yoshua Bengio. 2011. Deep sparse rectifier neural networks. In Proceedings of the fourteenth international conference on artificial intelligence and statistics. 315\u2013323."},{"key":"e_1_3_2_2_38_1","unstructured":"Google. 2023. Google Cloud Service Health. Retrieved from. https:\/\/status.cloud.google.com\/summary"},{"key":"e_1_3_2_2_39_1","unstructured":"Google. 2023. Google SRE book. Retrieved from. https:\/\/landing.google.com\/sre\/sre-book\/chapters\/managing-incidents\/"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"crossref","unstructured":"John Gurland. 1954. Hypothesis Testing in Time Series Analysis..","DOI":"10.2307\/2281054"},{"key":"e_1_3_2_2_42_1","volume-title":"Extreme value theory: an introduction. 3","author":"Haan Laurens","unstructured":"Laurens Haan and Ana Ferreira. 2006. Extreme value theory: an introduction. 3, Springer."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v027.i03"},{"key":"e_1_3_2_2_45_1","unstructured":"Muhammad Azam Ikram Sarthak Chakraborty Subrata Mitra Shiv Saini Saurabh Bagchi and Murat Kocaoglu. 2022. Root Cause Analysis of Failures in Microservices through Causal Discovery. In Advances in Neural Information Processing Systems Alice H. Oh Alekh Agarwal Danielle Belgrave and Kyunghyun Cho (Eds.). https:\/\/openreview.net\/forum?id=weoLjoYFvXY"},{"key":"e_1_3_2_2_46_1","unstructured":"informationweek. 2023. cloud is fragile. Retrieved from. https:\/\/www.informationweek.com\/cloud\/special-report-how-fragile-is-the-cloud-really-"},{"key":"e_1_3_2_2_47_1","unstructured":"informationweek. 2023. cloud outages. Retrieved from. https:\/\/www.informationweek.com\/cloud\/cloud-outages-causes-consequences-prevention-recovery"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/1555228.1555232"},{"key":"e_1_3_2_2_49_1","volume-title":"International conference on machine learning. 34","author":"Laptev Nikolay","year":"2017","unstructured":"Nikolay Laptev, Jason Yosinski, Li Erran Li, and Slawek Smyl. 2017. Time-series extreme event forecasting with neural networks at uber. In International conference on machine learning. 34, 1\u20135."},{"key":"e_1_3_2_2_50_1","volume-title":"Fighting the Fog of War: Automated Incident Detection for Cloud Systems. In 2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Li Liqun","year":"2021","unstructured":"Liqun Li, Xu Zhang, Xin Zhao, Hongyu Zhang, Yu Kang, Pu Zhao, Bo Qiao, Shilin He, Pochian Lee, and Jeffrey Sun. 2021. Fighting the Fog of War: Automated Incident Detection for Cloud Systems. In 2021 USENIX Annual Technical Conference (USENIX ATC 21). 131\u2013146."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539041"},{"key":"e_1_3_2_2_52_1","article-title":"Time-series forecasting with deep learning: a survey","volume":"379","author":"Lim Bryan","year":"2021","unstructured":"Bryan Lim and Stefan Zohren. 2021. Time-series forecasting with deep learning: a survey. Philosophical Transactions of the Royal Society A, 379, 2194 (2021), 20200209.","journal-title":"Philosophical Transactions of the Royal Society A"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623360"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/316"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/72.548162"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2858826"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2013.6693105"},{"key":"e_1_3_2_2_58_1","unstructured":"Sidi Lu Bing Luo Tirthak Patel Yongtao Yao Devesh Tiwari and Weisong Shi. 2020. Making disk failure predictions smarter!. In FAST. 151\u2013167."},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11422"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2801475"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.3390\/s19214612"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.3389\/fbinf.2022.927312"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"crossref","unstructured":"Yao Qin Dongjin Song Haifeng Chen Wei Cheng Guofei Jiang and Garrison Cottrell. 2017. A dual-stage attention-based recurrent neural network for time series prediction. arXiv preprint arXiv:1704.02971.","DOI":"10.24963\/ijcai.2017\/366"},{"key":"e_1_3_2_2_64_1","unstructured":"readitquik. 2023. ReadItQuick Cloud Failures. Retrieved from. https:\/\/www.readitquik.com\/articles\/cloud-3\/6-cloud-computing-failures-that-shocked-the-world\/"},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-73003-5_196"},{"key":"e_1_3_2_2_66_1","unstructured":"Sebastian Ruder. 2017. An Overview of Multi-Task Learning in Deep Neural Networks. arxiv:1706.05098."},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3380966"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","unstructured":"Sima Siami Namini Neda Tavakoli and Akbar Siami Namin. 2018. A Comparison of ARIMA and LSTM in Forecasting Time Series. 1394\u20131401. https:\/\/doi.org\/10.1109\/ICMLA.2018.00227 10.1109\/ICMLA.2018.00227","DOI":"10.1109\/ICMLA.2018.00227"},{"key":"e_1_3_2_2_71_1","doi-asserted-by":"publisher","DOI":"10.1111\/2041-210X.13140"},{"key":"e_1_3_2_2_72_1","doi-asserted-by":"publisher","DOI":"10.1007\/11941439_114"},{"key":"e_1_3_2_2_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330672"},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"publisher","DOI":"10.1089\/big.2020.0159"},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"publisher","DOI":"10.14778\/3514061.3514067"},{"key":"e_1_3_2_2_76_1","first-page":"10355","volume-title":"International Conference on Machine Learning. 10355\u201310366","author":"Wu Yinjun","year":"2020","unstructured":"Yinjun Wu, Edgar Dobriban, and Susan Davidson. 2020. Deltagrad: Rapid retraining of machine learning models. In International Conference on Machine Learning. 10355\u201310366. https:\/\/doi.org\/PMLR 119:10355-10366"},{"key":"e_1_3_2_2_77_1","unstructured":"Yong Xu Kaixin Sui Randolph Yao Hongyu Zhang Qingwei Lin Yingnong Dang Peng Li Keceng Jiang Wenchi Zhang and Jian-Guang Lou. 2018. Improving service availability of cloud systems by predicting disk error. In 2018 $USENIX$ Annual Technical Conference ($USENIX$$ATC$ 18). 481\u2013494."},{"key":"e_1_3_2_2_78_1","volume-title":"Phyo Phyo San, Xiao Li Li, and Shonali Krishnaswamy.","author":"Yang Jianbo","year":"2015","unstructured":"Jianbo Yang, Minh Nhut Nguyen, Phyo Phyo San, Xiao Li Li, and Shonali Krishnaswamy. 2015. Deep convolutional neural networks on multichannel time series for human activity recognition. In Twenty-fourth international joint conference on artificial intelligence."},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"publisher","DOI":"10.1002\/1097-0142(1950)3:1<32::AID-CNCR2820030106>3.0.CO;2-3"},{"key":"e_1_3_2_2_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3179405"},{"key":"e_1_3_2_2_81_1","volume-title":"2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Zhang Xu","year":"2019","unstructured":"Xu Zhang, Junghyun Kim, Qingwei Lin, Keunhak Lim, Shobhit O Kanaujia, Yong Xu, Kyle Jamieson, Aws Albarghouthi, Si Qin, and Michael J Freedman. 2019. Cross-dataset time series anomaly detection for cloud systems. In 2019 USENIX Annual Technical Conference (USENIX ATC 19). 1063\u20131076."},{"key":"e_1_3_2_2_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377813.3381363"},{"key":"e_1_3_2_2_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3409672"},{"key":"e_1_3_2_2_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155219"},{"key":"e_1_3_2_2_85_1","first-page":"3602","article-title":"What to Do Next: Modeling User Behaviors by Time-LSTM","volume":"17","author":"Zhu Yu","year":"2017","unstructured":"Yu Zhu, Hao Li, Yikang Liao, Beidou Wang, Ziyu Guan, Haifeng Liu, and Deng Cai. 2017. What to Do Next: Modeling User Behaviors by Time-LSTM.. In IJCAI. 17, 3602\u20133608.","journal-title":"IJCAI."}],"event":{"name":"ESEC\/FSE '23: 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","location":"San Francisco CA USA","acronym":"ESEC\/FSE '23","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611643.3616316","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3611643.3616316","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:04Z","timestamp":1750178164000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611643.3616316"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,30]]},"references-count":85,"alternative-id":["10.1145\/3611643.3616316","10.1145\/3611643"],"URL":"https:\/\/doi.org\/10.1145\/3611643.3616316","relation":{},"subject":[],"published":{"date-parts":[[2023,11,30]]},"assertion":[{"value":"2023-11-30","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}