{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T08:50:17Z","timestamp":1768207817163,"version":"3.49.0"},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. Comput. Sci. Technol."],"published-print":{"date-parts":[[2018,1]]},"DOI":"10.1007\/s11390-018-1806-7","type":"journal-article","created":{"date-parts":[[2018,1,30]],"date-time":"2018-01-30T02:44:48Z","timestamp":1517280288000},"page":"24-41","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":15,"title":["A Large-Scale Study of Failures on Petascale Supercomputers"],"prefix":"10.1007","volume":"33","author":[{"given":"Rui-Tao","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zuo-Ning","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,1,26]]},"reference":[{"key":"1806_CR1","unstructured":"Cappello F. Resilience: One of the main challenges for exascale computing. Technical Report of the INRIA-Illinois Joint Laboratory, 2011."},{"key":"1806_CR2","unstructured":"Kusnezov D, Binkley S, Harrod B, Meisner B. DOE exascale initiative. Technical Report of US Department of Energy (DOE), 2013. https:\/\/energy.gov\/downloads\/doe-exascaleinitiative , Dec. 2017."},{"key":"1806_CR3","unstructured":"Kogge P, Bergman K, Borkar S et al. Exascale computing study: Technology challenges in achieving exascale systems. 2008. http:\/\/www.cse.nd.edu\/Reports\/2008\/TR-2008-13.pdf , Dec. 2017."},{"key":"1806_CR4","doi-asserted-by":"crossref","unstructured":"Schroeder B, Gibson G A. A large-scale study of failures in high-performance computing systems. IEEE Transactions on Dependable and Secure Computing, 2010 7(4): 337-350","DOI":"10.1109\/TDSC.2009.4"},{"key":"1806_CR5","unstructured":"Liang Y, Zhang Y, Jette M, Sivasubramaniam A, Sahoo R. BlueGene\/L failure analysis and prediction models. In Proc. the 43rd Annual IEEE\/IFIP International Conference on Dependable Systems and Networks (DSN), June 2006, pp.425-434."},{"key":"1806_CR6","doi-asserted-by":"crossref","unstructured":"Zheng Z, Lan Z, Park B H et al. System log pre-processing to improve failure prediction. In Proc. IEEE\/IFIP International Conference Dependable Systems and Networks, June 29-July 2, 2009.","DOI":"10.1109\/DSN.2009.5270289"},{"key":"1806_CR7","doi-asserted-by":"crossref","unstructured":"Zheng Z, Yu L, Tang W et al. Co-analysis of RAS log and job log on Blue Gene\/P. In Proc. the 2011 IEEE International Parallel & Distributed Processing Symposium, May 2011 pp.840-851.","DOI":"10.1109\/IPDPS.2011.83"},{"key":"1806_CR8","doi-asserted-by":"crossref","unstructured":"Zheng Z, Lan Z. Reliability-aware scalability models for high performance computing. In Proc. IEEE International Conference Cluster Computing and Workshops, Aug. 31-Sept. 4, 2009.","DOI":"10.1109\/CLUSTR.2009.5289177"},{"key":"1806_CR9","doi-asserted-by":"crossref","unstructured":"Heien E, LaPine D, Kondo D et al. Modeling and tolerating heterogeneous failures in large parallel systems. In Proc. the 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, Nov. 2011, Article No. 45.","DOI":"10.1145\/2063384.2063444"},{"key":"1806_CR10","doi-asserted-by":"crossref","unstructured":"Nie B, Tiwari D, Gupta S et al. A large-scale study of softerrors on GPUs in the field. In Proc. the 2016 IEEE International Symposium on High Performance Computer Architecture (HPCA), March 2016, pp.519-530.","DOI":"10.1109\/HPCA.2016.7446091"},{"key":"1806_CR11","doi-asserted-by":"crossref","unstructured":"Schroeder B, Pinheiro E, Weber W. DRAM errors in the wild: A large-scale field study. In Proc. the 11th International Joint Conference on Measurement and Modeling of Computer Systems, June 2009, pp.193-204.","DOI":"10.1145\/1555349.1555372"},{"key":"1806_CR12","unstructured":"Pinheiro E, Weber W, Barroso L A. Failure trends in a large disk drive population. In Proc. the 5th USENIX Conference on File and Storage Technologies, February 2007, pp.17-28."},{"key":"1806_CR13","doi-asserted-by":"crossref","unstructured":"Gunawi H S, Hao M, Suminto R O et al. Why does the cloud stop computing?: Lessons from hundreds of service outages. In Proc. the 7th ACM Symposium on Cloud Computing, October 2016, pp.1-16.","DOI":"10.1145\/2987550.2987583"},{"key":"1806_CR14","doi-asserted-by":"crossref","unstructured":"Gunawi H S, Hao M, Leesatapornwongsa T et al. What bugs live in the cloud? A study of 3000+ issues in cloud systems. In Proc. the ACM Symposium on Cloud Computing, November 2014, pp.1-14.","DOI":"10.1145\/2670979.2670986"},{"key":"1806_CR15","doi-asserted-by":"crossref","unstructured":"Huang P, Guo C, Zhou L et al. Gray failure: The Achilles\u2019 heel of cloud-scale systems. In Proc. the 16th Workshop on Hot Topics in Operating Systems, May 2017, pp.150-155.","DOI":"10.1145\/3102980.3103005"},{"key":"1806_CR16","doi-asserted-by":"crossref","unstructured":"Zheng Z, Lan Z, Gupta R et al. A practical failure prediction with location and lead time for Blue Gene\/P. In Proc. the 2010 International Conference Dependable Systems and Networks Workshops (DSN-W), June 28-July 1, 2010.","DOI":"10.1109\/DSNW.2010.5542627"},{"key":"1806_CR17","doi-asserted-by":"crossref","unstructured":"Sahoo R K, Oliner A J, Rish I et al. Critical event prediction for proactive management in large-scale computer clusters. In Proc. the 9th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, August 2003, pp.426-435.","DOI":"10.1145\/956750.956799"},{"key":"1806_CR18","doi-asserted-by":"crossref","unstructured":"Gu J, Zheng Z, Lan Z et al. Dynamic meta-learning for failure prediction in large-scale systems: A case study. In Proc. the International Conference on Parallel Processing, Sept. 2008.","DOI":"10.1109\/ICPP.2008.17"},{"key":"1806_CR19","doi-asserted-by":"crossref","unstructured":"Gainaru A, Cappello F, Snir M et al. Fault prediction under the microscope: A closer look into HPC systems. In Proc. the International Conference on High Performance Computing, Networking, Storage and Analysis, November 2012, Article No. 77.","DOI":"10.1109\/SC.2012.57"},{"issue":"4","key":"1806_CR20","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1016\/S1005-8885(09)60497-0","volume":"17","author":"X Lu","year":"2010","unstructured":"Lu X, Wang H Q, Zhou R J et al. Autonomic failure prediction based on manifold learning for large-scale distributed systems. The Journal of China Universities of Posts and Telecommunications, 2010, 17(4): 116-124.","journal-title":"The Journal of China Universities of Posts and Telecommunications"},{"key":"1806_CR21","unstructured":"Srikant R, Agrawal R. Mining sequential patterns: Generalizations and performance improvements. In Lecture Notes in Computer Science 1057, Apers P, Bouzeghoub M, Gardarin G (eds.), June 2005."},{"issue":"3","key":"1806_CR22","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1023\/A:1009748302351","volume":"1","author":"H Mannila","year":"1997","unstructured":"Mannila H, Toivonen H, Verkamo A I. Discovery of frequent episodes in event sequences. Data Mining and Knowledge Discovery, 1997, 1(3): 259-289.","journal-title":"Data Mining and Knowledge Discovery"},{"key":"1806_CR23","unstructured":"Joshi M, Karypis G, Kumar V. A universal formulation of sequential patterns. Technical Report, No.99-021, University of Minnesota. https:\/\/www.cs.umn.edu\/research\/technical reports\/view\/99-021, Dec. 2017."},{"key":"1806_CR24","doi-asserted-by":"crossref","unstructured":"Fournier-Viger P,Wu CW, Tseng V S et al. Mining sequential rules common to several sequences with the window size constraint. In Proc. the 25th Conference on Advances in Artificial Intelligence, May 2012, pp.299-304.","DOI":"10.1007\/978-3-642-30353-1_27"},{"key":"1806_CR25","doi-asserted-by":"crossref","unstructured":"Fournier-Viger P, Wu C W, Tseng V S et al. Mining partially-ordered sequential rules common to multiple sequences. IEEE Transactions on Knowledge and Data Engineering, 27(8): 2203-2216.","DOI":"10.1109\/TKDE.2015.2405509"},{"key":"1806_CR26","unstructured":"Zhang Z. Reliability Theory and Engineering Application. Beijing: Science Press, 2012. (in Chinese)"}],"container-title":["Journal of Computer Science and Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11390-018-1806-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-018-1806-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-018-1806-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,9]],"date-time":"2019-10-09T23:18:43Z","timestamp":1570663123000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11390-018-1806-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,1]]},"references-count":26,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,1]]}},"alternative-id":["1806"],"URL":"https:\/\/doi.org\/10.1007\/s11390-018-1806-7","relation":{},"ISSN":["1000-9000","1860-4749"],"issn-type":[{"value":"1000-9000","type":"print"},{"value":"1860-4749","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,1]]},"assertion":[{"value":"29 July 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 December 2017","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 January 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}