{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T08:15:27Z","timestamp":1768032927964,"version":"3.49.0"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030598501","type":"print"},{"value":"9783030598518","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-59851-8_18","type":"book-chapter","created":{"date-parts":[[2020,10,19]],"date-time":"2020-10-19T18:04:00Z","timestamp":1603130640000},"page":"280-292","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Characterizing HPC Performance Variation with Monitoring and Unsupervised Learning"],"prefix":"10.1007","author":[{"given":"Gence","family":"Ozer","sequence":"first","affiliation":[]},{"given":"Alessio","family":"Netti","sequence":"additional","affiliation":[]},{"given":"Daniele","family":"Tafani","sequence":"additional","affiliation":[]},{"given":"Martin","family":"Schulz","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,20]]},"reference":[{"key":"18_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1007\/978-3-319-96983-1_7","volume-title":"Euro-Par 2018: Parallel Processing","author":"E Ates","year":"2018","unstructured":"Ates, E., et al.: Taxonomist: application detection through rich monitoring data. In: Aldinucci, M., Padovani, L., Torquati, M. (eds.) Euro-Par 2018. LNCS, vol. 11014, pp. 92\u2013105. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-96983-1_7"},{"key":"18_CR2","unstructured":"Baseman, E., Blanchard, S., DeBardeleben, N., Bonnie, A., et al.: Interpretable anomaly detection for monitoring of high performance computing systems. In: Proceedings of the ACM SIGKDD 2016 Workshops (2016)"},{"key":"18_CR3","doi-asserted-by":"crossref","unstructured":"Borghesi, A., Libri, A., Benini, L., Bartolini, A.: Online anomaly detection in HPC systems. In: Proceedings of AICAS 2019, pp. 229\u2013233. IEEE (2019)","DOI":"10.1109\/AICAS.2019.8771527"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Bourassa, N., Johnson, W., Broughton, J., Carter, D.M., et al.: Operational data analytics: optimizing the national energy research scientific computing center cooling systems. In: Proceedings of the ICPP 2019 Workshops, pp. 5:1\u20135:7. ACM (2019)","DOI":"10.1145\/3339186.3339210"},{"key":"18_CR5","unstructured":"Bourassa, N., Ott, M.: EEHPCWG operational data analytics survey (2019). https:\/\/eehpcwg.llnl.gov\/assets\/sc19_11_425_525_operational_data_analytics_ott_bourassa.pdf"},{"issue":"1","key":"18_CR6","first-page":"5","volume":"1","author":"F Cappello","year":"2014","unstructured":"Cappello, F., Geist, A., Gropp, W., Kale, S., et al.: Toward exascale resilience: 2014 update. Supercomput. Front. Innovations 1(1), 5\u201328 (2014)","journal-title":"Supercomput. Front. Innovations"},{"key":"18_CR7","unstructured":"Cohen, I., Chase, J.S., Goldszmidt, M., Kelly, T., Symons, J.: Correlating instrumentation data to system states: a building block for automated diagnosis and control. In: OSDI, vol. 4, p. 16 (2004)"},{"key":"18_CR8","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1007\/978-3-319-60045-1_23","volume-title":"Advances in Artificial Intelligence: From Theory to Practice","author":"MC Dani","year":"2017","unstructured":"Dani, M.C., Doreau, H., Alt, S.: K-means application for anomaly detection and log classification in HPC. In: Benferhat, S., Tabia, K., Ali, M. (eds.) IEA\/AIE 2017. LNCS (LNAI), vol. 10351, pp. 201\u2013210. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-60045-1_23"},{"key":"18_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"394","DOI":"10.1007\/978-3-319-58667-0_21","volume-title":"High Performance Computing","author":"J Eastep","year":"2017","unstructured":"Eastep, J., et al.: Global extensible open power manager: a vehicle for HPC community collaboration on co-designed energy management solutions. In: Kunkel, J.M., Yokota, R., Balaji, P., Keyes, D. (eds.) ISC 2017. LNCS, vol. 10266, pp. 394\u2013412. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-58667-0_21"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"Gabel, M., Gilad-Bachrach, R., Bjorner, N., Schuster, A.: Latent fault detection in cloud services. Microsoft Research, Technical report MSR-TR-2011-83 (2011)","DOI":"10.1109\/DSN.2012.6263932"},{"key":"18_CR11","series-title":"Computer Communications and Networks","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/978-3-319-20943-2_2","volume-title":"Fault-Tolerance Techniques for High-Performance Computing","author":"A Gainaru","year":"2015","unstructured":"Gainaru, A., Cappello, F.: Errors and faults. In: Herault, T., Robert, Y. (eds.) Fault-Tolerance Techniques for High-Performance Computing. CCN, pp. 89\u2013144. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-20943-2_2"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Guan, Q., Fu, S.: Adaptive anomaly identification by exploring metric subspace in cloud computing infrastructures. In: Proceedings of SRDS 2013, pp. 205\u2013214. IEEE (2013)","DOI":"10.1109\/SRDS.2013.29"},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Inadomi, Y., Patki, T., Inoue, K., Aoyagi, M., et al.: Analyzing and mitigating the impact of manufacturing variability in power-constrained supercomputing. In: Proceedings of SC 2015, pp. 1\u201312. IEEE (2015)","DOI":"10.1145\/2807591.2807638"},{"key":"18_CR14","unstructured":"M\u00fcnz, G., Li, S., Carle, G.: Traffic anomaly detection using k-means clustering. In: Proceedings of the GI\/ITG Workshop MMBnet, pp. 13\u201314 (2007)"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Netti, A., Mueller, M., Auweter, A., Guillen, C., et al.: From facility to application sensor data: modular, continuous and holistic monitoring with DCDB. In: Proceedings of SC 2019. ACM (2019)","DOI":"10.1145\/3295500.3356191"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Netti, A., Mueller, M., Guillen, C., Ott, M., et al.: DCDB Wintermute: enabling online and holistic operational data analytics on HPC systems. In: Proceedings of HPDC 2020. ACM (2020)","DOI":"10.1145\/3369583.3392674"},{"issue":"11","key":"18_CR17","doi-asserted-by":"publisher","first-page":"1133","DOI":"10.1109\/34.730550","volume":"20","author":"SJ Roberts","year":"1998","unstructured":"Roberts, S.J., Husmeier, D., Rezek, I., Penny, W.: Bayesian approaches to Gaussian mixture modeling. IEEE Trans. Pattern Anal. Mach. Intell. 20(11), 1133\u20131142 (1998)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR18","doi-asserted-by":"publisher","first-page":"883","DOI":"10.1109\/TPDS.2018.2870403","volume":"30","author":"O Tuncer","year":"2018","unstructured":"Tuncer, O., Ates, E., Zhang, Y., Turk, A., et al.: Online diagnosis of performance variation in HPC systems using machine learning. IEEE Trans. Parallel Distrib. Syst. 30, 883\u2013896 (2018)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Villa, O., Johnson, D.R., Oconnor, M., Bolotin, E., et al.: Scaling the power wall: a path to exascale. In: Proceedings of SC 2014, pp. 830\u2013841. IEEE (2014)","DOI":"10.1109\/SC.2014.73"},{"issue":"8","key":"18_CR20","first-page":"3865","volume":"10","author":"G Wang","year":"2016","unstructured":"Wang, G., Yang, J., Li, R.: An anomaly detection framework based on ICA and Bayesian classification for IaaS platforms. KSII Trans. Internet Inf. Syst. (TIIS) 10(8), 3865\u20133883 (2016)","journal-title":"KSII Trans. Internet Inf. Syst. (TIIS)"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Zhang, X., Meng, F., Chen, P., Xu, J.: TaskInsight: a fine-grained performance anomaly detection and problem locating system. In: Proceedings of CLOUD 2016, pp. 917\u2013920. IEEE (2016)","DOI":"10.1109\/CLOUD.2016.0136"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-59851-8_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,23]],"date-time":"2022-11-23T22:36:18Z","timestamp":1669242978000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-59851-8_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030598501","9783030598518"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-59851-8_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"20 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISC High Performance","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on High Performance Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Frankfurt am Main","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 June 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 June 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"35","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"supercomputing2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.isc-hpc.com\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"87","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.73","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.33","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}