{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:24:45Z","timestamp":1742912685606,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319436586"},{"type":"electronic","value":"9783319436593"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-43659-3_31","type":"book-chapter","created":{"date-parts":[[2016,8,8]],"date-time":"2016-08-08T02:54:01Z","timestamp":1470624841000},"page":"419-430","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Exploring Partial Replication to Improve Lightweight Silent Data Corruption Detection for HPC Applications"],"prefix":"10.1007","author":[{"given":"Eduardo","family":"Berrocal","sequence":"first","affiliation":[]},{"given":"Leonardo","family":"Bautista-Gomez","sequence":"additional","affiliation":[]},{"given":"Sheng","family":"Di","sequence":"additional","affiliation":[]},{"given":"Zhiling","family":"Lan","sequence":"additional","affiliation":[]},{"given":"Franck","family":"Cappello","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,8,9]]},"reference":[{"key":"31_CR1","unstructured":"Fusion cluster at Argonne National Laboratory. \n                      http:\/\/www.lcrc.anl.gov\/guides\/Fusion"},{"issue":"8","key":"31_CR2","doi-asserted-by":"crossref","first-page":"381","DOI":"10.1145\/2692916.2555279","volume":"49","author":"Leonardo Bautista Gomez","year":"2014","unstructured":"Bautista-Gomez, L.A., Cappello, F.: Detecting silent data corruption through data dynamic monitoring for scientific applications. In: PPoPP 2014, pp. 381\u2013382 (2014)","journal-title":"ACM SIGPLAN Notices"},{"key":"31_CR3","doi-asserted-by":"crossref","unstructured":"Bautista-Gomez, L.A., Cappello, F.: Detecting and correcting data corruption in stencil applications through multivariate interpolation. In: 1st International Workshop on Fault Tolerant Systems (part of Cluster 2015), pp. 595\u2013602 (2015)","DOI":"10.1109\/CLUSTER.2015.108"},{"key":"31_CR4","doi-asserted-by":"crossref","unstructured":"Bautista-Gomez, L.A., Tsuboi, S., Komatitsch, D., Cappello, F., Maruyama, N., Matsuoka, S.: FTI: high performance fault tolerance interface for hybrid systems. In: SC 2011, pp. 32:1\u201332:32 (2011)","DOI":"10.1145\/2063384.2063427"},{"issue":"4","key":"31_CR5","first-page":"1","volume":"29","author":"AR Benson","year":"2014","unstructured":"Benson, A.R., Schmit, S., Schreiber, R.: Silent error detection in numerical time-stepping schemes. Int. J. High Perform. Comput. Appl. 29(4), 1\u201320 (2014)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"31_CR6","doi-asserted-by":"crossref","unstructured":"Berrocal, E., Bautista-Gomez, L., Di, S., Lan, Z., Cappello, F.: Lightweight silent data corruption detection based on runtime data analysis for HPC applications. In: HPDC 2015 (short paper) (2015)","DOI":"10.1145\/2749246.2749253"},{"key":"31_CR7","unstructured":"Borkar, S.: Major challenges to achieve exascale performance. Intel Corporation, April 2009"},{"key":"31_CR8","unstructured":"Briere, D., Traverse, P.: AIRBUS A320\/A330\/A340 electrical flight controls - a family of fault-tolerant systems. In: Proceedings of the IEEE International Symposium on Fault-Tolerant Computing, pp. 616\u2013623 (1993)"},{"key":"31_CR9","doi-asserted-by":"crossref","unstructured":"Chalermarrewong, T., Achalakul, T., See, S.C.W.: Failure prediction of data centers using time series and fault tree analysis. In: ICPads 2012, pp. 794\u2013799 (2012)","DOI":"10.1109\/ICPADS.2012.129"},{"key":"31_CR10","unstructured":"Dell, T.J.: A white paper on the benefits of chipkill-correct ECC for PC server main memory. In: IBM Microelectronics Division, pp. 1\u201323 (1997)"},{"key":"31_CR11","doi-asserted-by":"crossref","unstructured":"Fiala, D., Mueller, F., Engelmann, C., Riesen, R., Ferreira, K., Brightwell, R.: Detection and correction of silent data corruption for large-scale high-performance computing. In: SC 2012, pp. 78:1\u201378:12 (2012)","DOI":"10.1109\/SC.2012.49"},{"key":"31_CR12","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1086\/317361","volume":"131","author":"B Fryxell","year":"2000","unstructured":"Fryxell, B., Olson, K., Ricker, P., Timmes, F.X., Zingale, M., Lamb, D.Q., MacNeice, P., Rosner, R., Truran, J.W., Tufo, H.: Flash: an adaptive mesh hydrodynamics code for modeling astrophysical thermonuclear flashes. Astrophys. J. Suppl. Ser. (ApJS) 131, 273\u2013334 (2000)","journal-title":"Astrophys. J. Suppl. Ser. (ApJS)"},{"issue":"1","key":"31_CR13","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1198\/004017007000000461","volume":"50","author":"NW Hengartner","year":"2008","unstructured":"Hengartner, N.W., Takala, E., Michalak, S.E., Wender, S.A.: Evaluating experiments for estimating the bit failure cross-section of semiconductors using a colored spectrum neutron beam. Technometrics 50(1), 8\u201314 (2008)","journal-title":"Technometrics"},{"issue":"6","key":"31_CR14","doi-asserted-by":"publisher","first-page":"518","DOI":"10.1109\/TC.1984.1676475","volume":"100","author":"KH Huang","year":"1984","unstructured":"Huang, K.H., Abraham, J.A.: Algorithm-based fault tolerance for matrix operations. IEEE Trans. Comput. 100(6), 518\u2013528 (1984)","journal-title":"IEEE Trans. Comput."},{"key":"31_CR15","doi-asserted-by":"crossref","unstructured":"Hukerikar, S., Diniz, P.C., Lucas, R.F., Teranishi, K.: Opportunistic application-level fault detection through adaptive redundant multithreading. In: HPCS 2014 (2014)","DOI":"10.1109\/HPCSim.2014.6903692"},{"key":"31_CR16","doi-asserted-by":"crossref","unstructured":"Hwang, A.A., Stefanovici, I.A., Schroeder, B.: Cosmic rays don\u2019t strike twice: understanding the nature of dram errors and the implications for system design. In: ASPLOS XVII, pp. 111\u2013122 (2012)","DOI":"10.1145\/2248487.2150989"},{"key":"31_CR17","doi-asserted-by":"crossref","unstructured":"Liu, J., Kurt, M.C., Agrawal, G.: A practical approach for handling soft errors in iterative applications. In: Cluster 2015, pp. 158\u2013161 (2015)","DOI":"10.1109\/CLUSTER.2015.32"},{"key":"31_CR18","doi-asserted-by":"crossref","unstructured":"Mukherjee, S., Kontz, M., Reinhardt, S.: Detailed design and evaluation of redundant multi-threading alternatives. In: ISCA 2002, pp. 99\u2013110 (2002)","DOI":"10.1145\/545214.545227"},{"key":"31_CR19","unstructured":"Mukherjee, S.S., Emer, J., Reinhardt, S.K.: The soft error problem: an architectural perspective. In: HPCA 2005 (2005)"},{"key":"31_CR20","doi-asserted-by":"crossref","unstructured":"Nakka, N., Pattabiraman, K., Iyer, R.: Processor-level selective replication. In: DSN 2007, pp. 544\u2013553 (2007)","DOI":"10.1109\/DSN.2007.75"},{"key":"31_CR21","volume-title":"Similarity and Dimensional Methods in Mechanics","author":"LI Sedov","year":"1959","unstructured":"Sedov, L.I.: Similarity and Dimensional Methods in Mechanics, 10th edn. Academic Press, New York (1959)","edition":"10"},{"issue":"2","key":"31_CR22","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1177\/1094342014522573","volume":"28","author":"M Snir","year":"2014","unstructured":"Snir, M., et al.: Addressing failures in exascale computing. Int. J. High Perform. Comput. 28(2), 129\u2013173 (2014)","journal-title":"Int. J. High Perform. Comput."},{"key":"31_CR23","doi-asserted-by":"crossref","unstructured":"Stearly, J., Ferreira, K., Robinson, D., Laros, J., Pedretti, K., Arnold, D., Bridges, P., Riesen, R.: Does partial replication pay off? In: DSN 2012 (2012)","DOI":"10.1109\/DSNW.2012.6264669"},{"key":"31_CR24","doi-asserted-by":"crossref","unstructured":"Subasi, O., Arias, J., Unsal, O., Labarta, J., Cristal, A.: Programmer-directed partial redundancy for resilient HPC. In: CF 2015 (2015)","DOI":"10.1145\/2742854.2742903"},{"key":"31_CR25","unstructured":"Wang, C., Mueller, F., Engelmann, C., Scott, S.L.: Proactive process-level live migration in HPC environments. In: SC 2008 (2008)"},{"key":"31_CR26","doi-asserted-by":"crossref","unstructured":"Yim, K.S.: Characterization of impact of transient faults and detection of data corruption errors in large-scale n-body programs using graphics processing units. In: IPDPS 2014, pp. 458\u2013467 (2014)","DOI":"10.1109\/IPDPS.2014.55"},{"issue":"2","key":"31_CR27","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1137\/0915019","volume":"15","author":"AL Zachary","year":"1994","unstructured":"Zachary, A.L., Malagoli, A., Colella, P.: A higher-order godunov method for multidimensional ideal magnetohydrodynamics. SIAM J. Sci. Comput. 15(2), 263\u2013284 (1994)","journal-title":"SIAM J. Sci. Comput."}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2016: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-43659-3_31","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,9]],"date-time":"2020-08-09T00:06:11Z","timestamp":1596931571000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-43659-3_31"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319436586","9783319436593"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-43659-3_31","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"9 August 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Grenoble","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 August 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/europar2016.inria.fr\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}