{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T04:10:48Z","timestamp":1748751048440,"version":"3.41.0"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319271392"},{"type":"electronic","value":"9783319271408"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-27140-8_1","type":"book-chapter","created":{"date-parts":[[2015,11,16]],"date-time":"2015-11-16T13:27:57Z","timestamp":1447680477000},"page":"3-17","source":"Crossref","is-referenced-by-count":2,"title":["FT-Offload: A Scalable Fault-Tolerance Programing Model on MIC Cluster"],"prefix":"10.1007","author":[{"given":"Cheng","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunfei","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhen","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Canqun","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,12,16]]},"reference":[{"key":"1_CR1","unstructured":"Luebke, D., Harris, M., Govindaraju, N., Lefohn, A., Houston, M., Owens, J., Segal, M., Papakipos, M., Buck, L.: GPGPU: general-purpose computation on graphics hardware. In: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, SC 2006. ACM, New York, USA (2006)"},{"key":"1_CR2","unstructured":"Schulz, K.W., Ulerich, R., Malaya, N., Bauman, P.T., Stogner, R., Simmons, C.: Early experiences porting scientific applications to the many integrated core (MIC) platform. In: TACC-Intel Highly Parallel Computing Symposium, Austin, Texas (2012)"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Kirk, D.: NVIDIA CUDA software and GPU parallel computing architecture. In: Proceedings of the 6th International Symposium on Memory Management, ISMM 2007, pp. 103\u2013104. ACM, New York, USA (2007)","DOI":"10.1145\/1296907.1296909"},{"key":"1_CR4","doi-asserted-by":"publisher","first-page":"344","DOI":"10.1007\/s02011-011-1137-8","volume":"26","author":"X Yang","year":"2011","unstructured":"Yang, X., Liao, X., Lu, K., Hu, Q., Song, J., Su, J.: The TianHe-1A supercomputer: Its hardware and software. J. Comput. Sci. Technol. 26, 344\u2013351 (2011)","journal-title":"J. Comput. Sci. Technol."},{"key":"1_CR5","doi-asserted-by":"publisher","first-page":"1189","DOI":"10.1007\/s11771-013-1602-z","volume":"20","author":"C Yang","year":"2013","unstructured":"Yang, C., Wu, Q., Tang, T., Wang, F., Xue, J.: Programming for scientific computing on peta-scale heterogeneous parallel systems. J. Cent. S. Univ. 20, 1189\u20131203 (2013)","journal-title":"J. Cent. S. Univ."},{"key":"1_CR6","unstructured":"http:\/\/www.top500.org"},{"key":"1_CR7","unstructured":"Knights corner software developers guide, 27 April 2012"},{"issue":"3","key":"1_CR8","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/s11704-014-3501-3","volume":"8","author":"X Liao","year":"2014","unstructured":"Liao, X., Xiao, L., Yang, C., Lu, Y.: MilkyWay-2 supercomputer: system and application. Front. Comput. Sci. 8(3), 345\u2013356 (2014)","journal-title":"Front. Comput. Sci."},{"key":"1_CR9","doi-asserted-by":"publisher","first-page":"532","DOI":"10.1007\/s11390-014-1447-4","volume":"29","author":"X Liao","year":"2014","unstructured":"Liao, X., Yung, C., Tang, T., Yi, H., Wang, F., Wu, Q., Xue, J.: OpenMC: towards simplifying programming for tianhe supercomputers. J. Comput. Sci. Technol. (JCST) 29, 532\u2013546 (2014)","journal-title":"J. Comput. Sci. Technol. (JCST)"},{"key":"1_CR10","unstructured":"Nasertayoob, P., Shahbazian, S.: Stampede supercomputer. http:\/\/www.top500.org\/system\/17793"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Rajachandrasekar, R., Potluri, S., Venkatesh, A., Hamidouche, K., Wasi-ur Rahman, M., Panda, D.K.: MIC-Check: a distributed check pointing framework for the intel many integrated cores architecture. In: Proceedings of the 23rd International Symposium on High-Performance Parallel and Distributed Computing, pp. 121\u2013124. ACM (2014)","DOI":"10.1145\/2600212.2600713"},{"key":"1_CR12","unstructured":"User and reference guide for the intel c++ compiler 14.0, intel corporation (2014)"},{"key":"1_CR13","unstructured":"Intel Corporation. Openmp application program interface, version 4.0. OpenMP Architecture Review Board, July 2013"},{"key":"1_CR14","unstructured":"The openacc application programming interface, version 1.0., November 2011"},{"key":"1_CR15","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1109\/TSE.1987.232562","volume":"1","author":"R Koo","year":"1987","unstructured":"Koo, R., Toueg, S.: Checkpointing and rollback-recovery for distributed systems. IEEE Trans. Softw. Eng. 1, 23\u201331 (1987)","journal-title":"IEEE Trans. Softw. Eng."},{"key":"1_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24644-2_20","volume-title":"Languages and Compilers for Parallel Computing","author":"C Huang","year":"2004","unstructured":"Huang, C., Lawlor, O., Kale, L.V.: Adaptive MPI. In: Rauchwerger, L. (ed.) LCPC 2003. LNCS, vol. 2958. Springer, Heidelberg (2004)"},{"key":"1_CR17","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1007\/s11390-012-1220-5","volume":"27","author":"X Xu","year":"2012","unstructured":"Xu, X., Yang, X., Xue, J., Lin, Y., Lin, Y.: PartialRC: a partial recomputing method for efficient fault recovery on gpgpus. J. Comput. Sci. Technol. (JCST) 27, 240\u2013255 (2012)","journal-title":"J. Comput. Sci. Technol. (JCST)"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Zheng, G., Ni, X., Kal\u00e9, L.V.: A scalable double in-memory checkpoint and restart scheme towards exascale. In: 2012 IEEE\/IFIP 42nd International Conference on Dependable Systems and Networks Workshops (DSN-W), pages 1\u20136. IEEE (2012)","DOI":"10.1109\/DSNW.2012.6264677"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Kale, L.V., Zheng, G.: Charm++ and ampi: adaptive runtime strategies via migratable objects. In: Advanced Computational Infrastructures for Parallel and Distributed Applications, pp. 265\u2013282 (2009)","DOI":"10.1002\/9780470558027.ch13"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Stellner, G.: Cocheck: Checkpointing and process migration for MPI. In: Proceedings of the 10th Internationa Parallel Processing Symposium, IPPS 1996, pp. 526\u2013531. IEEE (1996)","DOI":"10.1109\/IPPS.1996.508106"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"Agbaria, A.M., Friedman, R.: Starfish: fault-tolerant dynamic MPI programs on clusters of workstations. In: Proceedings. The Eighth International Symposium on High Performance Distributed Computing, pp. 167\u2013176. IEEE (1999)","DOI":"10.1109\/HPDC.1999.805295"},{"issue":"10","key":"1_CR22","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/966049.781513","volume":"38","author":"G Bronevetsky","year":"2003","unstructured":"Bronevetsky, G., Marques, D., Pingali, K., Stodghill, P.: Automated application-level checkpointing of MPI programs. ACM Sigplan Notices 38(10), 84\u201394 (2003)","journal-title":"ACM Sigplan Notices"},{"key":"1_CR23","unstructured":"Sheaffer, J.W., Luebke, D.P., Skadron, K:. A hardware redundancy and recovery mechanism for reliable scientific computation on graphics processors. In: Graphics Hardware, vol. 2007, pp. 55\u201364. Citeseer (2007)"},{"key":"1_CR24","unstructured":"Karablieh, F., Bazzi, R.A.: Heterogeneous checkpointing for multithreaded applications. In: Proceedings of 21st IEEE Symposium on Reliable Distributed Systems, 2002, pp. 140\u2013149. IEEE (2002)"},{"key":"1_CR25","doi-asserted-by":"crossref","unstructured":"Takizawa, H., Koyama, K., Sato, K., Komatsu, K., Kobayashi, H.: Checl: transparent checkpointing and process migration of opencl applications. In: 2011 IEEE International Parallel & Distributed Processing Symposium (IPDPS), pp. 864\u2013876. IEEE (2011)","DOI":"10.1109\/IPDPS.2011.85"},{"key":"1_CR26","doi-asserted-by":"crossref","unstructured":"Takizawa, H., Sato, K., Komatsu, K., Kobayashi, H.: CheCUDA: a checkpoint\/restart tool for cuda applications. In: 2009 International Conference on Parallel and Distributed Computing, Applications and Technologies, pp. 408\u2013413. IEEE (2009)","DOI":"10.1109\/PDCAT.2009.78"},{"key":"1_CR27","first-page":"494","volume":"46","author":"PH Hargrove","year":"2006","unstructured":"Hargrove, P.H., Duell, J.C.: Berkeley lab checkpoint\/restart (blcr) for linux clusters. J. Phys.: Conf. Ser. 46, 494 (2006)","journal-title":"J. Phys.: Conf. Ser."},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Dong, X., Wen, M., Chai, J., Cai, X., Zhao, M., Zhang1, C.: Communication-hiding programming for clusters with multi-coprocessor nodes. Published online in Wiley Online Library (2015)","DOI":"10.1002\/cpe.3507"},{"key":"1_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1007\/3-540-45255-9_47","volume-title":"Recent Advances in Parallel Virtual Machine and Message Passing Interface","author":"GE Fagg","year":"2000","unstructured":"Fagg, G.E., Dongarra, J.: FT-MPI: Fault Tolerant MPI, supporting dynamic applications in a dynamic world. In: Dongarra, J., Kacsuk, P., Podhorszki, N. (eds.) PVM\/MPI 2000. LNCS, vol. 1908, p. 346. Springer, Heidelberg (2000)"},{"key":"1_CR30","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611971538","volume-title":"Templates for the Solution of Linear Systems: Building Blocks for Iterative Methods","author":"R Barrett","year":"1994","unstructured":"Barrett, R., Berry, M.W., Chan, T.F., Demmel, J., Donato, J., Dongarra, J., Eijkhout, V., Pozo, R., Romine, C., Van der Vorst, H.: Templates for the Solution of Linear Systems: Building Blocks for Iterative Methods, vol. 43. Siam, Philadelphia (1994)"},{"key":"1_CR31","doi-asserted-by":"crossref","unstructured":"Yang, C., Wang, F., Du, Y., Chen, J., Liu, J., Yi, H., Lu, K.: Adaptive optimization for petascale heterogeneous CPU\/GPU computing. In: 2010 IEEE International Conference on Cluster Computing (CLUSTER), pp. 19\u201328. IEEE (2010)","DOI":"10.1109\/CLUSTER.2010.12"},{"issue":"9","key":"1_CR32","doi-asserted-by":"publisher","first-page":"1477","DOI":"10.1002\/qua.21665","volume":"108","author":"S Shahbazian","year":"2008","unstructured":"Shahbazian, S.: Revisiting the foundations of quantum theory of atoms in molecules: the variational procedure and the zero-flux conditions. Int. J. Quantum Chem. 108(9), 1477\u20131484 (2008)","journal-title":"Int. J. Quantum Chem."},{"key":"1_CR33","doi-asserted-by":"crossref","unstructured":"Chen, Z.: Algorithm-based recovery for iterative methods without checkpointing. In: Proceedings of the 20th International Symposium on High Performance Distributed Computing, pp. 73\u201384. ACM (2011)","DOI":"10.1145\/1996130.1996142"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-27140-8_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T12:06:09Z","timestamp":1748693169000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-27140-8_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319271392","9783319271408"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-27140-8_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]}}}