{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T19:17:18Z","timestamp":1743103038399,"version":"3.40.3"},"publisher-location":"Cham","reference-count":66,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030479558"},{"type":"electronic","value":"9783030479565"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2020,7,31]],"date-time":"2020-07-31T00:00:00Z","timestamp":1596153600000},"content-version":"vor","delay-in-days":212,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-47956-5_16","type":"book-chapter","created":{"date-parts":[[2020,7,30]],"date-time":"2020-07-30T17:26:56Z","timestamp":1596130016000},"page":"483-516","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["FFMK: A Fast and Fault-Tolerant Microkernel-Based System for Exascale Computing"],"prefix":"10.1007","author":[{"given":"Carsten","family":"Weinhold","sequence":"first","affiliation":[]},{"given":"Adam","family":"Lackorzynski","sequence":"additional","affiliation":[]},{"given":"Jan","family":"Bierbaum","sequence":"additional","affiliation":[]},{"given":"Martin","family":"K\u00fcttler","sequence":"additional","affiliation":[]},{"given":"Maksym","family":"Planeta","sequence":"additional","affiliation":[]},{"given":"Hannes","family":"Weisbach","sequence":"additional","affiliation":[]},{"given":"Matthias","family":"Hille","sequence":"additional","affiliation":[]},{"given":"Hermann","family":"H\u00e4rtig","sequence":"additional","affiliation":[]},{"given":"Alexander","family":"Margolin","sequence":"additional","affiliation":[]},{"given":"Dror","family":"Sharf","sequence":"additional","affiliation":[]},{"given":"Ely","family":"Levy","sequence":"additional","affiliation":[]},{"given":"Pavel","family":"Gak","sequence":"additional","affiliation":[]},{"given":"Amnon","family":"Barak","sequence":"additional","affiliation":[]},{"given":"Masoud","family":"Gholami","sequence":"additional","affiliation":[]},{"given":"Florian","family":"Schintke","sequence":"additional","affiliation":[]},{"given":"Thorsten","family":"Sch\u00fctt","sequence":"additional","affiliation":[]},{"given":"Alexander","family":"Reinefeld","sequence":"additional","affiliation":[]},{"given":"Matthias","family":"Lieber","sequence":"additional","affiliation":[]},{"given":"Wolfgang E.","family":"Nagel","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,7,31]]},"reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Asmussen, N., V\u00f6lp, M., N\u00f6then, B., H\u00e4rtig, H., Fettweis, G.: M3: A hardware\/operating-system co-design to tame heterogeneous manycores. In: Proceedings of the Twenty-First International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) (2016)","DOI":"10.1145\/2872362.2872371"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Barak, A., Guday, S., Wheeler, R.: The MOSIX Distributed Operating System: Load Balancing for UNIX. Lecture Notes in Computer Science, vol. 672. Springer, Berlin (1993)","DOI":"10.1007\/3-540-56663-5"},{"issue":"17","key":"16_CR3","doi-asserted-by":"publisher","first-page":"4797","DOI":"10.1002\/cpe.3465","volume":"27","author":"A Barak","year":"2015","unstructured":"Barak, A., Drezner, Z., Levy, E., Lieber, M., Shiloh, A.: Resilient gossip algorithms for collecting online management information in exascale clusters. Concurr. Comput. Pract. Exp. 27(17), 4797\u20134818 (2015)","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"16_CR4","unstructured":"Bautista-Gomez, L.A., et al.: FTI: high performance fault tolerance interface for hybrid systems. In: SC\u201911 Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 32:1\u201332:32 (2011). \nhttp:\/\/doi.acm.org\/10.1145\/2063384.2063427"},{"key":"16_CR5","doi-asserted-by":"publisher","unstructured":"Beckman, P., Iskra, K., Yoshii, K., Coghlan, S.: The influence of operating systems on the performance of collective operations at extreme scale. In: 2006 IEEE International Conference on Cluster Computing, pp. 1\u201312 (2006). \nhttps:\/\/doi.org\/10.1109\/CLUSTR.2006.311846","DOI":"10.1109\/CLUSTR.2006.311846"},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Binkert, N., Beckmann, B., Black, G., Reinhardt, S.K., Saidi, A., Basu, A., Hestness, J., Hower, D.R., Krishna, T., Sardashti, S., Sen, R., Sewell, K., Shoaib, M., Vaish, N., Hill, M.D., Wood, D.A.: The Gem5 simulator. SIGARCH Computer Architecture News (2011)","DOI":"10.1145\/2024716.2024718"},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Bland, W.: User level failure mitigation in MPI. In: Euro-Par 2012: Parallel Processing Workshops - BDMC, CGWS, HeteroPar, HiBB, OMHI, Paraphrase, PROPER, Resilience, UCHPC, VHPC, Rhodes Islands, August 27\u201331, 2012. Revised Selected Papers, pp. 499\u2013504. Springer, Berlin (2012). \nhttps:\/\/doi.org\/10.1007\/978-3-642-36949-0_57","DOI":"10.1007\/978-3-642-36949-0_57"},{"key":"16_CR8","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1007\/978-3-642-33518-1_24","volume-title":"Recent Advances in the Message Passing Interface","author":"W Bland","year":"2012","unstructured":"Bland, W., Bouteiller, A., Herault, T., Hursey, J., Bosilca, G., Dongarra, J.J.: An evaluation of user-level failure mitigation support in MPI. In: Tr\u00e4ff, J.L., Benkner, S., Dongarra, J.J. (eds.) Recent Advances in the Message Passing Interface, pp. 193\u2013203. Springer, Berlin (2012)"},{"key":"16_CR9","unstructured":"Cavium: ThunderX_CP Family of Workload Optimized Compute Processors (2014). \nhttps:\/\/www.marvell.com\/content\/dam\/marvell\/en\/public-collateral\/server-processors\/marvell-server-processors-thunderx-cp-product-brief.pdf"},{"key":"16_CR10","doi-asserted-by":"crossref","unstructured":"Culler, D., Karp, R., Patterson, D., Sahay, A., Schauser, K.E., Santos, E., Subramonian, R., Von Eicken, T.: LogP: towards a realistic model of parallel computation. In: Symposium on Principles and Practice of Parallel Programming, PPoPP, pp. 1\u201312. ACM, New York (1993). \nhttps:\/\/doi.org\/10.1145\/155332.155333","DOI":"10.1145\/155332.155333"},{"issue":"3","key":"16_CR11","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1016\/j.future.2004.11.016","volume":"22","author":"JT Daly","year":"2006","unstructured":"Daly, J.T.: A higher order estimate of the optimum checkpoint interval for restart dumps. Future Gener. Comput. Syst. 22(3), 303\u2013312 (2006). \nhttps:\/\/doi.org\/10.1016\/j.future.2004.11.016","journal-title":"Future Gener. Comput. Syst."},{"issue":"2","key":"16_CR12","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1109\/5992.988653","volume":"4","author":"K Devine","year":"2002","unstructured":"Devine, K., Boman, E., Heaphy, R., Hendrickson, B., Vaughan, C.: Zoltan data management services for parallel dynamic applications. Comput. Sci. Eng. 4(2), 90\u201397 (2002)","journal-title":"Comput. Sci. Eng."},{"issue":"7","key":"16_CR13","doi-asserted-by":"publisher","first-page":"789","DOI":"10.1016\/S0167-8191(99)00018-6","volume":"25","author":"R Diekmann","year":"1999","unstructured":"Diekmann, R., Frommer, A., Monien, B.: Efficient schemes for nearest neighbor load balancing. Parallel Comput. 25(7), 789\u2013812 (1999)","journal-title":"Parallel Comput."},{"issue":"12","key":"16_CR14","doi-asserted-by":"publisher","first-page":"1555","DOI":"10.1016\/S0167-8191(00)00043-0","volume":"26","author":"R Diekmann","year":"2000","unstructured":"Diekmann, R., Preis, R., Schlimbach, F., Walshaw, C.: Shape-optimized mesh partitioning and load balancing for parallel adaptive FEM. Parallel Comput. 26(12), 1555\u20131581 (2000)","journal-title":"Parallel Comput."},{"key":"16_CR15","unstructured":"Feinberg, A.: An 83,000-processor supercomputer can only match 1% of your brain (2013). \nhttp:\/\/gizmodo.com\/an-83-000-processor-supercomputer-only-matched-one-perc-1045026757"},{"key":"16_CR16","unstructured":"Ferreira, K.B., Bridges, P., Brightwell, R.: Characterizing application sensitivity to OS interference using Kernel-level noise injection. In: Proceedings of the 2008 ACM\/IEEE Conference on Supercomputing, SC\u201908, pp. 19:1\u201319:12. IEEE Press, Piscataway (2008). \nhttp:\/\/dl.acm.org\/citation.cfm?id=1413370.1413390"},{"key":"16_CR17","unstructured":"FFMK Website. \nhttp:\/\/ffmk.tudos.org\n\n. Accessed 5 Aug 2019"},{"key":"16_CR18","unstructured":"Forum, M.P.I.: MPI: a message-passing interface standard. Standard 3.1, University of Tennessee, Knoxville (2015)"},{"issue":"7","key":"16_CR19","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-016-5588-7","volume":"59","author":"H Fu","year":"2016","unstructured":"Fu, H., Liao, J., Yang, J., Wang, L., Song, Z., Huang, X., Yang, C., Xue, W., Liu, F., Qiao, F., Zhao, W., Yin, X., Hou, C., Zhang, C., Ge, W., Zhang, J., Wang, Y., Zhou, C., Yang, G.: The Sunway TaihuLight supercomputer: system and applications. Sci. China Inf. Sci. 59(7), 072001 (2016). \nhttps:\/\/doi.org\/10.1007\/s11432-016-5588-7","journal-title":"Sci. China Inf. Sci."},{"key":"16_CR20","doi-asserted-by":"publisher","unstructured":"Gerofi, B., Takagi, M., Hori, A., Nakamura, G., Shirasawa, T., Ishikawa, Y.: On the scalability, performance isolation and device driver transparency of the IHK\/McKernel hybrid lightweight kernel. In: 2016 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 1041\u20131050 (2016). \nhttps:\/\/doi.org\/10.1109\/IPDPS.2016.80","DOI":"10.1109\/IPDPS.2016.80"},{"key":"16_CR21","unstructured":"Gholami, M., Schintke, F.: Multilevel checkpoint\/restart for large computational jobs on distributed computing resources. In: 38th Symposium on Reliable Distributed Systems (SRDS\u201919) (2019)"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Gholami, M., Schintke, F., Sch\u00fctt, T.: Checkpoint scheduling for shared usage of burst-buffers in supercomputers. In: The 47th International Conference on Parallel Processing, ICPP 2018, Workshop Proceedings, Eugene, August 13\u201316, 2018, pp. 44:1\u201344:10. ACM, New York (2018). \nhttps:\/\/doi.org\/10.1145\/3229710.3229755","DOI":"10.1145\/3229710.3229755"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"Giampapa, M., Gooding, T., Inglett, T., Wisniewski, R.W.: Experiences with a lightweight supercomputer Kernel: lessons learned from Blue Gene\u2019s CNK. In: Proceedings of the 2010 ACM\/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis, SC (2010). \nhttps:\/\/doi.org\/10.1109\/SC.2010.22","DOI":"10.1109\/SC.2010.22"},{"key":"16_CR24","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/978-3-540-30218-6_17","volume":"3241","author":"WD Gropp","year":"2004","unstructured":"Gropp, W.D., et al.: Providing efficient I\/O redundancy in MPI environments. In: Recent Advances in Parallel Virtual Machine and Message Passing Interface, 11th European PVM\/MPI Users\u2019 Group Meeting. Lecture Notes in Computer Science, vol. 3241, pp. 77\u201386 (2004). \nhttps:\/\/doi.org\/10.1007\/978-3-540-30218-6_17","journal-title":"Lecture Notes in Computer Science"},{"key":"16_CR25","unstructured":"Hille, M., Asmussen, N., Bhatotia, P., H\u00e4rtig, H.: SemperOS: A distributed capability system. In: 2019 USENIX Annual Technical Conference (ATC) (2019)"},{"key":"16_CR26","doi-asserted-by":"crossref","unstructured":"Hoefler, T., Schneider, T., Lumsdaine, A.: Characterizing the influence of system noise on large-scale applications by simulation. In: Proceedings of the 2010 ACM\/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis, SC\u201910. IEEE Computer Society, Washington (2010). \nhttps:\/\/doi.org\/10.1109\/SC.2010.12","DOI":"10.1109\/SC.2010.12"},{"key":"16_CR27","doi-asserted-by":"publisher","unstructured":"Hoefler, T., Barak, A., Shiloh, A., Drezner, Z.: Corrected gossip algorithms for fast reliable broadcast on unreliable systems. In: International Parallel and Distributed Processing Symposium, IPDPS, pp. 357\u2013366. IEEE Computer Society, Washington (2017). \nhttps:\/\/doi.org\/10.1109\/IPDPS.2017.36","DOI":"10.1109\/IPDPS.2017.36"},{"key":"16_CR28","doi-asserted-by":"publisher","unstructured":"IBM: Design of the IBM Blue Gene\/Q Compute chip. IBM J. Res. Develop. 57(1\/2), 1:1\u20131:13 (2013). \nhttps:\/\/doi.org\/10.1147\/JRD.2012.2222991","DOI":"10.1147\/JRD.2012.2222991"},{"key":"16_CR29","unstructured":"Intel: Intel xeon processor E5-1600\/E5-2600\/E5-4600 v2 product families (2014). \nhttps:\/\/www.intel.com\/content\/www\/us\/en\/processors\/xeon\/xeon-e5-1600-2600-vol-2-datasheet.html"},{"key":"16_CR30","unstructured":"Kelly, S.M., Brightwell, R.: Software architecture of the light weight kernel, Catamount. In: Cray User Group, pp. 16\u201319 (2005)"},{"key":"16_CR31","unstructured":"K\u00fcttler, M., Planeta, M., Bierbaum, J., Weinhold, C., H\u00e4 rtig, H., Barak, A., Hoefler, T.: Corrected trees for reliable group communication. In: Proceedings of the 24th Symposium on Principles and Practice of Parallel Programming, PPoPP\u201919, pp. 287\u2013299. ACM, New York (2019). \nhttp:\/\/doi.acm.org\/10.1145\/3293883.3295721"},{"key":"16_CR32","first-page":"31","volume":"2016","author":"A Lackorzynski","year":"2016","unstructured":"Lackorzynski, A., Weinhold, C., H\u00e4rtig, H.: Combining predictable execution with full-featured commodity systems. In: Proceedings of OSPERT2016, the 12th Annual Workshop on Operating Systems Platforms for Embedded Real-Time Applications, OSPERT 2016, pp. 31\u201336 (2016)","journal-title":"OSPERT"},{"key":"16_CR33","doi-asserted-by":"crossref","unstructured":"Lackorzynski, A., Weinhold, C., H\u00e4rtig, H.: Decoupled: Low-effort noise-free execution on commodity system. In: Proceedings of the 6th International Workshop on Runtime and Operating Systems for Supercomputers, ROSS\u201916. ACM, New York (2016)","DOI":"10.1145\/2931088.2931095"},{"key":"16_CR34","first-page":"19","volume":"2017","author":"A Lackorzynski","year":"2017","unstructured":"Lackorzynski, A., Weinhold, C., H\u00e4rtig, H.: Predictable low-latency interrupt response with general-purpose systems. In: Proceedings of OSPERT2017, the 13th Annual Workshop on Operating Systems Platforms for Embedded Real-Time Applications, OSPERT 2017, pp. 19\u201324 (2017)","journal-title":"OSPERT"},{"key":"16_CR35","unstructured":"Lawrence Livermore National Laboratory: The FTQ\/FWQ benchmark. \nhttps:\/\/asc.llnl.gov\/sequoia\/benchmarks\/FTQ_summary_v1.1.pdf"},{"key":"16_CR36","doi-asserted-by":"crossref","unstructured":"Levy, E., Barak, A., Shiloh, A., Lieber, M., Weinhold, C., H\u00e4rtig, H.: Overhead of a decentralized gossip algorithm on the performance of HPC applications. In: Proceedings of ROSS\u201914, pp. 10:1\u201310:7. ACM, New York (2014)","DOI":"10.1145\/2612262.2612271"},{"key":"16_CR37","doi-asserted-by":"publisher","first-page":"575","DOI":"10.1016\/j.future.2017.04.042","volume":"82","author":"M Lieber","year":"2018","unstructured":"Lieber, M., Nagel, W.E.: Highly scalable sfc-based dynamic load balancing and its application to atmospheric modeling. Future Gener. Comput. Syst. 82, 575\u2013590 (2018)","journal-title":"Future Gener. Comput. Syst."},{"key":"16_CR38","doi-asserted-by":"crossref","unstructured":"Lieber, M., Gr\u00fctzun, V., Wolke, R., M\u00fcller, M.S., Nagel, W.E.: Highly scalable dynamic load balancing in the atmospheric modeling system COSMO-SPECS+FD4. In: International Workshop on Applied Parallel Computing PARA 2010: Applied Parallel and Scientific Computing 2010. Lecture Notes in Computer Science, vol. 7133, pp. 131\u2013141. Springer, Berlin (2012)","DOI":"10.1007\/978-3-642-28151-8_13"},{"key":"16_CR39","doi-asserted-by":"crossref","unstructured":"Lieber, M., G\u00f6\u00dfner, K., Nagel, W.E.: The potential of diffusive load balancing at large scale. In: Proceedings of the 23rd European MPI Users\u2019 Group Meeting (EuroMPI 2016), pp. 154\u2013157 (2016)","DOI":"10.1145\/2966884.2966887"},{"key":"16_CR40","unstructured":"Liedtke, J.: On micro-kernel construction. In: SOSP\u201995: Proceedings of the Fifteenth ACM Symposium on Operating Systems Principles, pp. 237\u2013250. ACM Press, New York (1995). \nhttp:\/\/doi.acm.org\/10.1145\/224056.224075"},{"key":"16_CR41","doi-asserted-by":"publisher","unstructured":"Ligon, W.B., Ross, R.B.: Implementation and performance of a parallel file system for high performance distributed applications. In: Proceedings of 5th IEEE International Symposium on High Performance Distributed Computing (HPDC), pp. 471\u2013480 (1996). \nhttps:\/\/doi.org\/10.1109\/HPDC.1996.546218","DOI":"10.1109\/HPDC.1996.546218"},{"key":"16_CR42","doi-asserted-by":"publisher","unstructured":"Liu, N., et al.: On the role of burst buffers in leadership-class storage systems. In: Proceedings of the 2012 IEEE Conference on Massive Data Storage (MSST), pp. 1\u201311 (2012). \nhttps:\/\/doi.org\/10.1109\/MSST.2012.6232369","DOI":"10.1109\/MSST.2012.6232369"},{"key":"16_CR43","doi-asserted-by":"crossref","unstructured":"Margolin, A., Barak, A.: Tree-based fault-tolerant collective operations for MPI. In: Workshop on Exascale MPI (ExaMPI) (2018)","DOI":"10.1109\/ExaMPI49596.2019.00010"},{"key":"#cr-split#-16_CR44.1","doi-asserted-by":"crossref","unstructured":"Moody, A., Bronevetsky, G., Mohror, K., de Supinski, B.R.: Design, modeling, and evaluation of a scalable multi-level checkpointing system. In: 2010 International Conference for High Performance Computing, Networking, Storage and Analysis","DOI":"10.2172\/984082"},{"key":"#cr-split#-16_CR44.2","doi-asserted-by":"crossref","unstructured":"(SC) pp. 1-11 (2010).  https:\/\/doi.org\/10.1109\/SC.2010.18","DOI":"10.1109\/SC.2010.18"},{"key":"16_CR45","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s002240000092","volume":"31","author":"S Muthukrishnan","year":"1998","unstructured":"Muthukrishnan, S., Ghosh, B., Schultz, M.H.: First and second order diffusive methods for rapid, coarse, distributed load balancing. Theory Comput. Syst. 31, 331\u2013354 (1998)","journal-title":"Theory Comput. Syst."},{"key":"16_CR46","unstructured":"Nicolae, B., et al.: Veloc: Very low overhead checkpointing system. \nhttps:\/\/veloc.readthedocs.io\/en\/latest\/"},{"key":"16_CR47","unstructured":"Patterson, D.A., et al.: A case for redundant arrays of inexpensive disks (RAID). In: ACM SIGMOD Record, pp. 109\u2013116 (1988). \nhttp:\/\/doi.acm.org\/10.1145\/50202.50214"},{"key":"16_CR48","unstructured":"Pedretti, K.T., Levenhagen, M., Ferreira, K., Brightwell, R., Kelly, S., Bridges, P., Hudson, T.: LDRD final report: a lightweight operating system for multi-core capability class supercomputers. Technical report SAND2010-6232, Sandia National Laboratories (2010)"},{"key":"16_CR49","doi-asserted-by":"crossref","unstructured":"Petrini, F., Kerbyson, D., Pakin, S.: The case of the missing supercomputer performance: achieving optimal performance on the 8,192 processors of ASCI Q. In: Proceedings of the 15th Annual IEEE\/ACM International Conference for High Performance Computing, Networking, Storage and Anaylsis (SC\u201903) (2003)","DOI":"10.1145\/1048935.1050204"},{"issue":"6","key":"16_CR50","doi-asserted-by":"publisher","first-page":"793","DOI":"10.1002\/cpe.1361","volume":"21","author":"R Riesen","year":"2009","unstructured":"Riesen, R., Brightwell, R., Bridges, P.G., Hudson, T., Maccabe, A.B., Widener, P.M., Ferreira, K.: Designing and implementing lightweight kernels for capability computing. Concurrency and Computation: Practice and Experience 21(6), 793\u2013817 (2009). \nhttp:\/\/dx.doi.org\/10.1002\/cpe.v21:6","journal-title":"Concurrency and Computation: Practice and Experience"},{"key":"16_CR51","doi-asserted-by":"crossref","unstructured":"Riesen, R., Maccabe, A.B., Gerofi, B., Lombard, D.N., Lange, J.J., Pedretti, K., Ferreira, K., Lang, M., Keppel, P., Wisniewski, R.W., Brightwell, R., Inglett, T., Park, Y., Ishikawa, Y.: What is a lightweight kernel? In: Proceedings of the 5th International Workshop on Runtime and Operating Systems for Supercomputers, ROSS. ACM, New York (2015). \nhttps:\/\/doi.org\/10.1145\/2768405.2768414","DOI":"10.1145\/2768405.2768414"},{"key":"16_CR52","doi-asserted-by":"crossref","unstructured":"Schloegel, K., Karypis, G., Kumar, V.: A unified algorithm for load-balancing adaptive scientific simulations. In: Proceedings of the IEEE\/ACM SC2000 Conference, pp. 59\u201359 (2000)","DOI":"10.1109\/SC.2000.10035"},{"key":"16_CR53","doi-asserted-by":"crossref","unstructured":"Seelam, S., Fong, L., Tantawi, A., Lewars, J., Divirgilio, J., Gildea, K.: Extreme scale computing: modeling the impact of system noise in multicore clustered systems. In: 2010 IEEE International Symposium on Parallel Distributed Processing (IPDPS) (2010). \nhttps:\/\/doi.org\/10.1109\/IPDPS.2010.5470398","DOI":"10.1109\/IPDPS.2010.5470398"},{"key":"16_CR54","doi-asserted-by":"crossref","unstructured":"Shamis, P., Venkata, M.G., Lopez, M.G., Baker, M.B., Hernandez, O., Itigin, Y., Dubman, M., Shainer, G., Graham, R.L., Liss, L., Shahar, Y., Potluri, S., Rossetti, D., Becker, D., Poole, D., Lamb, C., Kumar, S., Stunkel, C., Bosilca, G., Bouteiller, A.: UCX: an open source framework for HPC network APIs and beyond. In: 2015 IEEE 23rd Annual Symposium on High-Performance Interconnects, pp. 40\u201343 (2015)","DOI":"10.1109\/HOTI.2015.13"},{"key":"16_CR55","doi-asserted-by":"crossref","unstructured":"Shimosawa, T., Gerofi, B., Takagi, M., Nakamura, G., Shirasawa, T., Saeki, Y., Shimizu, M., Hori, A., Ishikawa, Y.: Interface for Heterogeneous Kernels: a framework to enable hybrid OS designs targeting high performance computing on manycore architectures. In: 21th International Conference on High Performance Computing, HiPC (2014)","DOI":"10.1109\/HiPC.2014.7116885"},{"key":"16_CR56","doi-asserted-by":"publisher","unstructured":"Sodani, A.: Knights landing (KNL): 2nd generation intel xeon phi processor. In: 2015 IEEE Hot Chips 27 Symposium (HCS), pp. 1\u201324 (2015). \nhttps:\/\/doi.org\/10.1109\/HOTCHIPS.2015.7477467","DOI":"10.1109\/HOTCHIPS.2015.7477467"},{"key":"16_CR57","doi-asserted-by":"crossref","unstructured":"Teresco, J.D., Devine, K.D., Flaherty, J.E.: Partitioning and dynamic load balancing for the numerical solution of partial differential equations. In: Numerical Solution of Partial Differential Equations on Parallel Computers. Lecture Notes in Computational Science and Engineering, vol. 51, pp. 55\u201388. Springer, Berlin (2006)","DOI":"10.1007\/3-540-31619-1_2"},{"key":"16_CR58","doi-asserted-by":"crossref","unstructured":"Walshaw, C., Cross, M.: Jostle \u2013 multilevel graph partitioning software: an overview. In: Mesh Partitioning Techniques and Domain Decomposition Methods, chap. 2, pp. 27\u201358 (2007)","DOI":"10.4203\/csets.17.2"},{"key":"16_CR59","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1007\/978-3-319-40528-5_18","volume-title":"Software for Exascale Computing - SPPEXA 2013\u20132015","author":"C Weinhold","year":"2016","unstructured":"Weinhold, C., Lackorzynski, A., Bierbaum, J., K\u00fcttler, M., Planeta, M., H\u00e4rtig, H., Shiloh, A., Levy, E., Ben-Nun, T., Barak, A., Steinke, T., Sch\u00fctt, T., Fajerski, J., Reinefeld, A., Lieber, M., Nagel, W.E.: FFMK: a fast and fault-tolerant microkernel-based system for exascale computing. In: Bungartz, H.J., Neumann, P., Nagel, W.E. (eds.) Software for Exascale Computing - SPPEXA 2013\u20132015, pp. 405\u2013426. Springer, Cham (2016)"},{"key":"16_CR60","doi-asserted-by":"crossref","unstructured":"Weinhold, C., Lackorzynski, A., H\u00e4rtig, H.: FFMK: an HPC OS based on the L4Re Microkernel. In: R.W. Wisniewski, B. Gerofi, R. Riesen, Y. Ishikawa (eds.) Operating Systems for Supercomputers and High Performance Computing. Springer Singapore (2019)","DOI":"10.1007\/978-981-13-6624-6_19"},{"key":"16_CR61","doi-asserted-by":"publisher","first-page":"246","DOI":"10.1007\/978-3-319-92040-5_13","volume-title":"High Performance Computing","author":"H Weisbach","year":"2018","unstructured":"Weisbach, H., Gerofi, B., Kocoloski, B., H\u00e4rtig, H., Ishikawa, Y.: Hardware performance variation: a comparative study using lightweight kernels. In: Yokota, R., Weiland, M., Keyes, D., Trinitis, C. (eds.) High Performance Computing, pp. 246\u2013265. Springer, Cham (2018)"},{"key":"16_CR62","first-page":"13","volume":"2015","author":"F Wende","year":"2015","unstructured":"Wende, F., Steinke, T., Reinefeld, A.: The impact of process placement and oversubscription on application performance: a case study for exascale computing. In: Gray, A., Smith, L., Weiland, M. (eds.) Proceedings of the 3rd International Conference on Exascale Applications and Software, EASC 2015, pp. 13\u201318 (2015)","journal-title":"EASC"},{"key":"16_CR63","doi-asserted-by":"crossref","unstructured":"Wisniewski, R.W., Inglett, T., Keppel, P., Murty, R., Riesen, R.: mOS: an architecture for extreme-scale operating systems. In: Proceedings of the 4th International Workshop on Runtime and Operating Systems for Supercomputers (ROSS\u201914), pp. 2:1\u20132:8. ACM, New York (2014)","DOI":"10.1145\/2612262.2612263"},{"issue":"2","key":"16_CR64","doi-asserted-by":"publisher","first-page":"6","DOI":"10.1109\/MM.2015.11","volume":"35","author":"T Yoshida","year":"2015","unstructured":"Yoshida, T., Hondou, M., Tabata, T., Kan, R., Kiyota, N., Kojima, H., Hosoe, K., Okano, H.: Sparc64 XIfx: Fujitsu\u2019s next-generation processor for high-performance computing. IEEE Micro 35(2), 6\u201314 (2015). \nhttps:\/\/doi.org\/10.1109\/MM.2015.11","journal-title":"IEEE Micro"},{"issue":"9","key":"16_CR65","doi-asserted-by":"publisher","first-page":"530","DOI":"10.1145\/361147.361115","volume":"17","author":"JW Young","year":"1974","unstructured":"Young, J.W.: A first order approximation to the optimal checkpoint interval. Commun. ACM 17(9), 530\u2013531 (1974). \nhttp:\/\/doi.acm.org\/10.1145\/361147.361115","journal-title":"Commun. ACM"}],"container-title":["Lecture Notes in Computational Science and Engineering","Software for Exascale Computing - SPPEXA 2016-2019"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-47956-5_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,7,30]],"date-time":"2020-07-30T17:34:58Z","timestamp":1596130498000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-47956-5_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030479558","9783030479565"],"references-count":66,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-47956-5_16","relation":{},"ISSN":["1439-7358","2197-7100"],"issn-type":[{"type":"print","value":"1439-7358"},{"type":"electronic","value":"2197-7100"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"31 July 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}