{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T16:38:16Z","timestamp":1758127096132,"version":"3.41.0"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2017,2,11]],"date-time":"2017-02-11T00:00:00Z","timestamp":1486771200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2017,2,11]],"date-time":"2017-02-11T00:00:00Z","timestamp":1486771200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100006192","name":"Advanced Scientific Computing Research","doi-asserted-by":"publisher","award":["DE-SC0006844"],"award-info":[{"award-number":["DE-SC0006844"]}],"id":[{"id":"10.13039\/100006192","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000183","name":"Army Research Office","doi-asserted-by":"publisher","award":["W911NF-13-1-0219"],"award-info":[{"award-number":["W911NF-13-1-0219"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2018,4]]},"DOI":"10.1007\/s10766-017-0492-3","type":"journal-article","created":{"date-parts":[[2017,2,11]],"date-time":"2017-02-11T10:20:23Z","timestamp":1486808423000},"page":"225-251","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["RedThreads: An Interface for Application-Level Fault Detection\/Correction Through Adaptive Redundant Multithreading"],"prefix":"10.1007","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2612-2001","authenticated-orcid":false,"given":"Saurabh","family":"Hukerikar","sequence":"first","affiliation":[]},{"given":"Keita","family":"Teranishi","sequence":"additional","affiliation":[]},{"given":"Pedro C.","family":"Diniz","sequence":"additional","affiliation":[]},{"given":"Robert F.","family":"Lucas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,2,11]]},"reference":[{"key":"492_CR1","unstructured":"Advanced configuration and power interface (ACPI). http:\/\/www.uefi.org\/acpi\/specs (2013)"},{"key":"492_CR2","unstructured":"Austin, T.M.: Diva: A reliable substrate for deep submicron microarchitecture design. In: Proceedings of the 32nd Annual ACM\/IEEE International Symposium on Microarchitecture, pp. 196\u2013207 (1999)"},{"key":"492_CR3","doi-asserted-by":"crossref","unstructured":"Bernick, D., Bruckert, B., Vigna, P.D., Garcia, D., Jardine, R., Klecka, J., Smullen, J.: Nonstopadvanced architecture. In: Proceedings of the 2005 International Conference on Dependable Systems and Networks, DSN \u201905, pp. 12\u201321 (2005)","DOI":"10.1109\/DSN.2005.70"},{"issue":"6","key":"492_CR4","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1109\/MM.2005.110","volume":"25","author":"S Borkar","year":"2005","unstructured":"Borkar, S.: Designing reliable systems from unreliable components: the challenges of transistor variability and degradation. IEEE Micro 25(6), 10\u201316 (2005)","journal-title":"IEEE Micro"},{"key":"492_CR5","doi-asserted-by":"crossref","unstructured":"Cheng, E., Mirkhani, S., Szafaryn, L.G., Cher, C.Y., Cho, H., Skadron, K., Stan, M.R., Lilja, K., Abraham, J.A., Bose, P., Mitra, S.: Clear: cross-layer exploration for architecting resilience\u2014combining hardware and software techniques to tolerate soft errors in processor cores. In: Proceedings of the 53rd Annual Design Automation Conference, DAC \u201916, pp. 68:1\u201368:6 (2016)","DOI":"10.1145\/2897937.2897996"},{"key":"492_CR6","unstructured":"Dongarra, J., Beckman, P., Moore, T., et al.: The international exascale software project roadmap. Int. J. High Perform. Comput. Appl. 3\u201360 (2011)"},{"key":"492_CR7","unstructured":"Elnozahy, E., Bianchini, R., El-Ghazawi, T., et al.: System resilience at extreme scale. White Paper. Tech. rep, DARPA (2009)"},{"key":"492_CR8","unstructured":"Engelmann, C., Ong, H.H., Scott, S.L.: The case for modular redundancy in large-scale high performance computing systems. In: Proceedings of the 27th IASTED International Conference on Parallel and Distributed Computing and Networks (PDCN), pp. 189\u2013194 (2009)"},{"key":"492_CR9","doi-asserted-by":"crossref","unstructured":"Ferreira, K., Stearley, J., Laros III, J.H., et al.: Evaluating the viability of process replication reliability for exascale systems. In: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201312 (2011)","DOI":"10.1145\/2063384.2063443"},{"key":"492_CR10","doi-asserted-by":"crossref","unstructured":"Gomaa, M.A., Vijaykumar, T.N.: Opportunistic transient-fault detection. In: SIGARCH Computer Architecture News, pp. 172\u2013183 (2005)","DOI":"10.1145\/1080695.1069985"},{"key":"492_CR11","unstructured":"Hoemmen, M., Heroux, M.A.: Fault-tolerant iterative methods via selective reliability. In: Proceedings of the 2011 International Conference for High Performance Computing, Networking, Storage and Analysis (SC). IEEE Computer Society, vol. 3, p. 9 (2011)"},{"key":"492_CR12","doi-asserted-by":"publisher","unstructured":"Hukerikar, S., Diniz, P.C., Lucas, R.F., Teranishi, K.: Opportunistic application-level fault detection through adaptive redundant multithreading. In: International Conference on High Performance Computing Simulation (HPCS), pp. 243\u2013250 (2014). doi: 10.1109\/HPCSim.2014.6903692","DOI":"10.1109\/HPCSim.2014.6903692"},{"key":"492_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11227-016-1752-5","volume":"72","author":"S Hukerikar","year":"2016","unstructured":"Hukerikar, S., Lucas, R.F.: Rolex: resilience-oriented language extensions for extreme-scale systems. J. Supercomput. 72, 1\u201333 (2016). doi: 10.1007\/s11227-016-1752-5","journal-title":"J. Supercomput."},{"key":"492_CR14","doi-asserted-by":"publisher","unstructured":"Hukerikar, S., Teranishi, K., Diniz, P.C., Lucas, R.F.: An evaluation of lazy fault detection based on adaptive redundant multithreading. In: IEEE High Performance Extreme Computing Conference (HPEC), pp. 1\u20136 (2014) doi: 10.1109\/HPEC.2014.7040999","DOI":"10.1109\/HPEC.2014.7040999"},{"key":"492_CR15","unstructured":"Kogge, P., Bergman, K., Borkar, S., et al.: Exascale computing study: technology challenges in achieving exascale systems. Tech. rep, DARPA (2008)"},{"key":"492_CR16","doi-asserted-by":"crossref","unstructured":"Liao, C., Quinlan, D.J., Vuduc, R., Panas, T.: Effective source-to-source outlining to support whole program empirical optimization pp. 308\u2013322 (2010)","DOI":"10.1007\/978-3-642-13374-9_21"},{"key":"492_CR17","doi-asserted-by":"publisher","unstructured":"Lidman, J., Quinlan, D.J., Liao, C., McKee, S.A.: ROSE::FTTransform\u2014a source-to-source translation framework for exascale fault-tolerance research. In: Dependable Systems and Networks Workshops (DSN-W), 2012 IEEE\/IFIP 42nd International Conference on, pp. 1\u20136 (2012). doi: 10.1109\/DSNW.2012.6264672","DOI":"10.1109\/DSNW.2012.6264672"},{"key":"492_CR18","doi-asserted-by":"publisher","DOI":"10.1002\/0471739219","volume-title":"Error correction coding: mathematical methods and algorithms","author":"TK Moon","year":"2005","unstructured":"Moon, T.K.: Error correction coding: mathematical methods and algorithms. Wiley, New York (2005)"},{"key":"492_CR19","doi-asserted-by":"crossref","unstructured":"Mukherjee, S.S., Kontz, M., Reinhardt, S.K.: Detailed design and evaluation of redundant multithreading alternatives. In: SIGARCH Computer Architecture News, pp. 99\u2013110. Wiley-Interscience, Hoboken, N.J. (2002)","DOI":"10.1145\/545214.545227"},{"key":"492_CR20","doi-asserted-by":"crossref","unstructured":"Oh, N., Shirvani, P.P., McCluskey, E.J.: Error detection by duplicated instructions in super-scalar processors. IEEE Trans. Reliab. pp. 63\u201375 (2002)","DOI":"10.1109\/24.994913"},{"key":"492_CR21","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1145\/1168917.1168870","volume":"5","author":"A Parashar","year":"2006","unstructured":"Parashar, A., Sivasubramaniam, A., Gurumurthi, S.: Slick: Slice-based locality exploitation for efficient redundant multithreading. SIGOPS Oper. Syst. Rev. 5, 95\u2013105 (2006)","journal-title":"SIGOPS Oper. Syst. Rev."},{"key":"492_CR22","unstructured":"Quinlan, D., et al.: Rose Compiler (2000) http:\/\/www.rosecompiler.org"},{"key":"492_CR23","doi-asserted-by":"crossref","unstructured":"Reinhardt, S.K., Mukherjee, S.S.: Transient fault detection via simultaneous multithreading. In: Proceedings of the 27th Annual International Symposium on Computer Architecture, pp. 25\u201336 (2000)","DOI":"10.1145\/339647.339652"},{"key":"492_CR24","doi-asserted-by":"crossref","unstructured":"Reis, G., Chang, J., Vachharajani, N., et al.: SWIFT: software implemented fault tolerance. In: International Symposium on Code Generation and Optimization, pp. 243\u2013254 (2005)","DOI":"10.1109\/CGO.2005.34"},{"key":"492_CR25","doi-asserted-by":"crossref","unstructured":"Sao, P., Vuduc, R.: Self-stabilizing iterative solvers. In: Proceedings of the Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems, ScalA \u201913, pp. 4:1\u20134:8 (2013)","DOI":"10.1145\/2530268.2530272"},{"issue":"2","key":"492_CR26","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1109\/TDSC.2008.62","volume":"6","author":"A Shye","year":"2009","unstructured":"Shye, A., Blomstedt, J., Moseley, T., Reddi, V.J., Connors, D.A.: Plr: a software approach to transient fault tolerance for multicore architectures. IEEE Trans. Dependable Secure Comput. 6(2), 135\u2013148 (2009)","journal-title":"IEEE Trans. Dependable Secure Comput."},{"key":"492_CR27","doi-asserted-by":"crossref","unstructured":"Siddiqua, T., Gurumurthi, S.: Balancing soft error coverage with lifetime reliability in redundantly multithreaded processors. In: 2009 IEEE International Symposium on Modeling, Analysis Simulation of Computer and Telecommunication Systems, pp. 1\u201312 (2009)","DOI":"10.1109\/MASCOT.2009.5363142"},{"key":"492_CR28","doi-asserted-by":"crossref","unstructured":"Slegel, T., Averill R.M., I., Check, M., et. al: IBM\u2019s S\/390 G5 Microprocessor Design. In: IEEE Micro, pp. 12\u201323 (1999)","DOI":"10.1109\/40.755464"},{"key":"492_CR29","unstructured":"Somers, J.: Stratus ftserver\u2013intel fault tolerant platform. Intel Developer Forum (2002)"},{"key":"492_CR30","doi-asserted-by":"crossref","unstructured":"Stearley, J., Ferreira, K., Robinson, D., et al.: Does partial replication pay off? In: IEEE\/IFIP 42nd International Conference on Dependable Systems and Networks Workshops (DSN-W) (2012)","DOI":"10.1109\/DSNW.2012.6264669"},{"key":"492_CR31","unstructured":"The Opportunities and Challenges of Exascale Computing. Tech. rep., Summary Report of the Advanced Scientific Computing Advisory Committee (ASCAC) Subcommittee (2010)"},{"key":"492_CR32","unstructured":"USC: Center for high-performance computing. https:\/\/hpcc.usc.edu\/"},{"key":"492_CR33","doi-asserted-by":"crossref","unstructured":"Vadlamani, R., Zhao, J., Burleson, W., Tessier, R.: Multicore soft error rate stabilization using adaptive dual modular redundancy. In: Proceedings of the Conference on Design, Automation and Test in Europe, DATE \u201910, pp. 27\u201332 (2010)","DOI":"10.1109\/DATE.2010.5457242"},{"key":"492_CR34","doi-asserted-by":"crossref","unstructured":"Vijaykumar, T., Pomeranz, I., Cheng, K.: Transient-fault recovery using simultaneous multithreading. In: 29th Annual International Symposium on Computer Architecture, pp. 87\u201398 (2002)","DOI":"10.1145\/545214.545226"},{"key":"492_CR35","doi-asserted-by":"crossref","unstructured":"von Neumann, J.: Probabilistic logics and the synthesis of reliable organisms from unreliable components. In Automata Studies, pp. 43\u201398. ACM, New York, NY (1956)","DOI":"10.1515\/9781400882618-003"},{"key":"492_CR36","doi-asserted-by":"publisher","unstructured":"Wang, C., Kim, H., Wu, Y., Ying, V.: Compiler-managed software-based redundant multi-threading for transient fault detection. In: International Symposium on Code Generation and Optimization, pp. 244\u2013258 (2007). doi: 10.1109\/CGO.2007.7","DOI":"10.1109\/CGO.2007.7"},{"key":"492_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Lee, J.W., Johnson, N.P., August, D.I.: DAFT: Decoupled acyclic fault tolerance. In: Proceedings of the 19th International Conference on Parallel Architectures and Compilation Techniques, PACT \u201910, pp. 87\u201398 (2010)","DOI":"10.1145\/1854273.1854289"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-017-0492-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-017-0492-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-017-0492-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,15]],"date-time":"2025-06-15T08:15:18Z","timestamp":1749975318000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-017-0492-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,11]]},"references-count":37,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2018,4]]}},"alternative-id":["492"],"URL":"https:\/\/doi.org\/10.1007\/s10766-017-0492-3","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"type":"print","value":"0885-7458"},{"type":"electronic","value":"1573-7640"}],"subject":[],"published":{"date-parts":[[2017,2,11]]},"assertion":[{"value":"7 June 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 January 2017","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 February 2017","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}