{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,1,27]],"date-time":"2024-01-27T09:33:42Z","timestamp":1706348022118},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2014,5,29]],"date-time":"2014-05-29T00:00:00Z","timestamp":1401321600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2016,4]]},"DOI":"10.1007\/s10766-014-0312-y","type":"journal-article","created":{"date-parts":[[2014,5,28]],"date-time":"2014-05-28T12:25:39Z","timestamp":1401279939000},"page":"208-232","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["Architectural Support for Fault Tolerance in a Teradevice Dataflow System"],"prefix":"10.1007","volume":"44","author":[{"given":"Sebastian","family":"Weis","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Arne","family":"Garbade","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bernhard","family":"Fechner","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Avi","family":"Mendelson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roberto","family":"Giorgi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Theo","family":"Ungerer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,5,29]]},"reference":[{"key":"312_CR1","unstructured":"International Technology Roadmap for Semiconductors 2011 Edition. Website. http:\/\/www.itrs.net"},{"key":"312_CR2","doi-asserted-by":"crossref","unstructured":"Agarwal, R., Garg, P., Torrellas, J.: Rebound: scalable checkpointing for coherent shared memory. In: International Symposium on Computer Architecture (ISCA), pp. 153\u2013164. IEEE (2011)","DOI":"10.1145\/2000064.2000083"},{"key":"312_CR3","unstructured":"AMD Inc.: AMD64 Architecture Programmer\u2019s Manual Volume 2: System Programming (2006)"},{"key":"312_CR4","unstructured":"Arandi, S., Kyriacou, C., George, M., George, M., Masrujeh, N., Trancoso, P., Evripidou, S., Giorgi, R., Zhibin, Y., Collange, S., Scionti, A., Khan, B., Khan, S., Lujan, M., Watson, I., Etsion, Y., Ungerer, T., Fechner, B., Garbade, A., Weis, S.: D6.2-advanced teraflux architecture. Public deliverable, The TERAFLUX Project (FP7\/2007-2013 Grant Agreement No. 249013) (2011)"},{"key":"312_CR5","doi-asserted-by":"crossref","unstructured":"Argollo, E., Falc\u00f3n, A., Faraboschi, P., Monchiero, M., Ortega, D.: COTSon: infrastructure for full system simulation. ACM SIGOPS Oper. Syst. Rev. 43(1), 52\u201361 (2009)","DOI":"10.1145\/1496909.1496921"},{"key":"312_CR6","doi-asserted-by":"crossref","unstructured":"Austin, T.: DIVA: a reliable substrate for deep submicron microarchitecture design. In: International Symposium on Microarchitecture (MICRO), pp. 196\u2013207. IEEE (1999)","DOI":"10.1109\/MICRO.1999.809458"},{"key":"312_CR7","doi-asserted-by":"crossref","unstructured":"Bell, S., et al.: TILE64-processor: a 64-core soc with mesh interconnect. In: International Solid-State Circuits Conference (ISSCC). Digest of Technical Papers, pp. 88\u201389. IEEE (2008)","DOI":"10.1109\/ISSCC.2008.4523070"},{"key":"312_CR8","doi-asserted-by":"crossref","unstructured":"Bernick, D., Bruckert, B., Vigna, P., Garcia, D., Jardine, R., Klecka, J., Smullen, J.: Nonstop advanced architecture. In: International Conference on Dependable Systems and Networks (DSN), pp. 12\u201321. IEEE (2005)","DOI":"10.1109\/DSN.2005.70"},{"issue":"6","key":"312_CR9","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1109\/MM.2005.110","volume":"25","author":"S Borkar","year":"2005","unstructured":"Borkar, S.: Designing reliable systems from unreliable components: the challenges of transistor variability and degradation. IEEE Micro 25(6), 10\u201316 (2005)","journal-title":"IEEE Micro"},{"key":"312_CR10","doi-asserted-by":"crossref","unstructured":"Borkar, S.: Thousand core chips: a technology perspective. In: Annual Design Automation Conference (DAC), pp. 746\u2013749. ACM (2007)","DOI":"10.1145\/1278480.1278667"},{"key":"312_CR11","doi-asserted-by":"crossref","unstructured":"Elnozahy, E.N.M., Alvisi, L., Wang, Y.M., Johnson, D.B.: A survey of rollback-recovery protocols in message-passing systems. ACM Comput. Surv. 34(3), 375\u2013408 (2002)","DOI":"10.1145\/568522.568525"},{"key":"312_CR12","doi-asserted-by":"crossref","unstructured":"Etsion, Y., Cabarcas, F., Rico, A., Ramirez, A., Badia, R. M., Ayguade, E., Labarta, J., Valero, M.: Task superscalar: an out-of-order task pipeline. In: International Symposium on Microarchitecture (MICRO), pp. 89\u2013100. IEEE (2010)","DOI":"10.1109\/MICRO.2010.13"},{"key":"312_CR13","doi-asserted-by":"crossref","unstructured":"Gautier, T., Besseron, X., Pigeon, L.: KAAPI: a thread scheduling runtime system for data flow computations on cluster of multi-processors. In: International Workshop on Parallel Symbolic Computation (PASCO), pp. 15\u201323. ACM (2007)","DOI":"10.1145\/1278177.1278182"},{"key":"312_CR14","doi-asserted-by":"crossref","unstructured":"Giorgi, R.: TERAFLUX: exploiting dataflow parallelism in teradevices. In: International Conference on Computing Frontiers (CF), pp. 303\u2013304. ACM (2012)","DOI":"10.1145\/2212908.2212959"},{"key":"312_CR15","doi-asserted-by":"crossref","unstructured":"Giorgi, R., Popovic, Z., Puzovic, N.: DTA-C: a decoupled multi-threaded architecture for CMP systems. In: International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD), pp. 263\u2013270. IEEE (2007)","DOI":"10.1109\/SBAC-PAD.2007.27"},{"key":"312_CR16","doi-asserted-by":"crossref","unstructured":"Giorgi, R., Popovic, Z., Puzovic, N.: Implementing fine\/medium grained TLP support in a many-core architecture. In: Bertels, K., Dimopoulos, N., Silvano, C., Wong, S. (eds.) Embedded Computer Systems: Architectures, Modeling, and Simulation, Lecture Notes in Computer Science (LNCS), vol. 5657, pp. 78\u201387. Springer (2009)","DOI":"10.1007\/978-3-642-03138-0_9"},{"key":"312_CR17","doi-asserted-by":"crossref","unstructured":"Gupta, G., Sohi, G.S.: Dataflow execution of sequential imperative programs on multicore architectures. In: International Symposium on Microarchitecture (MICRO), pp. 59\u201370. ACM (2011)","DOI":"10.1145\/2155620.2155628"},{"key":"312_CR18","doi-asserted-by":"crossref","unstructured":"Hammond, L., Wong, V., Chen, M., Carlstrom, B.D., Davis, J.D., Hertzberg, B., Prabhu, M.K., Wijaya, H., Kozyrakis, C., Olukotun, K.: Transactional memory coherence and consistency. In: International Symposium on Computer Architecture (ISCA), pp. 102\u2013113. IEEE (2004)","DOI":"10.1109\/ISCA.2004.1310767"},{"key":"312_CR19","doi-asserted-by":"crossref","unstructured":"Howard, J., et al.: A 48-core ia-32 message-passing processor with dvfs in 45nm CMOS. In: International Solid-State Circuits Conference (ISSCC). Digest of Technical Papers, pp. 108\u2013109. IEEE (2010)","DOI":"10.1109\/ISSCC.2010.5434077"},{"key":"312_CR20","unstructured":"Hum, H.H.J., Maquelin, O., Theobald, K.B., Tian, X., Tang, X., Gao, G.R., Cupryk, P., Elmasri, N., Hendren, L.J., Jimenez, A., Krishnan, S., Marquez, A., Merali, S., Nemawarkar, S.S., Panangaden, P., Xue, X., Zhu, Y.: A design study of the EARTH multiprocessor. In: International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 59\u201368. IFIP Working Group (1995)"},{"issue":"6","key":"312_CR21","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/MM.2005.119","volume":"25","author":"R Iyer","year":"2005","unstructured":"Iyer, R., Nakka, N., Kalbarczyk, Z., Mitra, S.: Recent advances and new avenues in hardware-level reliability support. IEEE Micro 25(6), 18\u201329 (2005)","journal-title":"IEEE Micro"},{"key":"312_CR22","doi-asserted-by":"crossref","unstructured":"Jafar, S., Gautier, T., Krings, A., louis Roch, J.: A checkpoint\/recovery model for heterogeneous dataflow computations using work-stealing. In: Cunha, J.C., Medeiros P.D. (eds.) Euro-Par 2005 Parallel Processing, Lecture Notes in Computer Science (LNCS), vol. 3648, pp. 675\u2013684. Springer, Berlin, Heidelberg (2005)","DOI":"10.1007\/11549468_74"},{"key":"312_CR23","doi-asserted-by":"crossref","unstructured":"Kelm, J.H., Johnson, D.R., Johnson, M.R., Crago, N.C., Tuohy, W., Mahesri, A., Lumetta, S.S., Frank, M.I., Patel, S.J.: Rigel: an architecture and scalable programming interface for a 1000-core accelerator. In: International Symposium on Computer Architecture (ISCA), pp. 140\u2013151. IEEE (2009)","DOI":"10.1145\/1555754.1555774"},{"issue":"1","key":"312_CR24","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1109\/MC.2003.1160055","volume":"36","author":"J Kephart","year":"2003","unstructured":"Kephart, J., Chess, D.: The vision of autonomic computing. Computer 36(1), 41\u201350 (2003)","journal-title":"Computer"},{"key":"312_CR25","doi-asserted-by":"crossref","unstructured":"LaFrieda, C., Ipek, E., Martinez, J., Manohar, R.: Utilizing dynamically coupled cores to form a resilient chip multiprocessor. In: International Conference on Dependable Systems and Networks (DSN), pp. 317\u2013326. IEEE (2007)","DOI":"10.1109\/DSN.2007.100"},{"issue":"8","key":"312_CR26","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1109\/2.303620","volume":"27","author":"B Lee","year":"1994","unstructured":"Lee, B., Hurson, A.R.: Dataflow architectures and multithreading. Computer 27(8), 27\u201339 (1994)","journal-title":"Computer"},{"key":"312_CR27","doi-asserted-by":"crossref","unstructured":"Li, F., Pop, A., Cohen, A.: Automatic extraction of coarse-grained data-flow threads from imperative programs. IEEE Micro 32(4), 19\u201331 (2012)","DOI":"10.1109\/MM.2012.49"},{"key":"312_CR28","doi-asserted-by":"crossref","unstructured":"Mukherjee, S.S., Kontz, M., Reinhardt, S.K.: Detailed design and evaluation of redundant multithreading alternatives In: International Symposium on Computer Architecture (ISCA), pp. 99\u2013110. IEEE (2002)","DOI":"10.1145\/545214.545227"},{"key":"312_CR29","doi-asserted-by":"crossref","unstructured":"Nguyen-tuong, A., Grimshaw, A.S., Hyett, M.: Exploiting data-flow for fault-tolerance in a wide-area parallel system. In: International Symposium on Reliable and Distributed Systems, pp. 1\u201311 (1996)","DOI":"10.1109\/RELDIS.1996.559687"},{"key":"312_CR30","doi-asserted-by":"crossref","unstructured":"Prvulovic, M., Zhang, Z., Torrellas, J.: Revive: cost-effective architectural support for rollback recovery in shared-memory multiprocessors. In: International Symposium on Computer Architecture (ISCA), pp. 111\u2013122. IEEE (2002)","DOI":"10.1145\/545214.545228"},{"key":"312_CR31","doi-asserted-by":"crossref","unstructured":"Rashid, M., Huang, M.: Supporting highly-decoupled thread-level redundancy for parallel programs. In: International Symposium on High Performance Computer Architecture (HPCA), pp. 393\u2013404. IEEE (2008)","DOI":"10.1109\/HPCA.2008.4658655"},{"key":"312_CR32","doi-asserted-by":"crossref","unstructured":"Ray, J., Hoe, J.C., Falsafi, B.: Dual use of superscalar datapath for transient-fault detection and recovery. In: International Symposium on Microarchitecture (MICRO), pp. 214\u2013224. IEEE (2001)","DOI":"10.1109\/MICRO.2001.991120"},{"key":"312_CR33","doi-asserted-by":"crossref","unstructured":"Reinhardt, S.K., Mukherjee, S.S.: Transient fault detection via simultaneous multithreading. In: International Symposium on Computer Architecture (ISCA), pp. 25\u201336. ACM (2000)","DOI":"10.1145\/339647.339652"},{"key":"312_CR34","doi-asserted-by":"crossref","unstructured":"Rotenberg, E.: AR-SMT: a microarchitectural approach to fault tolerance in microprocessors. In: Twenty-Ninth Annual International Symposium on Fault-Tolerant Computing, 1999. Digest of Papers, pp. 84-91 (1999)","DOI":"10.1109\/FTCS.1999.781037"},{"key":"312_CR35","unstructured":"S\u00e1nchez, D., Arag\u00f3n, J., Garc\u00eda, J.: Evaluating dynamic core coupling in a scalable tiled-cmp architecture. In: International Workshop on Duplicating, Deconstructing, and Debunking (WDDD) (2008)"},{"key":"312_CR36","doi-asserted-by":"crossref","unstructured":"S\u00e1nchez, D., Arag\u00f3n, J., Garc\u00eda, J.: Extending SRT for parallel applications in tiled-CMP architectures. In: International Symposium on Parallel and Distributed Processing (IPDPS), pp. 1\u20138. IEEE (2009)","DOI":"10.1109\/IPDPS.2009.5160902"},{"key":"312_CR37","doi-asserted-by":"crossref","unstructured":"S\u00e1nchez, D., Arag\u00f3n, J.L., Garc\u00eda, J.M.: REPAS: Reliable Execution for Parallel Applications in Tiled-CMPs. In: Sips, H., Epema, D., Lin, H.X. (eds.) International Euro-Par Conference on Parallel Processing, Lecture Notes in Computer Science (LNCS), vol. 5704, pp. 321\u2013333. Springer, Berlin, Heidelberg (2009)","DOI":"10.1007\/978-3-642-03869-3_32"},{"key":"312_CR38","doi-asserted-by":"crossref","unstructured":"S\u00e1nchez, D., Arag\u00f3n, J. L., Garc\u00eda, J.M.: A log-based redundant architecture for reliable parallel computation. In: International Conference on High Performance Computing (HiPC), pp. 1\u201310. IEEE (2010)","DOI":"10.1109\/HIPC.2010.5713183"},{"key":"312_CR39","doi-asserted-by":"crossref","unstructured":"Slegel, T., Averill, R.M.I., Check, M., Giamei, B., Krumm, B., Krygowski, C., Li, W., Liptay, J., Macdougall, J., McPherson, T., Navarro, J., Schwarz, E., Shum, K., Webb, C.: IBM\u2019s S\/390 G5 microprocessor design. IEEE Micro 19(2), 12\u201323 (1999)","DOI":"10.1109\/40.755464"},{"key":"312_CR40","doi-asserted-by":"crossref","unstructured":"Smolens, J.C., Gold, B.T., Falsafi, B., Hoe, J.C.: Reunion: complexity-effective multicore redundancy. In: International Symposium on Microarchitecture (MICRO), pp. 223\u2013234. IEEE (2006)","DOI":"10.1109\/MICRO.2006.42"},{"key":"312_CR41","doi-asserted-by":"crossref","unstructured":"Smolens, J.C., Gold, B.T., Kim, J., Falsafi, B., Hoe, J.C., Nowatzyk, A.G.: Fingerprinting: bounding soft-error detection latency and bandwidth. In: International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS), pp. 224\u2013234. IEEE (2004)","DOI":"10.1145\/1037949.1024420"},{"key":"312_CR42","doi-asserted-by":"crossref","unstructured":"Sorin, D.J., Martin, M.M.K., Hill, M.D., Wood, D.A.: Safetynet: improving the availability of shared memory multiprocessors with global checkpoint\/recovery. In: International Symposium on Computer Architecture (ISCA), pp. 123\u2013134. IEEE (2002)","DOI":"10.1145\/545214.545229"},{"key":"312_CR43","doi-asserted-by":"crossref","unstructured":"Srinivasan, J., Adve, S.V., Bose, P., Rivers, J.A.: The impact of technology scaling on lifetime reliability. In: International Conference on Dependable Systems and Networks (DSN), pp. 177\u2013186. IEEE (2004)","DOI":"10.1109\/DSN.2004.1311888"},{"key":"312_CR44","doi-asserted-by":"crossref","unstructured":"Stavrou, K., Evripidou, P., Trancoso, P.: DDM-CMP: Data-Driven Multithreading on a Chip Multiprocessor. In: H\u00e4m\u00e4l\u00e4inen, T.D., Pimentel, A.D., Takala, J., Vassiliadis, S. (eds.) Embedded Computer Systems: Architectures, Modeling, and Simulation, Lecture Notes in Computer Science (LNCS), vol. 3553, pp. 364\u2013373. Springer, Berlin, Heidelberg (2005)","DOI":"10.1007\/11512622_39"},{"key":"312_CR45","unstructured":"Weis, S., Garbade, A., Schlingmann, S., Ungerer, T.: Towards fault detection units as an autonomous fault detection approach for future many-cores. In: ARCS 2011 Workshop Proceedings, pp. 20\u201323. VDE (2011)"},{"key":"312_CR46","doi-asserted-by":"crossref","unstructured":"Weis, S., Garbade, A., Wolf, J., Fechner, B., Mendelson, A., Giorgi, R., Ungerer, T.: A fault detection and recovery architecture for a teradevice dataflow system. In: International Workshop on Data-Flow Execution Models for Extreme Scale Computing (DFM), pp. 38\u201344. IEEE (2011)","DOI":"10.1109\/DFM.2011.9"},{"key":"312_CR47","doi-asserted-by":"crossref","unstructured":"Wittenbrink, C., Kilgariff, E., Prabhu, A.: Fermi GF100 GPU architecture. IEEE Micro 31(2), 50\u201359 (2011)","DOI":"10.1109\/MM.2011.24"},{"key":"312_CR48","doi-asserted-by":"crossref","unstructured":"Yeh, Y.: Triple-triple redundant 777 primary flight computer. In: Proceedings of the Aerospace Applications Conference, pp. 293\u2013307. IEEE (1996)","DOI":"10.1109\/AERO.1996.495891"},{"key":"312_CR49","doi-asserted-by":"crossref","unstructured":"Zuckerman, S., Suetterlein, J., Knauerhase, R., Gao, G.R.: Using a \u201ccodelet\u201d program execution model for exascale machines: position paper. In: Proceedings of the International Workshop on Adaptive Self-Tuning Computing Systems for the Exaflop Era (EXADAPT), pp. 64\u201369. ACM (2011)","DOI":"10.1145\/2000417.2000424"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-014-0312-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-014-0312-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-014-0312-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,13]],"date-time":"2023-07-13T18:51:58Z","timestamp":1689274318000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-014-0312-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,5,29]]},"references-count":49,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2016,4]]}},"alternative-id":["312"],"URL":"https:\/\/doi.org\/10.1007\/s10766-014-0312-y","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"value":"0885-7458","type":"print"},{"value":"1573-7640","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,5,29]]}}}