{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T07:31:14Z","timestamp":1781076674277,"version":"3.54.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T00:00:00Z","timestamp":1692316800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T00:00:00Z","timestamp":1692316800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003593","name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","doi-asserted-by":"publisher","award":["308959\/2020-5"],"award-info":[{"award-number":["308959\/2020-5"]}],"id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001807","name":"Funda\u00e7\u00e3o de Amparo \u00e1 Pesquisa do Estado de S\u00e3o Paulo","doi-asserted-by":"publisher","award":["2021\/06923-0"],"award-info":[{"award-number":["2021\/06923-0"]}],"id":[{"id":"10.13039\/501100001807","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Computing"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s00607-023-01211-8","type":"journal-article","created":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T05:01:38Z","timestamp":1692334898000},"page":"2821-2845","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["The missing piece: a distributed system-level diagnosis model for the implementation of unreliable failure detectors"],"prefix":"10.1007","volume":"105","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8916-3302","authenticated-orcid":false,"suffix":"Jr.","given":"Elias P.","family":"Duarte","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9516-1282","authenticated-orcid":false,"given":"Luiz A.","family":"Rodrigues","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6520-9142","authenticated-orcid":false,"given":"Edson T.","family":"Camargo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5242-5057","authenticated-orcid":false,"given":"Rog\u00e9rio C.","family":"Turchetti","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,8,18]]},"reference":[{"key":"1211_CR1","unstructured":"NYT: gone in minutes, out for hours: outage shakes facebook (2021) https:\/\/www.nytimes.com\/2021\/10\/04\/technology\/facebook-down.html"},{"key":"1211_CR2","unstructured":"Codestone: the true impact of IT failures (2017) https:\/\/www.codestone.net\/our-thoughts\/true-impact-of-it-failures"},{"key":"1211_CR3","first-page":"43","volume-title":"Probabilistic logics and the synthesis of reliable organisms from unreliable components","author":"J Neumann","year":"1956","unstructured":"Neumann J, Shannon CE, McCarthy J (1956) Probabilistic logics and the synthesis of reliable organisms from unreliable components. Princeton University Press, Princeton, pp 43\u201398"},{"issue":"1","key":"1211_CR4","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1109\/TDSC.2004.2","volume":"1","author":"A Avizienis","year":"2004","unstructured":"Avizienis A, Laprie J-C, Randell B, Landwehr C (2004) Basic concepts and taxonomy of dependable and secure computing. IEEE Trans Dep Secure Comput 1(1):11\u201333. https:\/\/doi.org\/10.1109\/TDSC.2004.2","journal-title":"IEEE Trans Dep Secure Comput"},{"key":"1211_CR5","unstructured":"Beyer B, Jones C, Petoff J, Murphy NR (2016) Site reliability engineering: how Google runs production systems. O\u2019Reilly, Sebastopol, United States http:\/\/landing.google.com\/sre\/book.html"},{"issue":"4","key":"1211_CR6","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1109\/MPDT.1996.7102341","volume":"4","author":"NK Jha","year":"1996","unstructured":"Jha NK (1996) Fault-tolerant computer system design. IEEE Parallel Distrib Technol Syst Appl 4(4):84\u201384. https:\/\/doi.org\/10.1109\/MPDT.1996.7102341","journal-title":"IEEE Parallel Distrib Technol Syst Appl"},{"key":"1211_CR7","doi-asserted-by":"publisher","unstructured":"Duarte\u00a0Jr EP, Santini R, Cohen J (2004) Delivering packets during the routing convergence latency interval through highly connected detours. In: DSN, pp 495\u2013504. https:\/\/doi.org\/10.1109\/DSN.2004.1311919","DOI":"10.1109\/DSN.2004.1311919"},{"issue":"1","key":"1211_CR8","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1145\/1052796.1052806","volume":"36","author":"M Reynal","year":"2005","unstructured":"Reynal M (2005) A short introduction to failure detectors for asynchronous distributed systems. SIGACT News 36(1):53\u201370. https:\/\/doi.org\/10.1145\/1052796.1052806","journal-title":"SIGACT News"},{"issue":"6","key":"1211_CR9","doi-asserted-by":"publisher","first-page":"848","DOI":"10.1109\/PGEC.1967.264748","volume":"16","author":"FP Preparata","year":"1967","unstructured":"Preparata FP, Metze G, Chien RT (1967) On the connection assignment problem of diagnosable systems. IEEE Trans Electron Comput 16(6):848\u2013854. https:\/\/doi.org\/10.1109\/PGEC.1967.264748","journal-title":"IEEE Trans Electron Comput"},{"key":"1211_CR10","first-page":"478","volume-title":"System diagnosis","author":"GM Masson","year":"1996","unstructured":"Masson GM, Blough DM, Sullivan GF, Pradhan DK (1996) System diagnosis. Prentice-Hall Inc, USA, pp 478\u2013536"},{"key":"1211_CR11","doi-asserted-by":"publisher","DOI":"10.1145\/1922649.1922659","author":"EP Duarte","year":"2011","unstructured":"Duarte EP, Ziwich RP, Albini LCP (2011) A survey of comparison-based system-level diagnosis. ACM Comput Surv. https:\/\/doi.org\/10.1145\/1922649.1922659","journal-title":"ACM Comput Surv"},{"issue":"2","key":"1211_CR12","doi-asserted-by":"publisher","first-page":"374","DOI":"10.1145\/3149.214121","volume":"32","author":"MJ Fischer","year":"1985","unstructured":"Fischer MJ, Lynch NA (1985) Impossibility of distributed consensus with one faulty process. J ACM 32(2):374\u2013382. https:\/\/doi.org\/10.1145\/3149.214121","journal-title":"J ACM"},{"issue":"2","key":"1211_CR13","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1145\/226643.226647","volume":"43","author":"TD Chandra","year":"1996","unstructured":"Chandra TD, Toueg S (1996) Unreliable failure detectors for reliable distributed systems. J ACM 43(2):225\u2013267. https:\/\/doi.org\/10.1145\/226643.226647","journal-title":"J ACM"},{"key":"1211_CR14","doi-asserted-by":"publisher","unstructured":"Bertier M, Marin O, Sens P (2002) Implementation and performance evaluation of an adaptable failure detector. In: DSN, pp 354\u2013363. https:\/\/doi.org\/10.1109\/DSN.2002.1028920","DOI":"10.1109\/DSN.2002.1028920"},{"issue":"1","key":"1211_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13174-016-0051-y","volume":"7","author":"RC Turchetti","year":"2016","unstructured":"Turchetti RC, Duarte EP, Arantes L, Sens P (2016) A QoS-configurable failure detection service for internet applications. J Internet Serv Appl (JISA) 7(1):1\u201314. https:\/\/doi.org\/10.1186\/s13174-016-0051-y","journal-title":"J Internet Serv Appl (JISA)"},{"issue":"6","key":"1211_CR16","doi-asserted-by":"publisher","first-page":"1988","DOI":"10.1002\/nem.1988","volume":"27","author":"RC Turchetti","year":"2017","unstructured":"Turchetti RC, Duarte EP (2017) NFV-FD: implementation of a failure detector using network virtualization technology. Int J Netw Manag 27(6):1988. https:\/\/doi.org\/10.1002\/nem.1988","journal-title":"Int J Netw Manag"},{"key":"1211_CR17","doi-asserted-by":"publisher","unstructured":"Gupta I, Chandra TD, Goldszmidt GS (2001) On scalable and efficient distributed failure detectors. In: 20th PODCP, ACM, New York, pp 170\u2013179 https:\/\/doi.org\/10.1145\/383962.384010","DOI":"10.1145\/383962.384010"},{"issue":"1","key":"1211_CR18","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1109\/T-C.1974.223782","volume":"23","author":"SL Hakimi","year":"1974","unstructured":"Hakimi SL, Amin AT (1974) Characterization of connection assignment of diagnosable systems. IEEE Trans Comput 23(1):86\u201388. https:\/\/doi.org\/10.1109\/T-C.1974.223782","journal-title":"IEEE Trans Comput"},{"issue":"3","key":"1211_CR19","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1109\/TC.1984.1676420","volume":"33","author":"N Hakimi","year":"1984","unstructured":"Hakimi N (1984) On adaptive system diagnosis. IEEE Trans Comput 33(3):234\u2013240. https:\/\/doi.org\/10.1109\/TC.1984.1676420","journal-title":"IEEE Trans Comput"},{"key":"1211_CR20","doi-asserted-by":"publisher","unstructured":"Hosseini, Kuhl, Reddy (1984) A diagnosis algorithm for distributed computing systems with dynamic failure and repair. IEEE Trans Comput 33(3):223\u2013233. https:\/\/doi.org\/10.1109\/TC.1984.1676419","DOI":"10.1109\/TC.1984.1676419"},{"issue":"5","key":"1211_CR21","doi-asserted-by":"publisher","first-page":"616","DOI":"10.1109\/12.142688","volume":"41","author":"RP Bianchini","year":"1992","unstructured":"Bianchini RP, Buskens RW (1992) Implementation of online distributed system-level diagnosis theory. IEEE Trans Comput 41(5):616\u2013626. https:\/\/doi.org\/10.1109\/12.142688","journal-title":"IEEE Trans Comput"},{"issue":"1","key":"1211_CR22","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1109\/12.656078","volume":"47","author":"EP Duarte","year":"1998","unstructured":"Duarte EP, Nanya T (1998) A hierarchical adaptive distributed system-level diagnosis algorithm. IEEE Trans Comput 47(1):34\u201345. https:\/\/doi.org\/10.1109\/12.656078","journal-title":"IEEE Trans Comput"},{"key":"1211_CR23","doi-asserted-by":"publisher","unstructured":"Duarte EP, De\u00a0Bona LCE (2002) A dependable snmp-based tool for distributed network management. In: DSN, IEEE, pp 279\u2013284. https:\/\/doi.org\/10.1109\/DSN.2002.1028911","DOI":"10.1109\/DSN.2002.1028911"},{"key":"1211_CR24","doi-asserted-by":"publisher","unstructured":"Duarte EP, Bona LCE, Ruoso VK (2014) Vcube: a provably scalable distributed diagnosis algorithm. In: 2014 5th Workshop on latest advances in scalable algorithms for large-scale systems, pp 17\u201322. https:\/\/doi.org\/10.1109\/ScalA.2014.14","DOI":"10.1109\/ScalA.2014.14"},{"key":"1211_CR25","doi-asserted-by":"publisher","unstructured":"Rodrigues LA, Arantes L, Duarte EP (2016) An autonomic majority quorum system. In: 2016 IEEE 30th international conference on advanced information networking and applications (AINA), IEEE, pp 524\u2013531. https:\/\/doi.org\/10.1109\/AINA.2016.73","DOI":"10.1109\/AINA.2016.73"},{"key":"1211_CR26","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1016\/j.jpdc.2018.10.011","volume":"125","author":"JP Araujo","year":"2019","unstructured":"Araujo JP, Arantes L, Duarte EP Jr, Rodrigues LA, Sens P (2019) VCube-PS: a causal broadcast topic-based publish\/subscribe system. J Parallel Distrib Comput 125:18\u201330. https:\/\/doi.org\/10.1016\/j.jpdc.2018.10.011","journal-title":"J Parallel Distrib Comput"},{"issue":"8","key":"1211_CR27","doi-asserted-by":"publisher","first-page":"1415","DOI":"10.1109\/TPDS.2011.284","volume":"23","author":"EP Duarte","year":"2012","unstructured":"Duarte EP, Weber A, Fonseca KVO (2012) Distributed diagnosis of dynamic events in partitionable arbitrary topology networks. IEEE Trans Parallel Distrib 23(8):1415\u20131426. https:\/\/doi.org\/10.1109\/TPDS.2011.284","journal-title":"IEEE Trans Parallel Distrib"},{"issue":"1","key":"1211_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13173-018-0069-z","volume":"24","author":"ET Camargo","year":"2018","unstructured":"Camargo ET, Duarte EP (2018) Running resilient MPI applications on a dynamic group of recommended processes. J Braz Comput Soc 24(1):1\u201316. https:\/\/doi.org\/10.1186\/s13173-018-0069-z","journal-title":"J Braz Comput Soc"},{"issue":"11","key":"1211_CR29","doi-asserted-by":"publisher","first-page":"3131","DOI":"10.1109\/TPDS.2016.2524004","volume":"27","author":"RP Ziwich","year":"2016","unstructured":"Ziwich RP (2016) A nearly optimal comparison-based diagnosis algorithm for systems of arbitrary topology. IEEE Trans Parallel Distrib 27(11):3131\u20133143. https:\/\/doi.org\/10.1109\/TPDS.2016.2524004","journal-title":"IEEE Trans Parallel Distrib"},{"key":"1211_CR30","doi-asserted-by":"publisher","unstructured":"Ziwich RP, Duarte EP, Albini LCP (2005) Distributed integrity checking for systems with replicated data. In: 11th ICPADS\u201905, vol 1, pp 363\u20133691. https:\/\/doi.org\/10.1109\/ICPADS.2005.130","DOI":"10.1109\/ICPADS.2005.130"},{"issue":"4","key":"1211_CR31","doi-asserted-by":"publisher","first-page":"1253","DOI":"10.1109\/TPDS.2023.3242089","volume":"34","author":"J Song","year":"2023","unstructured":"Song J, Lin L, Huang Y, Hsieh SY (2023) Intermittent fault diagnosis of split-star networks and its applications. IEEE Trans Parallel Distrib Syst 34(4):1253\u20131264. https:\/\/doi.org\/10.1109\/TPDS.2023.3242089","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"1211_CR32","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1016\/j.dam.2023.05.029","volume":"339","author":"C Guo","year":"2023","unstructured":"Guo C, Wu C, Xiao Z, Lu J, Liu Z (2023) The intermittent diagnosability for two families of interconnection networks under the PMC model and mm* model. Discret Appl Math 339:89\u2013106. https:\/\/doi.org\/10.1016\/j.dam.2023.05.029","journal-title":"Discret Appl Math"},{"key":"1211_CR33","doi-asserted-by":"publisher","unstructured":"Delporte-Gallet C, Fauconnier H, Guerraoui R, Hadzilacos V, Kouznetsov P, Toueg S (2004) The weakest failure detectors to solve certain fundamental problems in distributed computing, ACM, New York, pp. 338\u2013346 https:\/\/doi.org\/10.1145\/1011767.1011818","DOI":"10.1145\/1011767.1011818"},{"issue":"4","key":"1211_CR34","doi-asserted-by":"publisher","first-page":"685","DOI":"10.1145\/234533.234549","volume":"43","author":"TD Chandra","year":"1996","unstructured":"Chandra TD, Hadzilacos V, Toueg S (1996) The weakest failure detector for solving consensus. J ACM 43(4):685\u2013722. https:\/\/doi.org\/10.1145\/234533.234549","journal-title":"J ACM"},{"issue":"1","key":"1211_CR35","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1109\/12.980014","volume":"51","author":"W Chen","year":"2002","unstructured":"Chen W, Toueg S, Aguilera MK (2002) On the quality of service of failure detectors. IEEE Trans Comput 51(1):13\u201332. https:\/\/doi.org\/10.1109\/12.980014","journal-title":"IEEE Trans Comput"},{"key":"1211_CR36","doi-asserted-by":"publisher","unstructured":"Urban P, Defago X, Schiper A (2001) Neko: a single environment to simulate and prototype distributed algorithms. In: 15th ICOIN, pp 503\u2013511. https:\/\/doi.org\/10.1109\/ICOIN.2001.905471","DOI":"10.1109\/ICOIN.2001.905471"},{"key":"1211_CR37","doi-asserted-by":"publisher","first-page":"777","DOI":"10.1016\/j.ins.2020.08.068","volume":"547","author":"SU Jan","year":"2021","unstructured":"Jan SU, Lee YD, Koo IS (2021) A distributed sensor-fault detection and diagnosis framework using machine learning. Inf Sci 547:777\u2013796. https:\/\/doi.org\/10.1016\/j.ins.2020.08.068","journal-title":"Inf Sci"},{"issue":"5","key":"1211_CR38","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1109\/JCN.2020.000023","volume":"22","author":"KT Bui","year":"2020","unstructured":"Bui KT, Van Vo L, Nguyen CM, Pham TV, Tran HC (2020) A fault detection and diagnosis approach for multi-tier application in cloud computing. J Commun Net 22(5):399\u2013414. https:\/\/doi.org\/10.1109\/JCN.2020.000023","journal-title":"J Commun Net"},{"issue":"7","key":"1211_CR39","doi-asserted-by":"publisher","first-page":"5926","DOI":"10.1109\/JIOT.2020.3032544","volume":"8","author":"W Zhang","year":"2020","unstructured":"Zhang W, Lu Q, Yu Q et al (2020) Blockchain-based federated learning for device failure detection in industrial IoT. IEEE Internet Things J 8(7):5926\u20135937. https:\/\/doi.org\/10.1109\/JIOT.2020.3032544","journal-title":"IEEE Internet Things J"}],"container-title":["Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-023-01211-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00607-023-01211-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-023-01211-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,14]],"date-time":"2023-10-14T13:03:43Z","timestamp":1697288623000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00607-023-01211-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,18]]},"references-count":39,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["1211"],"URL":"https:\/\/doi.org\/10.1007\/s00607-023-01211-8","relation":{},"ISSN":["0010-485X","1436-5057"],"issn-type":[{"value":"0010-485X","type":"print"},{"value":"1436-5057","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,18]]},"assertion":[{"value":"20 October 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 August 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 August 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors hereby ensure that there are no conflicts of interest regarding this manuscript and its publication on Computing. The research\/paper is fully compliant with all ethical standards. Elias P. Duarte Jr. is an Associate Editor of the Computing journal.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}