{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:15:31Z","timestamp":1767320131035,"version":"3.48.0"},"publisher-location":"Singapore","reference-count":25,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819550111","type":"print"},{"value":"9789819550128","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5012-8_12","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:12:36Z","timestamp":1767319956000},"page":"155-170","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["GRACE: A Strategic LLM-Enhanced Graph Reinforcement Learning Framework for\u00a0Adaptive Fault Recovery in\u00a0Microservice Systems"],"prefix":"10.1007","author":[{"given":"Ruibo","family":"Chen","sequence":"first","affiliation":[]},{"given":"Yanjun","family":"Pu","sequence":"additional","affiliation":[]},{"given":"Ji","family":"Xin","sequence":"additional","affiliation":[]},{"given":"Junle","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xingchuang","family":"Liao","sequence":"additional","affiliation":[]},{"given":"Kui","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Wenjun","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"12_CR1","unstructured":"Candea, G., Kawamoto, S., Fujiki, Y., Friedman, G., Fox, A.: Microreboot - A technique for cheap recovery, pp. 31\u201344. USENIX Association (2004)"},{"key":"12_CR2","doi-asserted-by":"publisher","unstructured":"Chen, R., Ren, J., Wang, L., Pu, Y., Yang, K., Wu, W.: Microegrcl: An edge-attention-based graph neural network approach for root cause localization in microservice systems. vol. 13740, pp. 264\u2013272. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20984-0_18","DOI":"10.1007\/978-3-031-20984-0_18"},{"key":"12_CR3","unstructured":"Cheng, S., Huang, A., Garlan, D., Schmerl, B.R., Steenkiste, P.: Rainbow: architecture-based self-adaptation with reusable infrastructure, pp. 276\u2013277. IEEE Computer Society (2004)"},{"key":"12_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1007\/978-3-642-10665-1_5","volume-title":"Cloud Computing","author":"Y Dai","year":"2009","unstructured":"Dai, Y., Xiang, Y., Zhang, G.: Self-healing and hybrid diagnosis in cloud computing. In: Jaatun, M.G., Zhao, G., Rong, C. (eds.) CloudCom 2009. LNCS, vol. 5931, pp. 45\u201356. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-10665-1_5"},{"key":"12_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"560","DOI":"10.1007\/978-3-030-05063-4_42","volume-title":"Algorithms and Architectures for Parallel Processing","author":"Q Du","year":"2018","unstructured":"Du, Q., Xie, T., He, Yu.: Anomaly detection and diagnosis for container-based microservices with performance monitoring. In: Vaidya, J., Li, J. (eds.) ICA3PP 2018. LNCS, vol. 11337, pp. 560\u2013572. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-05063-4_42"},{"issue":"2","key":"12_CR6","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1109\/MS.2006.61","volume":"23","author":"J Floch","year":"2006","unstructured":"Floch, J., Hallsteinsen, S.O., Stav, E., Eliassen, F., Lund, K., Gj\u00f8rven, E.: Using architecture models for runtime adaptability. IEEE Softw. 23(2), 62\u201370 (2006)","journal-title":"IEEE Softw."},{"issue":"2","key":"12_CR7","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1002\/spe.2427","volume":"47","author":"M Fu","year":"2017","unstructured":"Fu, M., Zhu, L., Sun, D., Liu, A., Bass, L., Lu, Q.: Runtime recovery actions selection for sporadic operations on public cloud. Softw. Pract. Exp. 47(2), 223\u2013248 (2017)","journal-title":"Softw. Pract. Exp."},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Joshi, K.R., Sanders, W.H., Hiltunen, M.A., Schlichting, R.D.: Automatic model-driven recovery in distributed systems, pp. 25\u201338. IEEE Computer Society (2005)","DOI":"10.1109\/RELDIS.2005.11"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Joshi, K.R., Sanders, W.H., Hiltunen, M.A., Schlichting, R.D.: Automatic recovery using bounded partially observable markov decision processes, pp. 445\u2013456. IEEE Computer Society (2006)","DOI":"10.1109\/DSN.2006.16"},{"issue":"1","key":"12_CR10","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1109\/TSE.1987.232562","volume":"13","author":"R Koo","year":"1987","unstructured":"Koo, R., Toueg, S.: Checkpointing and rollback-recovery for distributed systems. IEEE Trans. Software Eng. 13(1), 23\u201331 (1987)","journal-title":"IEEE Trans. Software Eng."},{"key":"12_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"654","DOI":"10.1007\/978-3-642-37401-2_64","volume-title":"Web Technologies and Applications","author":"G Li","year":"2013","unstructured":"Li, G., Liao, L., Song, D., Wang, J., Sun, F., Liang, G.: A self-healing framework for qos-aware web service composition via case-based reasoning. In: Ishikawa, Y., Li, J., Wang, W., Zhang, R., Zhang, W. (eds.) APWeb 2013. LNCS, vol. 7808, pp. 654\u2013661. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-37401-2_64"},{"key":"12_CR12","doi-asserted-by":"publisher","unstructured":"Manessi, F., Rozza, A., Manzo, M.: Dynamic graph convolutional networks. Pattern Recognit. 97 (2020). https:\/\/doi.org\/10.1016\/J.PATCOG.2019.107000","DOI":"10.1016\/J.PATCOG.2019.107000"},{"key":"12_CR13","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"340","DOI":"10.1007\/978-3-540-85502-6_23","volume-title":"Advances in Case-Based Reasoning","author":"D McSherry","year":"2008","unstructured":"McSherry, D., Hassan, S., Bustard, D.: Conversational case-based reasoning in self-healing and recovery. In: Althoff, K.-D., Bergmann, R., Minor, M., Hanft, A. (eds.) ECCBR 2008. LNCS (LNAI), vol. 5239, pp. 340\u2013354. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-85502-6_23"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Mdhaffar, A., Halima, R.B., Jmaiel, M., Freisleben, B.: Cep4cloud: complex event processing for self-healing clouds, pp. 62\u201367. IEEE Computer Society (2014)","DOI":"10.1109\/WETICE.2014.56"},{"key":"12_CR15","doi-asserted-by":"crossref","unstructured":"Mfula, H., Nurminen, J.K.: Self-healing cloud services in private multi-clouds, pp. 165\u2013170. IEEE (2018)","DOI":"10.1109\/HPCS.2018.00041"},{"issue":"7540","key":"12_CR16","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/NATURE14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nat. 518(7540), 529\u2013533 (2015). https:\/\/doi.org\/10.1038\/NATURE14236","journal-title":"Nat."},{"key":"12_CR17","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"489","DOI":"10.1007\/11805816_36","volume-title":"Advances in Case-Based Reasoning","author":"S Montani","year":"2006","unstructured":"Montani, S., Anglano, C.: Case-based reasoning for autonomous service failure diagnosis and remediation in software systems. In: Roth-Berghofer, T.R., G\u00f6ker, M.H., G\u00fcvenir, H.A. (eds.) ECCBR 2006. LNCS (LNAI), vol. 4106, pp. 489\u2013503. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11805816_36"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Nasir, S., Taimoor, M., Gul, H., Ali, A., Khan, M.J.: Optimization of decision making in CBR based self-healing systems, pp. 68\u201372. IEEE Computer Society (2012)","DOI":"10.1109\/FIT.2012.21"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Ossenbuhl, S., Steinberger, J., Baier, H.: Towards automated incident handling: how to select an appropriate response against a network-based attack? pp. 51\u201367. IEEE Computer Society (2015)","DOI":"10.1109\/IMF.2015.13"},{"key":"12_CR20","first-page":"137","volume":"1","author":"C Pahl","year":"2016","unstructured":"Pahl, C., Jamshidi, P.: Microservices: a systematic mapping study. Closer 1, 137\u2013146 (2016)","journal-title":"Closer"},{"issue":"2","key":"12_CR21","doi-asserted-by":"publisher","first-page":"957","DOI":"10.1109\/TCC.2020.2968522","volume":"10","author":"BK Ray","year":"2022","unstructured":"Ray, B.K., Saha, A., Khatua, S., Roy, S.: Proactive fault-tolerance technique to enhance reliability of cloud service in cloud federation environment. IEEE Trans. Cloud Comput. 10(2), 957\u2013971 (2022)","journal-title":"IEEE Trans. Cloud Comput."},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Samir, A., Pahl, C.: Self-adaptive healing for containerized cluster architectures with hidden markov models, pp. 68\u201373. IEEE (2019)","DOI":"10.1109\/FMEC.2019.8795322"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Schwefel, H., Antonios, I.: Performability models for multi-server systems with high-variance repair durations, pp. 770\u2013779. IEEE Computer Society (2007)","DOI":"10.1109\/DSN.2007.73"},{"issue":"23","key":"12_CR24","volume":"6","author":"N Tabassum","year":"2019","unstructured":"Tabassum, N., Khan, M.S., Abbas, S., Alyas, T., Athar, A., Khan, M.A.: Intelligent reliability management in hyper-convergence cloud infrastructure using fuzzy inference system. EAI Endorsed Trans. Scalable Inf. Syst. 6(23), e1 (2019)","journal-title":"EAI Endorsed Trans. Scalable Inf. Syst."},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Wu, L., Tordsson, J., Acker, A., Kao, O.: Microras: automatic recovery in the absence of historical failure data for microservice systems, pp. 227\u2013236. IEEE (2020)","DOI":"10.1109\/UCC48980.2020.00041"}],"container-title":["Lecture Notes in Computer Science","Service-Oriented Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5012-8_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:12:37Z","timestamp":1767319957000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5012-8_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819550111","9789819550128"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5012-8_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICSOC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Service-Oriented Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shenzhen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 December 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icsoc2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icsoc2025.hit.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}