{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T21:05:31Z","timestamp":1778015131469,"version":"3.51.4"},"reference-count":60,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62133002"],"award-info":[{"award-number":["62133002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Sci. Eng."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/tase.2025.3592721","type":"journal-article","created":{"date-parts":[[2025,7,25]],"date-time":"2025-07-25T17:59:16Z","timestamp":1753466356000},"page":"19007-19024","source":"Crossref","is-referenced-by-count":3,"title":["Toward Fault Tolerance in Multi-Agent Reinforcement Learning"],"prefix":"10.1109","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1021-8478","authenticated-orcid":false,"given":"Yuchen","family":"Shi","sequence":"first","affiliation":[{"name":"Department of Automation, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4815-2778","authenticated-orcid":false,"given":"Huaxin","family":"Pei","sequence":"additional","affiliation":[{"name":"Qiyuan Lab, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang","family":"Feng","sequence":"additional","affiliation":[{"name":"Qiyuan Lab, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5526-866X","authenticated-orcid":false,"given":"Yi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Automation, Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5032-6322","authenticated-orcid":false,"given":"Danya","family":"Yao","sequence":"additional","affiliation":[{"name":"Department of Automation, Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2023.3336076"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/VTC2024-Spring62846.2024.10683524"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2018.2792327"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.2020.3013599"},{"key":"ref6","first-page":"1485","article-title":"Heterogeneous multi-robot reinforcement learning","volume-title":"Proc. Int. Conf. Auto. Agents Multiagent Syst.","author":"Bettini"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2025.3558282"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICSTCC.2019.8885721"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-020-09442-1"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2019.2945004"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2024.3479294"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1177\/01423312221142138"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/SAFEPROCESS52771.2021.9693666"},{"key":"ref14","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"12","author":"Sutton"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2024.3369592"},{"key":"ref16","first-page":"6382","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lowe"},{"key":"ref17","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative, multi-agent games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref18","article-title":"Prioritized experience replay","volume-title":"Proc. 4rd Int. Conf. Learn. Represent.","author":"Schaul"},{"key":"ref19","first-page":"4295","article-title":"QMix: Monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rashid"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2022.3151607"},{"key":"ref22","first-page":"19490","article-title":"Race: Improve multi-agent reinforcement learning with representation asymmetry and collaborative evolution","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2024.3398712"},{"key":"ref24","article-title":"R-MADDPG for partially observable environments and limited communication","author":"Wang","year":"2020","journal-title":"arXiv:2002.06684"},{"key":"ref25","article-title":"Multi-agent deep reinforcement learning with extremely noisy observations","author":"Kilinc","year":"2018","journal-title":"arXiv:1812.00922"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/336595.337570"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2021.3070140"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2022.3230951"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2022.3159088"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2017.2679066"},{"key":"ref31","article-title":"Robust multi-agent reinforcement learning with state uncertainty","author":"He","year":"2023","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014213"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812321"},{"key":"ref34","first-page":"10571","article-title":"Robust multi-agent reinforcement learning with model uncertainty","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Zhang"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3302131"},{"key":"ref36","first-page":"44909","article-title":"Sample-efficient robust multi-agent reinforcement learning in the face of environmental uncertainty","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Shi"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i13.17348"},{"key":"ref38","first-page":"31771","article-title":"Byzantine robust cooperative multi-agent reinforcement learning as a Bayesian game","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Li"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6214"},{"key":"ref40","first-page":"66556","article-title":"Multi-agent meta-reinforcement learning: Sharper convergence rates with task similarity","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Mao"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2025.3540758"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3126456"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.neuro.26.041002.131047"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref45","first-page":"2204","article-title":"Recurrent models of visual attention","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Mnih"},{"key":"ref46","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Dosovitskiy"},{"key":"ref47","first-page":"2961","article-title":"Actor-attention-critic for multi-agent reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Iqbal"},{"key":"ref48","first-page":"27840","article-title":"Attention-based recurrence for multi-agent reinforcement learning under stochastic partial observability","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Phan"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2025.3563725"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3503092"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.211"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3251193"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.89"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref56","first-page":"3053","article-title":"RLlib: Abstractions for distributed reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liang"},{"key":"ref57","article-title":"RLlib: Abstractions for distributed reinforcement learning","author":"Liang","year":"2017","journal-title":"arXiv:1712.09381"},{"key":"ref58","article-title":"Rlpyt: A research code base for deep reinforcement learning in PyTorch","author":"Stooke","year":"2019","journal-title":"arXiv:1909.01500"},{"key":"ref59","volume-title":"Experiment Tracking With Weights and Biases","author":"Biewald","year":"2020"},{"key":"ref60","volume-title":"An Environment for Autonomous Driving Decision-making","author":"Leurent","year":"2018"}],"container-title":["IEEE Transactions on Automation Science and Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8856\/10839176\/11096942.pdf?arnumber=11096942","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T18:17:44Z","timestamp":1754072264000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11096942\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":60,"URL":"https:\/\/doi.org\/10.1109\/tase.2025.3592721","relation":{},"ISSN":["1545-5955","1558-3783"],"issn-type":[{"value":"1545-5955","type":"print"},{"value":"1558-3783","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}