{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T10:51:54Z","timestamp":1781175114436,"version":"3.54.1"},"reference-count":225,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["22511105500"],"award-info":[{"award-number":["22511105500"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62032019"],"award-info":[{"award-number":["62032019"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Central Universities of China","award":["2023-4-YB05"],"award-info":[{"award-number":["2023-4-YB05"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Intell. Veh."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1109\/tiv.2024.3408257","type":"journal-article","created":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T13:46:39Z","timestamp":1717422399000},"page":"8190-8211","source":"Crossref","is-referenced-by-count":26,"title":["Multiagent Reinforcement Learning: Methods, Trustworthiness, Applications in Intelligent Vehicles, and Challenges"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2649-8666","authenticated-orcid":false,"given":"Ziyuan","family":"Zhou","sequence":"first","affiliation":[{"name":"Department of Computer Science, Tongji University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7523-4827","authenticated-orcid":false,"given":"Guanjun","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Tongji University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6064-1908","authenticated-orcid":false,"given":"Ying","family":"Tang","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Rowan University, Glassboro, NJ, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","article-title":"Actor-critic algorithms","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Konda","year":"1999"},{"key":"ref3","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Mnih","year":"2016"},{"key":"ref4","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","author":"Schulman","year":"2015"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.12794\/metadc1505267"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3618-5_2"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref9","article-title":"Playing atari with deep reinforcement learning","author":"Mnih","year":"2013"},{"key":"ref10","first-page":"1053","article-title":"Reinforcement learning of local shape in the game of go","volume-title":"Proc. Int. Joint Conf. Artif. Intell.","author":"Silver","year":"2007"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10160497"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2022.3160697"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2022.3231299"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2022.3227919"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s43684-022-00045-z"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2022.3167103"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3318070"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3323767"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1631\/FITEE.2300166"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3253281"},{"key":"ref22","article-title":"Learning to summarize from human feedback","author":"Stiennon","year":"2020"},{"key":"ref23","article-title":"Multi-agent reinforcement learning: A report on challenges and approaches","author":"Kapoor","year":"2018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10299-x"},{"key":"ref25","article-title":"Trustworthy reinforcement learning against intrinsic vulnerabilities: Robustness, safety, and generalizability","author":"Xu","year":"2022"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-019-09421-1"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09996-w"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2977374"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.3390\/app11114948"},{"key":"ref30","article-title":"Model-based multi-agent reinforcement learning: Recent progress and prospects","author":"Wang","year":"2022"},{"key":"ref31","article-title":"A survey on large-population systems and scalable multi-agent reinforcement learning","author":"Cui","year":"2022"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1631\/FITEE.1900661"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-024-09644-x"},{"key":"ref34","article-title":"Causal multi-agent reinforcement learning: Review and open problems","author":"Grimbly","year":"2021"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2024.3457538"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2021.3111139"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.3390\/electronics9091363"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.14174"},{"key":"ref40","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.39.10.1953"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395"},{"key":"ref43","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","volume-title":"Proc. 17th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Sunehag","year":"2018"},{"key":"ref44","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref45","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Son","year":"2019"},{"key":"ref46","first-page":"7611","article-title":"Maven: Multi-agent variational exploration","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Mahajan","year":"2019"},{"key":"ref47","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2021"},{"key":"ref48","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Lowe","year":"2017"},{"key":"ref49","first-page":"2961","article-title":"Actor-attention-critic for multi-agent reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Iqbal","year":"2019"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6211"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6214"},{"key":"ref52","first-page":"12208","article-title":"FACMAC: Factored multi-agent centralised policy gradients","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Peng","year":"2021"},{"key":"ref53","first-page":"5571","article-title":"Mean field multi-agent reinforcement learning","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Yang","year":"2018"},{"key":"ref54","first-page":"411","article-title":"Multi-type mean field reinforcement learning","volume-title":"Proc. 19th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Subramanian","year":"2020"},{"key":"ref55","first-page":"537","article-title":"Partially observable mean field reinforcement learning","volume-title":"Proc. 20th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Subramanian","year":"2021"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/70"},{"key":"ref57","first-page":"4596","article-title":"Randomized entity-wise factorization for multi-agent reinforcement learning","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Iqbal","year":"2021"},{"key":"ref58","first-page":"2137","article-title":"Learning to communicate with deep multi-agent reinforcement learning","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Foerster","year":"2016"},{"key":"ref59","article-title":"Multi-agent deep reinforcement learning with extremely noisy observations","author":"Kilinc","year":"2018"},{"key":"ref60","article-title":"Event-triggered multi-agent reinforcement learning with communication under limited-bandwidth constraint","author":"Hu","year":"2020"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-023-09096-6"},{"key":"ref62","first-page":"2244","article-title":"Learning multiagent communication with backpropagation","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Sukhbaatar","year":"2016"},{"key":"ref63","article-title":"Multiagent bidirectionally-coordinated nets: Emergence of human-level coordination in learning to play starcraft combat games","author":"Peng","year":"2017"},{"key":"ref64","article-title":"Graph convolutional reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Jiang","year":"2020"},{"key":"ref65","first-page":"9908","article-title":"Learning efficient multi-agent communication: An information bottleneck approach","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Wang","year":"2020"},{"key":"ref66","first-page":"456","article-title":"Learning correlated communication topology in multi-agent reinforcement learning","volume-title":"Proc. 20th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Du","year":"2021"},{"key":"ref67","first-page":"964","article-title":"Multi-agent graph-attention communication and teaming","volume-title":"Proc. 20th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Niu","year":"2021"},{"issue":"42","key":"ref68","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garca","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"ref69","article-title":"Multi-agent constrained policy optimisation","author":"Gu","year":"2021"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17062"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86486-6_10"},{"key":"ref72","article-title":"Safe multi-agent reinforcement learning via shielding","author":"ElSayed-Aly","year":"2021"},{"key":"ref73","article-title":"Safe deep reinforcement learning for multi-agent systems with continuous action spaces","author":"Sheebaelhamd","year":"2021"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26240"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/SPW50608.2020.00027"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00022"},{"key":"ref77","article-title":"Sparse adversarial attack in multi-agent reinforcement learning","author":"Hu","year":"2022"},{"key":"ref78","article-title":"Evaluating robustness of cooperative marl: A model-based approach","author":"Pham","year":"2022"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2025.107747"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA230632"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2021.3114024"},{"key":"ref82","article-title":"MARNET: Backdoor attacks against value-decomposition multi-agent reinforcement learning","author":"Chen","year":"2022"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/509"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3278715"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127191"},{"key":"ref86","article-title":"What is the solution for state adversarial multi-agent reinforcement learning?","author":"Han","year":"2022"},{"key":"ref87","article-title":"Robust multi-agent reinforcement learning with state uncertainties","author":"He","year":"2023"},{"key":"ref88","article-title":"Robust multi-agent reinforcement learning against adversaries on observation","author":"Wang","year":"2023"},{"key":"ref89","article-title":"Learning with opponent-learning awareness","author":"Foerster","year":"2017"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014213"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812321"},{"key":"ref92","article-title":"Robust multi-agent reinforcement learning driven by correlated equilibrium","author":"Hu","year":"2021"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i13.17348"},{"key":"ref94","first-page":"7193","article-title":"Online robust reinforcement learning with model uncertainty","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Wang","year":"2021"},{"key":"ref95","first-page":"10571","article-title":"Robust multi-agent reinforcement learning with model uncertainty","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Zhang","year":"2020"},{"key":"ref96","article-title":"Backdoor detection in reinforcement learning","author":"Guo","year":"2022"},{"key":"ref97","first-page":"3910","article-title":"Adversarial policy learning in two-player competitive games","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Guo","year":"2021"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i10.26388"},{"key":"ref99","article-title":"Certifiably robust policy learning against adversarial multi-agent communication","volume-title":"Proc. Eleventh Int. Conf. Learn. Representations","author":"Sun","year":"2023"},{"key":"ref100","article-title":"Explaining and harnessing adversarial examples","author":"Goodfellow","year":"2014"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/EuroSP.2016.36"},{"key":"ref102","article-title":"Adversarial attacks on neural network policies","author":"Huang","year":"2017"},{"key":"ref103","article-title":"Asynchronous distributed bilevel optimization","volume-title":"Proc. Eleventh Int. Conf. Learn. Representations","author":"Jiao","year":"2023"},{"key":"ref104","first-page":"7987","article-title":"Distributed distributionally robust optimization with non-convex objectives","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Jiao","year":"2022"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1201\/9781351251389-8"},{"key":"ref106","first-page":"21024","article-title":"Robust deep reinforcement learning against adversarial perturbations on state observations","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Zhang","year":"2020"},{"key":"ref107","article-title":"Robust reinforcement learning on state observations with learned optimal adversary","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhang","year":"2021"},{"key":"ref108","article-title":"Who is the strongest enemy? Towards optimal and efficient evasion attacks in deep RL","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Sun","year":"2022"},{"key":"ref109","article-title":"Model based multi-agent reinforcement learning with tensor decompositions","author":"Vaart","year":"2021"},{"key":"ref110","first-page":"580","article-title":"Multi-task actor-critic with knowledge transfer via a shared critic","volume-title":"Proc. 13th Asian Conf. Mach. Learn.","author":"Zhang","year":"2021"},{"key":"ref111","first-page":"9733","article-title":"OPtions as REsponses: Grounding behavioural hierarchies in multi-agent reinforcement learning","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Vezhnevets","year":"2020"},{"key":"ref112","first-page":"8128","article-title":"A structured prediction approach for generalization in cooperative multi-agent reinforcement learning","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Carion","year":"2019"},{"key":"ref113","article-title":"Discovering generalizable multi-agent coordination skills from multi-task offline data","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Zhang","year":"2023"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1007\/s40747-021-00591-8"},{"key":"ref115","first-page":"2681","article-title":"Deep decentralized multi-task multi-agent reinforcement learning under partial observability","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Omidshafiei","year":"2017"},{"key":"ref116","first-page":"1002","article-title":"A decentralized policy gradient approach to multi-task reinforcement learning","volume-title":"Proc. 37th Conf. Uncertainty Artif. Intell.","author":"Zeng","year":"2021"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981319"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2022.3229213"},{"key":"ref119","article-title":"Real-to-sim-to-real: Learning models for homogeneous multi-agent systems","author":"Gurevich","year":"2022"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-022-1383-7"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10161034"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijinfomgt.2021.102433"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3345661"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/Allerton.2013.6736549"},{"key":"ref125","first-page":"11323","article-title":"Privacy-preserving Q-learning with functional noise in continuous spaces","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Wang","year":"2019"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390265"},{"key":"ref127","first-page":"368","article-title":"How you act tells a lot: Privacy-leaking attack on deep reinforcement learning","volume-title":"Proc. 18th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Pan","year":"2019"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1145\/3508028"},{"key":"ref129","article-title":"Learning robust rewards with adversarial inverse reinforcement learning","author":"Fu","year":"2017"},{"key":"ref130","article-title":"Deceptive reinforcement learning for privacy-preserving planning","author":"Liu","year":"2021"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2022.3233436"},{"key":"ref132","article-title":"PP-MARL: Efficient privacy-preserving marl for cooperative intelligence in communication","author":"Yuan","year":"2022"},{"key":"ref133","first-page":"1617","article-title":"Fairness in reinforcement learning","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Jabbari","year":"2017"},{"key":"ref134","article-title":"Fairness in reinforcement learning","author":"Weng","year":"2019"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM42981.2021.9488823"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2022.108242"},{"key":"ref137","article-title":"Reinforcement learning with stepwise fairness constraints","author":"Deng","year":"2022"},{"key":"ref138","first-page":"13854","article-title":"Learning fairness in multi-agent systems","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Jiang","year":"2019"},{"key":"ref139","article-title":"Fairness-aware model-based multi-agent reinforcement learning for traffic signal control","author":"Huang","year":"2023"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/DASC55683.2022.9925786"},{"key":"ref141","first-page":"2499","article-title":"Verifiable reinforcement learning via policy extraction","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Bastani","year":"2018"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1007\/s40747-020-00175-y"},{"key":"ref143","article-title":"Optimization methods for interpretable differentiable decision trees in reinforcement learning","author":"Silva","year":"2020"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1145\/3520304.3533959"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103455"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1561\/2200000086"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3046646"},{"key":"ref148","first-page":"3658","article-title":"Interpretable off-policy evaluation in reinforcement learning by highlighting influential transitions","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Gottesman","year":"2020"},{"key":"ref149","first-page":"3703","article-title":"Batch policy learning under constraints","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Le","year":"2019"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593649"},{"key":"ref151","article-title":"Learning finite state representations of recurrent policy networks","author":"Koul","year":"2018"},{"key":"ref152","first-page":"2388","article-title":"Re-understanding finite-state representations of recurrent policy networks","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Danesh","year":"2021"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v30i1.6671"},{"key":"ref154","first-page":"889","article-title":"Caps: Comprehensible abstract policy summaries for explaining reinforcement learning agents","volume-title":"Proc. 21st Int. Conf. Auton. Agents Multiagent Syst.","author":"McCalmon","year":"2022"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-35288-2_6"},{"key":"ref156","first-page":"1","article-title":"Distilling deep reinforcement learning policies in soft decision trees","volume-title":"Proc. IJCAI Workshop Explainable Artif. Intell.","author":"Coppens","year":"2019"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33012514"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-10928-8_25"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/MCI.2021.3129959"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-82017-6_12"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.07.014"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207564"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2025.3540467"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-26412-2_16"},{"key":"ref165","first-page":"1828","article-title":"Concept learning for interpretable multi-agent reinforcement learning","volume-title":"Proc. 6th Conf. Robot Learn.","author":"Zabounidis","year":"2023"},{"key":"ref166","article-title":"An overview of privacy in machine learning","author":"Cristofaro","year":"2020"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i7.20772"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1145\/3494672"},{"key":"ref169","first-page":"12967","article-title":"Learning fair policies in decentralized cooperative multi-agent reinforcement learning","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Zimmer","year":"2021"},{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-57321-8_5"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1145\/3527448"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1145\/3616864"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2020.2997896"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3140511"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/23"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.03.087"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-023-36606-2"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2024.3352730"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2893683"},{"key":"ref180","first-page":"264","article-title":"Smarts: An open-source scalable multi-agent RL training school for autonomous driving","volume-title":"Proc. Conf. Robot Learn.","author":"Zhou","year":"2021"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-47358-7_7"},{"key":"ref182","doi-asserted-by":"publisher","DOI":"10.1007\/s43684-022-00023-5"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/ICTAI52525.2021.00016"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3089834"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3220798"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2023.104489"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3274536"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2022.3197666"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1117\/12.2605920"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1109\/CDC42340.2020.9304323"},{"key":"ref191","article-title":"Bi-level off-policy reinforcement learning for volt\/var control involving continuous and discrete devices","author":"Liu","year":"2021"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2023.3323554"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2021.3113085"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2024.3369592"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2019.2951769"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.1016\/j.aap.2023.107395"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2024.3372625"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3142822"},{"key":"ref199","article-title":"Online deep reinforcement learning for autonomous UAV navigation and exploration of outdoor environments","author":"Maciel-Pearson","year":"2019"},{"key":"ref200","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2974648"},{"key":"ref201","doi-asserted-by":"publisher","DOI":"10.1109\/TCCN.2020.3027695"},{"key":"ref202","doi-asserted-by":"publisher","DOI":"10.3390\/drones7070476"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1017\/aer.2021.112"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1007\/s42401-021-00105-x"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2023.3312276"},{"key":"ref206","article-title":"Cooperative and distributed reinforcement learning of drones for field coverage","author":"Pham","year":"2018"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.2514\/1.G004106"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1109\/AERO47225.2020.9172262"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2021.3088718"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-19-2635-8_71"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.3390\/en15197426"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.3390\/drones8010027"},{"key":"ref213","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Yu","year":"2022"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3307134"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_29"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1177\/17568293231158443"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-010-9497-5"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.3390\/drones7070445"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67361-5_40"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1002\/dac.5334"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1109\/tiv.2023.3324687"},{"key":"ref222","article-title":"An overview of multi-agent reinforcement learning from game theoretical perspective","author":"Yang","year":"2021"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2023.3243558"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3341698"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1109\/tiv.2023.3326565"}],"container-title":["IEEE Transactions on Intelligent Vehicles"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7274857\/11077821\/10546304.pdf?arnumber=10546304","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T17:45:47Z","timestamp":1752255947000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10546304\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":225,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tiv.2024.3408257","relation":{},"ISSN":["2379-8904","2379-8858"],"issn-type":[{"value":"2379-8904","type":"electronic"},{"value":"2379-8858","type":"print"}],"subject":[],"published":{"date-parts":[[2024,12]]}}}