{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T16:48:46Z","timestamp":1785602926820,"version":"3.56.0"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Nature Science Foundation of China","doi-asserted-by":"publisher","award":["62172299"],"award-info":[{"award-number":["62172299"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Nature Science Foundation of China","doi-asserted-by":"publisher","award":["62032019"],"award-info":[{"award-number":["62032019"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["22511105500"],"award-info":[{"award-number":["22511105500"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2023-4-YB-05"],"award-info":[{"award-number":["2023-4-YB-05"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Space Optoelectronic Measurement and Perception Lab., Beijing Institute of Control Engineering","award":["LabSOMP-2023-03"],"award-info":[{"award-number":["LabSOMP-2023-03"]}]},{"name":"International Exchange Program for Graduate Students, Tongji University","award":["2023020041"],"award-info":[{"award-number":["2023020041"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Syst. Man Cybern, Syst."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1109\/tsmc.2024.3454118","type":"journal-article","created":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T17:28:08Z","timestamp":1726766888000},"page":"7633-7646","source":"Crossref","is-referenced-by-count":14,"title":["Adversarial Attacks on Multiagent Deep Reinforcement Learning Models in Continuous Action Space"],"prefix":"10.1109","volume":"54","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2649-8666","authenticated-orcid":false,"given":"Ziyuan","family":"Zhou","sequence":"first","affiliation":[{"name":"Department of Computer Science, Tongji University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7523-4827","authenticated-orcid":false,"given":"Guanjun","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Tongji University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1626-8745","authenticated-orcid":false,"given":"Weiran","family":"Guo","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Tongji University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5408-8752","authenticated-orcid":false,"given":"MengChu","family":"Zhou","sequence":"additional","affiliation":[{"name":"Macao Institute of Systems Engineering and Collaborative Laboratory for Intelligent Science and Systems, Macau University of Science and Technology, Macau, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2023.123552"},{"issue":"23","key":"ref2","doi-asserted-by":"crossref","DOI":"10.3390\/app122312377","article-title":"A review of deep reinforcement learning approaches for smart manufacturing in industry 4.0 and 5.0 framework","volume":"12","author":"del Real Torres","year":"2022","journal-title":"Appl. Sci."},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCE.2023.3339468"},{"key":"ref4","article-title":"Meta-learning with elastic prototypical network for fault transfer diagnosis of bearings under unstable speeds","volume":"245","author":"Luo","year":"2024","journal-title":"Rel. Eng. Sys. Saf."},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2023.3312282"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.1016\/j.ymssp.2023.110936","article-title":"Bayesian variational transformer: A generalizable model for rotating machinery fault diagnosis","volume":"207","author":"Xiao","year":"2024","journal-title":"Mech. Syst. Signal Process."},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2022.3229213"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2023.3307134"},{"key":"ref9","article-title":"Safe and robust multi-agent reinforcement learning for connected autonomous vehicles under state perturbations","author":"Zhang","year":"2023","journal-title":"arXiv:2309.11057"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2023.3343951"},{"key":"ref11","first-page":"12208","article-title":"FACMAC: Factored multi-agent Centralised policy gradients","volume":"34","author":"Peng","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref12","article-title":"Less is more: Robust robot learning via partially observable multi-agent reinforcement learning","author":"Zhao","year":"2023","journal-title":"arXiv:2309.1479"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2023.3241337"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2892377"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2022.3227919"},{"key":"ref16","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","volume-title":"Proc. 17th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Sunehag"},{"issue":"1","key":"ref17","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref18","first-page":"6382","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Lowe"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3139138"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00022"},{"key":"ref21","first-page":"1","article-title":"Robust multi-agent reinforcement learning with state uncertainty","volume-title":"Proc. Trans. Mach. Learn. Res.","author":"He"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/SPW50608.2020.00027"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2023.3278715"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2023.127191","article-title":"Enhancing the robustness of QMIX against state-adversarial attacks","volume":"572","author":"Guo","year":"2024","journal-title":"Neurocomputing"},{"key":"ref25","article-title":"Adversarial attacks on neural network policies","author":"Huang","year":"2017","journal-title":"arXiv:1702.02284"},{"key":"ref26","first-page":"21024","article-title":"Robust deep reinforcement learning against adversarial perturbations on state observations","volume":"33","author":"Zhang","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref27","first-page":"1","article-title":"Robust reinforcement learning on state observations with learned optimal adversary","volume-title":"Proc. 9th Int. Conf. Learn. Represent.","author":"Zhang"},{"key":"ref28","first-page":"1","article-title":"Who is the strongest enemy? Towards optimal and efficient evasion attacks in deep RL","volume-title":"Proc. 10th Int. Conf. Learn. Represent.","author":"Sun"},{"key":"ref29","first-page":"1","article-title":"Illusory attacks: Information-theoretic detectability matters in adversarial attacks","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Franzmeyer"},{"key":"ref30","first-page":"1","article-title":"Rethinking adversarial policies: A generalized attack formulation and provable defense in RL","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Liu"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/jas.2024.124818"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.3233\/faia230632"},{"key":"ref33","first-page":"1","article-title":"What is the solution for state-adversarial multi-agent reinforcement learning?","volume-title":"Proc. Trans. Mach. Learn. Res.","author":"Han"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1287\/moor.27.4.819.297"},{"key":"ref35","first-page":"5887","article-title":"QTRAN: Learning to Factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","volume":"97","author":"Son"},{"key":"ref36","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. 4th Int. Conf. Learn. Represent.","author":"Lillicrap"},{"key":"ref37","article-title":"Attacking cooperative multi-agent reinforcement learning by adversarial minority influence","author":"Li","year":"2023","journal-title":"arXiv:2302.03322"},{"key":"ref38","article-title":"Explaining and harnessing adversarial examples","author":"Goodfellow","year":"2014","journal-title":"arXiv:1412.6572"},{"key":"ref39","first-page":"7987","article-title":"Distributed Distributionally robust optimization with non-convex objectives","volume":"35","author":"Jiao","year":"2022","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref40","first-page":"638","article-title":"Towards transferable targeted attack","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit.","author":"Li"},{"key":"ref41","first-page":"1","article-title":"Towards deep learning models resistant to adversarial attacks","volume-title":"Proc. 6th Int. Conf. Learn. Represent.","author":"Madry"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1023\/a:1008202821328"},{"key":"ref43","article-title":"Differential evolution: A review of more than two decades of research","volume":"90","author":"Pant","year":"2020","journal-title":"Eng. Appl. Artif. Intell."},{"key":"ref44","doi-asserted-by":"crossref","first-page":"546","DOI":"10.1016\/j.swevo.2018.06.010","article-title":"Differential evolution: A survey of theoretical analyses","volume":"44","author":"Opara","year":"2019","journal-title":"Swarm Evol. Comput."},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2019.2890858"},{"key":"ref46","volume-title":"On-Line q-learning using connectionist systems","author":"Rummery","year":"1994"},{"key":"ref47","first-page":"681","article-title":"Bayesian learning via stochastic gradient langevin dynamics","volume-title":"Proc. 28th Int. Conf. Mach. Learn.","author":"Welling"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/iros.2012.6386109"},{"key":"ref49","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref50","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","volume":"35","author":"Yu","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1023\/a:1007678930559"},{"key":"ref53","first-page":"1","volume-title":"Convergence of Q-Learning: A Simple Proof","author":"Melo","year":"2001"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2009.4927542"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/tiv.2023.3339668"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2022.105548"}],"container-title":["IEEE Transactions on Systems, Man, and Cybernetics: Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6221021\/10758326\/10684240.pdf?arnumber=10684240","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T23:54:55Z","timestamp":1732665295000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10684240\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":56,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tsmc.2024.3454118","relation":{},"ISSN":["2168-2216","2168-2232"],"issn-type":[{"value":"2168-2216","type":"print"},{"value":"2168-2232","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12]]}}}