{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:45:45Z","timestamp":1750308345675,"version":"3.41.0"},"reference-count":55,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2022YFB3105405","2021YFC3300502"],"award-info":[{"award-number":["2022YFB3105405","2021YFC3300502"]}]},{"name":"Provincial Key Research and Development Program of Anhui","award":["202423110050033"],"award-info":[{"award-number":["202423110050033"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Games"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/tg.2024.3485726","type":"journal-article","created":{"date-parts":[[2024,10,24]],"date-time":"2024-10-24T17:32:02Z","timestamp":1729791122000},"page":"397-407","source":"Crossref","is-referenced-by-count":0,"title":["CuDA2: An Approach for Incorporating Traitor Agents Into Cooperative Multiagent Systems"],"prefix":"10.1109","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-2151-7028","authenticated-orcid":false,"given":"Zhen","family":"Chen","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yong","family":"Liao","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Youpeng","family":"Zhao","sequence":"additional","affiliation":[{"name":"Polixir, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zipeng","family":"Dai","sequence":"additional","affiliation":[{"name":"Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4895-990X","authenticated-orcid":false,"given":"Jian","family":"Zhao","sequence":"additional","affiliation":[{"name":"Polixir, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09996-w"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2023.3310150"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2022.3232390"},{"key":"ref4","first-page":"10784","article-title":"Learning to simulate self-driven particles system with coordinated policy optimization","volume":"34","author":"Peng","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MRS50823.2021.9620590"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2022.3143175"},{"key":"ref7","first-page":"20147","article-title":"Multi-agent dynamic algorithm configuration","volume":"35","author":"Xue","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"article-title":"Dealing with non-stationarity in multi-agent deep reinforcement learning","year":"2019","author":"Papoudakis","key":"ref8"},{"key":"ref9","first-page":"29142","article-title":"Towards understanding cooperative multi-agent Q-learning with value factorization","volume":"34","author":"Wang","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref10","first-page":"1989","article-title":"Scaling multi-agent reinforcement learning with selective parameter sharing","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Christianos","year":"2021"},{"key":"ref11","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","author":"Sunehag","year":"2018","journal-title":"=Proc. 17th Int. Conf. Auton. Agents MultiAgent Syst."},{"issue":"178","key":"ref12","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref14","first-page":"24611","article-title":"The surprising effectiveness of ppo in cooperative multi-agent games","volume":"35","author":"Yu","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref15","first-page":"2186","article-title":"The StarCraft multi-agent challenge","volume-title":"Proc. 18th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Samvelyan","year":"2019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00022"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2022.3164470"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i10.26388"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6047"},{"article-title":"Adversarial policies: Attacking deep reinforcement learning","year":"2019","author":"Gleave","key":"ref20"},{"key":"ref21","first-page":"3910","article-title":"Adversarial policy learning in two-player competitive games","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Guo","year":"2021"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-019-09421-1"},{"key":"ref23","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume":"30","author":"Lowe","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref24","article-title":"DoP: Off-policy multi-agent decomposed policy gradients","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2020"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref26","first-page":"5887","article-title":"Qtran: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son","year":"2019"},{"article-title":"Qplex: Duplex dueling multi-agent Q-learning","year":"2020","author":"Wang","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5878"},{"key":"ref29","first-page":"5174","article-title":"On the utility of learning about humans for human-AI coordination","author":"Carroll","year":"2019","journal-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst."},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3574159"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2046684.2046692"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/SPW.2019.00021"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00614"},{"key":"ref34","first-page":"11225","article-title":"Adaptive reward-poisoning attacks against reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang","year":"2020"},{"article-title":"Adversarial exploitation of policy imitation","year":"2019","author":"Vahid","key":"ref35"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2022.3143566"},{"key":"ref37","first-page":"548","article-title":"CopyCAT: Taking control of neural policies with constant attacks","volume-title":"Proc. 19th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Hussenot","year":"2020"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.108965"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/DSC.2018.00126"},{"article-title":"Gradient band-based adversarial training for generalized attack immunity of A3C path finding","year":"2018","author":"Chen","key":"ref40"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5887"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2023.103259"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1512\/iumj.1957.6.56038"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(94)90047-7"},{"key":"ref45","first-page":"463","article-title":"Learning to drive a bicycle using reinforcement learning and shaping","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Randlv","year":"1998"},{"key":"ref46","first-page":"12478","article-title":"Eager: Asking and answering questions for automatic reward shaping in language-guided rl","volume":"35","author":"Carta","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref47","first-page":"25217","article-title":"Noveld: A simple yet effective exploration criterion","volume":"34","author":"Zhang","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref48","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ng","year":"1999"},{"key":"ref49","first-page":"792","article-title":"Principled methods for advising reinforcement learning agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wiewiora","year":"2003"},{"key":"ref50","first-page":"433","article-title":"Dynamic potential-based reward shaping","volume-title":"Proc. Int. Conf. Auton. Agents Multiagent Syst.","author":"Devlin","year":"2012"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9628"},{"article-title":"Exploration by random network distillation","year":"2018","author":"Burda","key":"ref52"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2021.3134259"},{"key":"ref54","first-page":"565","article-title":"Reward shaping in episodic reinforcement learning","volume-title":"Proc. 16th Int. Conf. Auton. Agents Multiagent Syst.","author":"Grzes","year":"2017"},{"key":"ref55","article-title":"Benchmarking multi-agent deep reinforcement learning algorithms in cooperative tasks","volume-title":"Proc. Neural Inf. Process. Syst. Track Datasets Benchmarks","author":"Papoudakis","year":"2021"}],"container-title":["IEEE Transactions on Games"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7782673\/11038929\/10734173.pdf?arnumber=10734173","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T17:41:42Z","timestamp":1750268502000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10734173\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":55,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tg.2024.3485726","relation":{},"ISSN":["2475-1502","2475-1510"],"issn-type":[{"type":"print","value":"2475-1502"},{"type":"electronic","value":"2475-1510"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}