{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T21:54:26Z","timestamp":1757541266407,"version":"3.37.3"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,10,11]],"date-time":"2020-10-11T00:00:00Z","timestamp":1602374400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,10,11]],"date-time":"2020-10-11T00:00:00Z","timestamp":1602374400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,10,11]],"date-time":"2020-10-11T00:00:00Z","timestamp":1602374400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006190","name":"Research and Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006190","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,10,11]]},"DOI":"10.1109\/smc42975.2020.9283033","type":"proceedings-article","created":{"date-parts":[[2020,12,14]],"date-time":"2020-12-14T21:44:48Z","timestamp":1607982288000},"page":"3523-3530","source":"Crossref","is-referenced-by-count":3,"title":["Friend-or-Foe Deep Deterministic Policy Gradient"],"prefix":"10.1109","author":[{"given":"Hao","family":"Jiang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dianxi","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chao","family":"Xue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yajie","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gongju","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yongjun","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"author":"Zhang","key":"ref1","article-title":"Towards vision-based deep reinforcement learning for robotic motion control"},{"key":"ref2","first-page":"2863","article-title":"Action-conditional video prediction using deep networks in atari games","author":"Oh","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref4","article-title":"Multiagent bidirectionally-coordinated nets for learning to play starcraft combat games","volume":"2","author":"Peng","year":"2017"},{"article-title":"Flow: Architecture and benchmarking for reinforcement learning in traffic control","year":"2017","author":"Wu","key":"ref5"},{"key":"ref6","first-page":"464","article-title":"Multi-agent reinforcement learning in sequential social dilemmas","volume-title":"Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems","author":"Leibo"},{"key":"ref7","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"Lowe","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014213"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref11"},{"article-title":"Addressing function approximation error in actor-critic methods","year":"2018","author":"Fujimoto","key":"ref12"},{"key":"ref13","first-page":"2137","article-title":"Learning to communicate with deep multi-agent reinforcement learning","author":"Foerster","year":"2016","journal-title":"Advances in Neural Information Processing Systems"},{"author":"He","key":"ref14","article-title":"Opponent modeling in deep reinforcement learning"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-71682-4_5"},{"article-title":"Multiagent soft q-learning","volume-title":"2018 AAAI Spring Symposium Series","author":"Wei","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/37"},{"key":"ref18","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","volume-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70","author":"Haarnoja"},{"key":"ref19"},{"key":"ref20","first-page":"322","article-title":"Friend-or-foe q-learning in general-sum games","volume":"1","author":"Littman","year":"2001","journal-title":"ICML"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2932203"},{"key":"ref22","first-page":"4190","article-title":"A unified game-theoretic approach to multiagent reinforcement learning","author":"Lanctot","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"Adversarial policy gradient for alternating markov games","year":"2018","author":"Gao","key":"ref23"},{"key":"ref24","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70","author":"Finn"},{"article-title":"Deterministic policy gradient algorithms","year":"2014","author":"Silver","key":"ref25"},{"article-title":"Explaining and harnessing adversarial examples","year":"2014","author":"Goodfellow","key":"ref26"}],"event":{"name":"2020 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","start":{"date-parts":[[2020,10,11]]},"location":"Toronto, ON, Canada","end":{"date-parts":[[2020,10,14]]}},"container-title":["2020 IEEE International Conference on Systems, Man, and Cybernetics (SMC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9282733\/9282811\/09283033.pdf?arnumber=9283033","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T01:43:27Z","timestamp":1706060607000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9283033\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,11]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/smc42975.2020.9283033","relation":{},"subject":[],"published":{"date-parts":[[2020,10,11]]}}}