{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T05:37:27Z","timestamp":1751607447534,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/ijcnn48605.2020.9207169","type":"proceedings-article","created":{"date-parts":[[2020,9,30]],"date-time":"2020-09-30T00:40:33Z","timestamp":1601426433000},"page":"1-8","source":"Crossref","is-referenced-by-count":3,"title":["Cooperative Multi-Agent Deep Reinforcement Learning with Counterfactual Reward"],"prefix":"10.1109","author":[{"given":"Kun","family":"Shao","sequence":"first","affiliation":[]},{"given":"Yuanheng","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Zhentao","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","first-page":"449","article-title":"A distributional perspective on reinforcement learning","author":"bellemare","year":"2017","journal-title":"ICML"},{"key":"ref32","article-title":"Prioritized experience replay","author":"schaul","year":"2016","journal-title":"ICLRE"},{"key":"ref31","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"2016","journal-title":"ICML"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref35","article-title":"QUICR-learning for multi-agent coordination","author":"agogino","year":"2006","journal-title":"Proceedings of the 21th National Conference on Artificial Intelligence and the Eighteenth Innovative Applications of Artificial Intelligence Conference"},{"key":"ref34","first-page":"2892","article-title":"Distributional reinforcement learning with quantile regression","author":"dabney","year":"2018","journal-title":"AAAI"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI.2018.8628682"},{"journal-title":"Starcraft ii A new challenge for reinforcement learning","year":"2017","author":"vinyals","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICIST.2018.8426160"},{"key":"ref14","first-page":"387","article-title":"Cooperative multi-agent learning: The state of the art, Autonomous Agents and Multi-Agent Systems","volume":"11","author":"panait","year":"2005"},{"key":"ref15","first-page":"66","article-title":"Cooperative multi-agent control using deep reinforcement learning","author":"jayesh","year":"2017","journal-title":"AAMAS"},{"key":"ref16","first-page":"2244","article-title":"Learning multiagent communication with backpropagation","author":"sukhbaatar","year":"2016","journal-title":"NIPS"},{"key":"ref17","first-page":"2137","article-title":"Learning to communicate with deep multi-agent reinforcement learning","author":"foerster","year":"2016","journal-title":"NIPS"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395"},{"article-title":"Dota 2 with large scale deep reinforcement learning","year":"2019","author":"berner","key":"ref19"},{"key":"ref28","first-page":"4295","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","author":"rashid","year":"2018","journal-title":"35th International Conference on Machine Learning"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref27","first-page":"2974","article-title":"Counterfactual multi-agent policy gradients","author":"foerster","year":"2018","journal-title":"AAAI"},{"article-title":"A survey of deep reinforcement learning in video games","year":"2019","author":"shao","key":"ref3"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of Go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of Go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2018.8490423"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v31i1.10827","article-title":"Playing FPS games with deep reinforcement learning","author":"lample","year":"2017","journal-title":"31st AAAI Conference on Artificial Intelligence"},{"key":"ref2","first-page":"1529","article-title":"Recent progress of deep reinforcement learning: from AlphaGo to AlphaGo Zero","volume":"34","author":"tang","year":"2017","journal-title":"Control theory and applications"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301954"},{"key":"ref1","first-page":"701","article-title":"Review of deep reinforcement learning and discussions on the development of computer Go","volume":"33","author":"zhao","year":"2016","journal-title":"Control theory and applications"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2018.2823329"},{"journal-title":"Shapley value Discussion Paper","year":"2006","author":"hart","key":"ref22"},{"key":"ref21","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","author":"sunehag","year":"0"},{"journal-title":"Feature selection as a multiagent coordination problem","year":"2016","author":"malialis","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/WI-IAT.2014.159"},{"key":"ref26","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"2017","journal-title":"NIPS"},{"key":"ref25","article-title":"Counterfactual exploration for improving multiagent learning","author":"colby","year":"2015","journal-title":"AAMAS"}],"event":{"name":"2020 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2020,7,19]]},"location":"Glasgow, United Kingdom","end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9200848\/9206590\/09207169.pdf?arnumber=9207169","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,21]],"date-time":"2022-11-21T06:20:21Z","timestamp":1669011621000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9207169\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/ijcnn48605.2020.9207169","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}