{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T21:24:10Z","timestamp":1729632250515,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/ssci44817.2019.9003120","type":"proceedings-article","created":{"date-parts":[[2020,2,21]],"date-time":"2020-02-21T02:49:24Z","timestamp":1582253364000},"page":"57-63","source":"Crossref","is-referenced-by-count":3,"title":["Multi-Agent Reinforcement Learning Based on Clustering in Two-Player Games"],"prefix":"10.1109","author":[{"given":"Weifan","family":"Li","sequence":"first","affiliation":[]},{"given":"Yuanheng","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"Implicit quantile networks for distributional reinforcement learning","year":"2018","author":"dabney","key":"ref33"},{"key":"ref32","first-page":"449","article-title":"A distributional perspective on reinforcement learning","author":"bellemare","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594201"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015355"},{"key":"ref35","first-page":"226","article-title":"A density-based algorithm for discovering clusters in large spatial databases with noise","volume":"96","author":"ester","year":"1996","journal-title":"KDD"},{"article-title":"Distributed prioritized experience replay","year":"2018","author":"horgan","key":"ref34"},{"key":"ref10","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11794","article-title":"Counterfactual multi-agent policy gradients","author":"foerster","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"key":"ref11","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","author":"sunehag","year":"2018","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"article-title":"Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning","year":"2018","author":"rashid","key":"ref12"},{"key":"ref13","first-page":"361","article-title":"Reinforcement learning with soft state aggregation","author":"singh","year":"1995","journal-title":"Advances in neural information processing systems"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-70087-8_86"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-015-9459-7"},{"key":"ref16","first-page":"19","article-title":"A bayesian sampling approach to exploration in reinforcement learning","author":"asmuth","year":"2009","journal-title":"Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence"},{"key":"ref17","first-page":"1899","article-title":"Graying the black box: Understanding DQNs","author":"zahavy","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref18","first-page":"3812","article-title":"Infogail: Interpretable imitation learning from visual demonstrations","author":"li","year":"2017","journal-title":"Advances in neural information processing systems"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref19"},{"article-title":"Deep reinforcement learning from self-play in imperfect-information games","year":"2016","author":"heinrich","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1017\/S0269888912000057"},{"key":"ref27","first-page":"4190","article-title":"A unified game-theoretic approach to multiagent reinforcement learning","author":"lanctot","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2019.2911900"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.321"},{"key":"ref29","first-page":"1830","article-title":"Efficient bayesian clustering for reinforcement learning","author":"mandel","year":"2016","journal-title":"IJCAI"},{"article-title":"A study on overfitting in deep reinforcement learning","year":"2018","author":"zhang","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI.2017.8280949"},{"key":"ref7","first-page":"2137","article-title":"Learning to communicate with deep multi-agent reinforcement learning","author":"foerster","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s12652-019-01503-y"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2018.8489477"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2018.2823329"},{"key":"ref20","first-page":"129","article-title":"Mixed-policy asynchronous deep Q-learning","author":"simoes","year":"2017","journal-title":"Iberian Robotics Conference"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v24i1.7639","article-title":"Multi-agent learning with policy prediction","author":"zhang","year":"2010","journal-title":"Twenty-Fourth AAAI Conference on Artificial Intelligence"},{"key":"ref21","first-page":"209","article-title":"Convergence and no-regret in multiagent learning","author":"bowling","year":"2005","journal-title":"Advances in neural information processing systems"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(02)00121-2"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1613\/jair.2628"},{"key":"ref26","first-page":"805","article-title":"Fictitious self-play in extensive-form games","author":"heinrich","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2013.6614986"}],"event":{"name":"2019 IEEE Symposium Series on Computational Intelligence (SSCI)","start":{"date-parts":[[2019,12,6]]},"location":"Xiamen, China","end":{"date-parts":[[2019,12,9]]}},"container-title":["2019 IEEE Symposium Series on Computational Intelligence (SSCI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8975711\/9002648\/09003120.pdf?arnumber=9003120","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,16]],"date-time":"2022-10-16T01:07:25Z","timestamp":1665882445000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9003120\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/ssci44817.2019.9003120","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}