{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T12:55:45Z","timestamp":1730292945425,"version":"3.28.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T00:00:00Z","timestamp":1670198400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T00:00:00Z","timestamp":1670198400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,5]]},"DOI":"10.1109\/robio55434.2022.10011827","type":"proceedings-article","created":{"date-parts":[[2023,1,18]],"date-time":"2023-01-18T18:51:38Z","timestamp":1674067898000},"page":"1192-1197","source":"Crossref","is-referenced-by-count":0,"title":["Multi-Robot Real-time Game Strategy Learning based on Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Ki","family":"Deng","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen,Department of Control Science and Engineering,Shenzhen,China"}]},{"given":"Yanjie","family":"Li","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen,Department of Control Science and Engineering,Shenzhen,China"}]},{"given":"Songshuo","family":"Lu","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen,Department of Control Science and Engineering,Shenzhen,China"}]},{"given":"Yongjin","family":"Mu","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen,Department of Control Science and Engineering,Shenzhen,China"}]},{"given":"Xizheng","family":"Pang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen,Department of Control Science and Engineering,Shenzhen,China"}]},{"given":"Qi","family":"Liu","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen,Department of Control Science and Engineering,Shenzhen,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"issue":"1","key":"ref2","first-page":"126","article-title":"Reinforcement learning","volume":"11","author":"Sutton","year":"1999","journal-title":"Journal of Cognitive Neuroscience"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref4","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019","journal-title":"arXiv preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.cag.2021.01.011"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO54168.2021.9739251"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/SMC42975.2020.9283492"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2961426"},{"key":"ref9","article-title":"Robotic weapons and the future of war","volume":"125","author":"Sparrow","year":"2011","journal-title":"New wars and new soldiers: Military ethics in the contemporary world"},{"key":"ref10","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24693-0_81"},{"key":"ref12","article-title":"Distributed prioritized experience replay","author":"Horgan","year":"2018","journal-title":"arXiv preprint"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.29007\/xtgm"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref15","article-title":"Actor-critic algorithms","volume":"12","author":"Konda","year":"1999","journal-title":"Advances in neural information processing systems"},{"key":"ref16","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"International conference on machine learning. PMLR","author":"Silver"},{"key":"ref17","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja"},{"key":"ref18","article-title":"An overview of multi-agent reinforcement learning from game theoretical perspective","author":"Yang","year":"2020","journal-title":"arXiv preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/SoCPaR.2009.112"},{"key":"ref20","article-title":"Value-decomposition networks for cooperative multi-agent learning","author":"Sunehag","year":"2017","journal-title":"arXiv preprint"},{"key":"ref21","first-page":"4295","article-title":"Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"International conference on machine learning. PMLR","author":"Rashid"},{"key":"ref22","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume":"30","author":"Lowe","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref23","article-title":"The surprising effectiveness of ppo in cooperative, multi-agent games","author":"Yu","year":"2021","journal-title":"arXiv preprint"},{"volume-title":"Study on intelligent strategy in game adversarial environment","year":"2021","author":"Tang","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.23919\/ChiCC.2019.8866337"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO49542.2019.8961549"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2013.6760239"}],"event":{"name":"2022 IEEE International Conference on Robotics and Biomimetics (ROBIO)","start":{"date-parts":[[2022,12,5]]},"location":"Jinghong, China","end":{"date-parts":[[2022,12,9]]}},"container-title":["2022 IEEE International Conference on Robotics and Biomimetics (ROBIO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10011626\/10011636\/10011827.pdf?arnumber=10011827","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,9]],"date-time":"2024-02-09T03:01:44Z","timestamp":1707447704000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10011827\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,5]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/robio55434.2022.10011827","relation":{},"subject":[],"published":{"date-parts":[[2022,12,5]]}}}