{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:32:14Z","timestamp":1730298734252,"version":"3.28.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,5]],"date-time":"2021-12-05T00:00:00Z","timestamp":1638662400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,5]],"date-time":"2021-12-05T00:00:00Z","timestamp":1638662400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,5]]},"DOI":"10.1109\/ssci50451.2021.9660008","type":"proceedings-article","created":{"date-parts":[[2022,1,24]],"date-time":"2022-01-24T21:09:51Z","timestamp":1643058591000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Hierarchical Cooperative Swarm Policy Learning with Role Emergence"],"prefix":"10.1109","author":[{"given":"Tianle","family":"Zhang","sequence":"first","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China,100190"}]},{"given":"Zhen","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China,100190"}]},{"given":"Zhiqiang","family":"Pu","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China,100190"}]},{"given":"Tenghai","family":"Qiu","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China,100190"}]},{"given":"Jianqiang","family":"Yi","sequence":"additional","affiliation":[{"name":"Institute of Automation,Chinese Academy of Sciences,Beijing,China,100190"}]}],"member":"263","reference":[{"key":"ref10","first-page":"1","article-title":"Deep reinforcement learning for swarm systems","volume":"20","author":"h\u00fcttenrauch","year":"2019","journal-title":"Journal of Machine Learning Research"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1007\/s10458-010-9127-4"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1007\/978-3-642-39975-6"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1142\/S0218194018500043"},{"key":"ref14","article-title":"Roma: Multi-agent reinforcement learning with emergent roles","author":"wang","year":"0","journal-title":"Proceedings of the 37th International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref16","article-title":"Prox-imal policy optimization algorithms","author":"schulman","year":"0","journal-title":"ArXiv Preprint"},{"key":"ref17","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"99","author":"sutton","year":"1999","journal-title":"NIPS"},{"key":"ref18","first-page":"3303","article-title":"Data-efficient hierarchical reinforcement learning","author":"nachum","year":"0","journal-title":"32nd Conference on Neural Information Processing Systems (NeurIPS 2018)"},{"key":"ref19","article-title":"Hierarchical deep multiagent reinforcement learning with temporal abstraction","author":"tang","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref4","first-page":"6382","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"0","journal-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1609\/aaai.v34i05.6221"},{"key":"ref3","article-title":"Formation control with collision avoidance through deep reinforcement learning using model-guided demonstration","author":"sui","year":"2020","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"ref6","first-page":"7265","article-title":"Learning attentional communication for multi-agent cooperation","author":"jiang","year":"0","journal-title":"Proceedings of the 32Nd International Conference on Neural Information Processing Systems"},{"key":"ref5","article-title":"Counterfactual multi-agent policy gradients","volume":"32","author":"foerster","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"ref8","article-title":"Learning nearly decomposable value functions via communication minimization","author":"wang","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref7","article-title":"Neighborhood cognition consistent multi-agent reinforcement learning","author":"mao","year":"2019","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1186\/s41018-018-0045-4"},{"key":"ref9","article-title":"Emergent tool use from multi-agent autocurricula","author":"baker","year":"0","journal-title":"International Conference on Learning Representations"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/ROBIO.2017.8324822"},{"key":"ref20","first-page":"1566","article-title":"Hierarchical cooperative multiagent reinforcement learning with skill discovery","author":"yang","year":"0","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref22","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NIPS"},{"key":"ref21","article-title":"Learning transferable cooperative behavior in multi-agent team","author":"agarwal","year":"0","journal-title":"International Joint Conference on Autonomous Agents and Multiagent Systems (AAMAS '06)"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.3115\/v1\/D14-1179"},{"year":"2019","author":"mahajan","journal-title":"Maven Multiagent variational exploration","key":"ref23"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1016\/j.automatica.2016.05.007"},{"key":"ref25","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015","journal-title":"ArXiv Preprint"}],"event":{"name":"2021 IEEE Symposium Series on Computational Intelligence (SSCI)","start":{"date-parts":[[2021,12,5]]},"location":"Orlando, FL, USA","end":{"date-parts":[[2021,12,7]]}},"container-title":["2021 IEEE Symposium Series on Computational Intelligence (SSCI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9659537\/9659538\/09660008.pdf?arnumber=9660008","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,1]],"date-time":"2023-05-01T18:27:47Z","timestamp":1682965667000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9660008\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,5]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/ssci50451.2021.9660008","relation":{},"subject":[],"published":{"date-parts":[[2021,12,5]]}}}