{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T05:25:39Z","timestamp":1743485139217,"version":"3.37.3"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002367","name":"Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["XDA27030300"],"award-info":[{"award-number":["XDA27030300"]}],"id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/ijcnn55064.2022.9892747","type":"proceedings-article","created":{"date-parts":[[2022,9,30]],"date-time":"2022-09-30T19:56:04Z","timestamp":1664567764000},"page":"1-8","source":"Crossref","is-referenced-by-count":3,"title":["Learning in Bi-level Markov Games"],"prefix":"10.1109","author":[{"given":"Linghui","family":"Meng","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingqing","family":"Ruan","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dengpeng","family":"Xing","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Xu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s10287-004-0010-0"},{"key":"ref10","first-page":"3307","article-title":"Data-efficient hierarchical reinforcement learning","author":"nachum","year":"0","journal-title":"Advances in Neural Information Processing Systems 31 Annual Conference on Neural Information Processing Systems 2018 NeurIPS 2018 2018"},{"key":"ref11","article-title":"Learning multi-level hierarchies with hindsight","author":"levy","year":"0","journal-title":"7th International Conference on Learning Representations ICLR 2019"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2891792"},{"key":"ref13","article-title":"Hierarchical reinforcement learning with hindsight","author":"levy","year":"0","journal-title":"International Conference on Learning Representations 2019"},{"key":"ref14","article-title":"Inter-level cooperation in hierarchical reinforcement learning","author":"kreidieh","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1080\/01969722.2019.1677335"},{"key":"ref16","article-title":"Feudal multi-agent hierarchies for cooperative reinforcement learning","author":"ahilan","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10744"},{"key":"ref18","article-title":"Hierarchical deep multiagent reinforcement learning with temporal abstraction","author":"tang","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.anbehav.2005.03.004"},{"journal-title":"The condensed wealth of nations","year":"2012","author":"butler","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-12586-7"},{"key":"ref3","article-title":"Smarts: Scalable multi-agent reinforcement learning training school for autonomous driving","author":"zhou","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref6","first-page":"3133","article-title":"Implicit learning dynamics in stackelberg games: Equilibria characterization, convergence analysis, and empirical study","volume":"119","author":"fiez","year":"2020","journal-title":"Proceedings of the 37th International Conference on Machine Learning ICML 2020"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28697-6_80"},{"journal-title":"Dynamic Noncooperative Game Theory","year":"1998","author":"ba?ar","key":"ref5"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6226"},{"key":"ref7","first-page":"105","article-title":"Asymmetric multiagent reinforcement learning","volume":"2","author":"k\u00f6n\u00f6nen","year":"2004","journal-title":"Web Intelligence and Agent Systems An international Journal"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5878"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.12785\/ijcds\/040207"},{"key":"ref1","article-title":"Starcraft ii: A new challenge for reinforcement learning","author":"vinyals","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref20","article-title":"Rode: Learning roles to decompose multi-agent tasks","author":"wang","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref22","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","author":"sunehag","year":"0","journal-title":"AAMAS"},{"key":"ref21","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"0","journal-title":"Advances in Neural Information Processing Systems 30 Annual Conference on Neural Information Processing Systems 2017"},{"key":"ref24","first-page":"5887","article-title":"QTRAN: learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume":"97","author":"son","year":"2019","journal-title":"Proceedings of the 36th International Conference on Machine Learning ICML 2019"},{"key":"ref23","first-page":"4292","article-title":"QMIX: monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"80","author":"rashid","year":"2018","journal-title":"Proceedings of the 35th International Conference on Machine Learning ICML 2018"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/BF01096458"},{"key":"ref25","article-title":"Qplex: Duplex dueling multi-agent q-learning","author":"wang","year":"2020","journal-title":"ArXiv Preprint"}],"event":{"name":"2022 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2022,7,18]]},"location":"Padua, Italy","end":{"date-parts":[[2022,7,23]]}},"container-title":["2022 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9891857\/9889787\/09892747.pdf?arnumber=9892747","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,4]],"date-time":"2022-11-04T01:26:46Z","timestamp":1667525206000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9892747\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/ijcnn55064.2022.9892747","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}