{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:11:15Z","timestamp":1772907075329,"version":"3.50.1"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,10,23]]},"DOI":"10.1109\/iros47612.2022.9982253","type":"proceedings-article","created":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T19:38:15Z","timestamp":1672083495000},"page":"9019-9026","source":"Crossref","is-referenced-by-count":7,"title":["Scalable Model-based Policy Optimization for Decentralized Networked Systems"],"prefix":"10.1109","author":[{"given":"Yali","family":"Du","sequence":"first","affiliation":[{"name":"King&#x0027;s College London,London,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chengdong","family":"Ma","sequence":"additional","affiliation":[{"name":"Xiamen University,Xiamen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuchen","family":"Liu","sequence":"additional","affiliation":[{"name":"Peking University,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Runji","family":"Lin","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Dong","sequence":"additional","affiliation":[{"name":"CFCS, School of CS, Peking University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Wang","sequence":"additional","affiliation":[{"name":"University College London,London,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yaodong","family":"Yang","sequence":"additional","affiliation":[{"name":"Peking University,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Multi-agent reinforcement learning for networked system control","author":"Chu","year":"2020","journal-title":"ICLR"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2014.04.014"},{"key":"ref3","article-title":"Scalable multi-agent reinforcement learning for networked systems with average reward","volume":"33","author":"Qu","year":"2020","journal-title":"NeurIPS"},{"key":"ref4","first-page":"256","article-title":"Scalable reinforcement learning of localized policies for multi-agent networked systems","volume-title":"L4DC","author":"Qu","year":"2020"},{"key":"ref5","article-title":"The surprising effectiveness of ppo in cooperative, multi-agent games","author":"Yu","year":"2021","journal-title":"arXiv preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2015.2422780"},{"key":"ref8","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","volume-title":"ICML","author":"Deisenroth","year":"2011"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1561\/2300000021"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref11","article-title":"Algorithmic framework for model-based deep reinforcement learning with theoretical guarantees","author":"Luo","year":"2019","journal-title":"ICLR"},{"key":"ref12","article-title":"When to trust your model: Model-based policy optimization","author":"Janner","year":"2019","journal-title":"NeurIPS"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561298"},{"key":"ref14","article-title":"Model-based multi-agent rl in zero-sum markov games with near-optimal sample complexity","volume":"33","author":"Zhang","year":"2020","journal-title":"NeurIPS"},{"key":"ref15","first-page":"160","article-title":"Cooperative prioritized sweeping","volume-title":"AAMAS 2021","author":"Bargiacchi","year":"2021"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref17","article-title":"Model-based reinforcement learning for decentralized multiagent rendezvous","author":"Wang","journal-title":"arXiv preprint"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-141-3.50030-4"},{"key":"ref19","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref20","first-page":"399","article-title":"Benchmarks for reinforcement learning in mixed-autonomy traffic","volume-title":"CoRL","author":"Vinitsky","year":"2018"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref23","first-page":"2576","article-title":"Grid-wise control for multi-agent reinforcement learning in video game ai","volume-title":"ICML","author":"Han","year":"2019"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref26","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments.","author":"Lowe","year":"2017","journal-title":"NeurIPS"},{"key":"ref27","article-title":"Liir: Learning individual intrinsic reward in multi-agent reinforcement learning.","author":"Du","year":"2019","journal-title":"NeurIPS"},{"key":"ref28","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","author":"Sunehag","year":"2018","journal-title":"AAMAS"},{"key":"ref29","first-page":"4292","article-title":"Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning","author":"Rashid","year":"2018","journal-title":"ICML"},{"key":"ref30","article-title":"Qtran: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","author":"Son","year":"2019","journal-title":"ICML"},{"key":"ref31","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","author":"Zhang","year":"2018","journal-title":"ICML"},{"key":"ref32","first-page":"1523","article-title":"Multi agent planning with factored mdps.","volume":"1","author":"Guestrin","year":"2001","journal-title":"NeurIPS"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014967"},{"key":"ref34","article-title":"Gcs: Graph-based coordination strategy for multi-agentreinforcement learning","author":"Ruan","year":"2022","journal-title":"AAMAS"},{"key":"ref35","first-page":"456","article-title":"Learning correlated communication topology in multiagent reinforcement learning","author":"Du","year":"2021","journal-title":"AAMAS"},{"key":"ref36","first-page":"2137","article-title":"Learning to communicate with deep multi-agent reinforcement learning","author":"Foerster","year":"2016","journal-title":"NeurIPS"},{"key":"ref37","first-page":"1101","article-title":"Coordinating multi-agent reinforcement learning with limited communication","author":"Zhang","year":"2013","journal-title":"AAMAS"},{"key":"ref38","first-page":"2252","article-title":"Learning multi agent communication with backpropagation","author":"Sukhbaatar","year":"2016","journal-title":"NeurIPS"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(00)00039-4"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/466"},{"key":"ref41","article-title":"Learning when to communicate at scale in multiagent cooperative and competitive tasks","author":"Singh","year":"2018","journal-title":"ar Xiv preprint"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.51.1035"},{"key":"ref43","first-page":"398","article-title":"Emergent behaviors in mixed-autonomy traffic","volume-title":"CoRL","author":"Wu","year":"2017"}],"event":{"name":"2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Kyoto, Japan","start":{"date-parts":[[2022,10,23]]},"end":{"date-parts":[[2022,10,27]]}},"container-title":["2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9981026\/9981028\/09982253.pdf?arnumber=9982253","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T03:32:54Z","timestamp":1706758374000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9982253\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,23]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/iros47612.2022.9982253","relation":{},"subject":[],"published":{"date-parts":[[2022,10,23]]}}}