{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T05:17:13Z","timestamp":1740719833914,"version":"3.38.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T00:00:00Z","timestamp":1734307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T00:00:00Z","timestamp":1734307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100016311","name":"Arm","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100016311","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,16]]},"DOI":"10.1109\/cdc56724.2024.10886868","type":"proceedings-article","created":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T18:43:32Z","timestamp":1740595412000},"page":"3384-3389","source":"Crossref","is-referenced-by-count":0,"title":["Policy Optimization finds Nash Equilibrium in Regularized General-Sum LQ Games"],"prefix":"10.1109","author":[{"given":"Muhammad Aneeq Uz","family":"Zaman","sequence":"first","affiliation":[{"name":"University of Illinois Urbana-Champaign,Coordinated Science Laboratory,Urbana,IL,USA,61801"}]},{"given":"Shubham","family":"Aggarwal","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana-Champaign,Coordinated Science Laboratory,Urbana,IL,USA,61801"}]},{"given":"Melih","family":"Bastopcu","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana-Champaign,Coordinated Science Laboratory,Urbana,IL,USA,61801"}]},{"given":"Tamer","family":"Ba\u015far","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana-Champaign,Coordinated Science Laboratory,Urbana,IL,USA,61801"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Safe, multi-agent, reinforcement learning for autonomous driving","author":"Shalev-Shwartz","year":"2016","journal-title":"arXiv:1610.03295"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2018.8489655"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3477600"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s10614-021-10119-4"},{"key":"ref5","first-page":"24785","article-title":"Cooperative multi-agent reinforcement learning: Asynchronous communication and linear function approximation","volume-title":"International Conference on Machine Learning","author":"Min"},{"key":"ref6","article-title":"V-learning-a simple, efficient, decentralized algorithm for multiagent RL","author":"Jin","year":"2021","journal-title":"arXiv:2110.14555"},{"key":"ref7","article-title":"When can we learn general-sum Markov games with a large number of players sample-efficiently?","author":"Song","year":"2021","journal-title":"arXiv:2110.04184"},{"key":"ref8","article-title":"Provable policy gradient methods for average-reward Markov potential games","author":"Cheng","year":"2024","journal-title":"arXiv:2403.05738"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3490486.3538330"},{"issue":"1","key":"ref10","first-page":"165","article-title":"Provably efficient reinforcement learning in decentralized general-sum markov games","volume":"13","author":"Mao","year":"2023","journal-title":"Dynamic Games and Applications"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref12","first-page":"1909","article-title":"Approximately solving mean field games via entropy-regularized deep reinforcement learning","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Cui"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s00498-021-00310-1"},{"key":"ref14","first-page":"10 178","article-title":"Oracle-free reinforcement learning in mean-field games along a single sample path","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Zaman"},{"key":"ref15","article-title":"Policy-gradient algorithms have no guarantees of convergence in continuous action and state multi-agent settings","author":"Mazumdar","year":"2019","journal-title":"arXiv:1907.03712"},{"issue":"139","key":"ref16","article-title":"Policy gradient methods find the Nash equilibrium in N-player general-sum linear-quadratic games","volume":"24","author":"Hambly","year":"2023","journal-title":"Journal of Machine Learning Research"},{"key":"ref17","article-title":"Independent RL for cooperative-competitive agents: A mean-field perspective","author":"uz Zaman","year":"2024","journal-title":"arxiv:2403.11345"},{"key":"ref18","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume-title":"International Conference on Machine Learning","author":"Fazel"},{"key":"ref19","first-page":"19339","article-title":"Understanding the effect of stochasticity in policy optimization","volume":"34","author":"Mei","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref20","article-title":"Global convergence of policy gradient for sequential zero-sum linear quadratic dynamic games","author":"Bu","year":"2019","journal-title":"arXiv preprint arXiv:1911.04672"},{"key":"ref21","article-title":"Policy optimization provably converges to Nash equilibria in zero-sum linear quadratic games","volume":"32","author":"Zhang","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CDC42340.2020.9303950"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.020"},{"key":"ref24","article-title":"Exploration versus exploitation in reinforcement learning: A stochastic control approach","author":"Wang","year":"2018","journal-title":"arXiv:1812.01552"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1111\/mafi.12281"},{"key":"ref26","article-title":"Fast policy learning for linear quadratic control with entropy regularization","author":"Guo","year":"2023","journal-title":"arXiv:2311.14168"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1287\/moor.2021.1238"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2022.110177"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s11537-007-0657-8"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.4310\/cis.2006.v6.n3.a5"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.23919\/ACC53348.2022.9867728"},{"key":"ref32","article-title":"Policy optimization finds nash equilibrium in regularized general-sum LQ games","author":"Zaman","year":"2024","journal-title":"arXiv preprint arXiv:2404.00045"},{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.1137\/1.9781611971132","volume-title":"Dynamic Noncooperative Game Theory","author":"Ba\u015far","year":"1998"},{"key":"ref34","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-8176-4757-5","volume-title":"H-infinity optimal control and related minimax design problems: a dynamic game approach","author":"Ba\u015far","year":"2008"},{"key":"ref35","first-page":"770","article-title":"Robust cooperative multi-agent reinforcement learning: A mean-field type game perspective","volume-title":"6th Annual Learning for Dynamics & Control Conference","author":"Zaman"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2023.3271594"}],"event":{"name":"2024 IEEE 63rd Conference on Decision and Control (CDC)","start":{"date-parts":[[2024,12,16]]},"location":"Milan, Italy","end":{"date-parts":[[2024,12,19]]}},"container-title":["2024 IEEE 63rd Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10885784\/10885785\/10886868.pdf?arnumber=10886868","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T07:10:31Z","timestamp":1740640231000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10886868\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,16]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/cdc56724.2024.10886868","relation":{},"subject":[],"published":{"date-parts":[[2024,12,16]]}}}