{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T02:40:02Z","timestamp":1775616002914,"version":"3.50.1"},"reference-count":64,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"NSF AI institute","award":["2112085"],"award-info":[{"award-number":["2112085"]}]},{"name":"NSF ECCS","award":["2328241"],"award-info":[{"award-number":["2328241"]}]},{"name":"NSF CNS","award":["2003111"],"award-info":[{"award-number":["2003111"]}]},{"name":"ONR YIP","award":["N00014-19-1-2217"],"award-info":[{"award-number":["N00014-19-1-2217"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Contr."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1109\/tac.2024.3387208","type":"journal-article","created":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T18:33:50Z","timestamp":1712774030000},"page":"6499-6514","source":"Crossref","is-referenced-by-count":10,"title":["Gradient Play in Stochastic Games: Stationary Points, Convergence, and Sample Complexity"],"prefix":"10.1109","volume":"69","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9333-3554","authenticated-orcid":false,"given":"Runyu","family":"Zhang","sequence":"first","affiliation":[{"name":"Harvard School of Engineering and Applied Sciences, Harvard University, Cambridge, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6502-6430","authenticated-orcid":false,"given":"Zhaolin","family":"Ren","sequence":"additional","affiliation":[{"name":"Harvard School of Engineering and Applied Sciences, Harvard University, Cambridge, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9545-3050","authenticated-orcid":false,"given":"Na","family":"Li","sequence":"additional","affiliation":[{"name":"Harvard School of Engineering and Applied Sciences, Harvard University, Cambridge, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1049\/iet-gtd.2009.0168"},{"key":"ref2","article-title":"Safe, multi-agent, reinforcement learning for autonomous driving","author":"Shalev-Shwartz","year":"2016"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2021.3078100"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3128592"},{"key":"ref5","first-page":"256","article-title":"Scalable reinforcement learning of localized policies for multi-agent networked systems","volume-title":"Proc. Int. Conf. Mach. Learn. Res.","author":"Qu","year":"2020"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.39.10.1953"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14435-6_7"},{"key":"ref8","first-page":"4193","article-title":"A unified game-theoretic approach to multiagent reinforcement learning","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Lanctot","year":"2017"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref10","first-page":"1039","article-title":"Nash Q-learning for general-sum stochastic games","volume":"4","author":"Hu","year":"2003","journal-title":"J. Mach. Learn. Res."},{"key":"ref11","first-page":"871","article-title":"Extending Q-learning to general adaptive multi-agent systems","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Tesauro","year":"2003"},{"key":"ref12","first-page":"1021","article-title":"Rational and convergent learning in stochastic games","volume-title":"Proc. Int. Joint Conf. Artif. Intell.","author":"Bowling","year":"2001"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1613\/jair.2628"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v24i1.7639"},{"key":"ref15","first-page":"122","article-title":"Learning with opponent-learning awareness","volume-title":"Proc. 17th Int. Conf. Auton. Agents Multiagent Syst.","author":"Foerster","year":"2018"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1515\/9781400882014-002"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/0167-2681(85)90025-3"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1006\/game.1993.1022"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1287\/moor.23.2.479"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2005.843878"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.2307\/1912320"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-58242-4"},{"issue":"3","key":"ref23","first-page":"341","article-title":"Regret testing: Learning to play Nash equilibrium without knowing you have an opponent","volume":"1","author":"Foster","year":"2006","journal-title":"Theor. Econ."},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.geb.2006.06.001"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1137\/070680199"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1137\/18M1231298"},{"key":"ref27","article-title":"Policy optimization provably converges to Nash equilibria in zero-sum linear quadratic games","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Zhang","year":"2019"},{"key":"ref28","first-page":"4431","article-title":"On the theory of policy gradient methods: Optimality, approximation, and distribution shift","volume":"22","author":"Agarwal","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-01059-5"},{"key":"ref30","article-title":"Learning parametric closed-loop policies for Markov potential games","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Macua","year":"2018"},{"key":"ref31","article-title":"Global convergence of multi-agent policy gradient in Markov potential games","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Leonardos","year":"2022"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref33","first-page":"746","article-title":"The dynamics of reinforcement learning in cooperative multiagent systems","volume-title":"Proc. 15th Nat.\/10th Conf. Artif. Intell.\/Innov. Appl. Artif. Intell.","author":"Claus","year":"1998"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-005-2631-2"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1017\/S0269888912000057"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2932203"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-022-04105-y"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/tac.2024.3387208"},{"key":"ref39","article-title":"When can we learn general-sum Markov games with a large number of players sample-efficiently?","volume-title":"Proc. Conf. Learn. Representations","author":"Song","year":"2022"},{"key":"ref40","article-title":"V-learningA simple, efficient, decentralized algorithm for multiagent RL","author":"Jin","year":"2021"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2598476"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3121228"},{"key":"ref43","first-page":"5527","article-title":"Independent policy gradient methods for competitive reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Daskalakis","year":"2020"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.4171\/icm2022\/152"},{"key":"ref45","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang","year":"2018"},{"key":"ref46","first-page":"1923","article-title":"On the global convergence rates of decentralized softmax gradient play in Markov potential games","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Zhang","year":"2022"},{"key":"ref47","first-page":"4414","article-title":"Independent natural policy gradient always converges in Markov potential games","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Fox","year":"2022"},{"key":"ref48","first-page":"5166","article-title":"Independent policy gradient for large-scale Markov potential games: Sharper rates, function approximation, and game-agnostic convergence","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ding","year":"2022"},{"key":"ref49","article-title":"Gradient play in multi-agent Markov stochastic games: Stationary points and local geometry","volume-title":"Proc. Int. Symp. Math. Theory Netw. Syst.","author":"Zhang","year":"2022"},{"key":"ref50","article-title":"Reinforcement learning: Theory and algorithms","author":"Agarwal","year":"2019"},{"key":"ref51","volume-title":"Game Theory","author":"Fudenberg","year":"1991"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1017\/9781108636049"},{"key":"ref53","first-page":"6820","article-title":"On the global convergence rates of softmax policy gradient methods","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Mei","year":"2020"},{"key":"ref54","first-page":"267","article-title":"Approximately optimal approximate reinforcement learning","volume-title":"Proc. 19th Int. Conf. Mach. Learn.","author":"Kakade","year":"2002"},{"key":"ref55","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. 12th Int. Conf. Neural Inf. Process. Syst.","author":"Sutton","year":"1999"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.2307\/3003416"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.jedc.2005.10.010"},{"key":"ref58","first-page":"7688","article-title":"Learning in nonzero-sum stochastic games with potentials","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mguni","year":"2021"},{"key":"ref59","doi-asserted-by":"crossref","DOI":"10.1137\/1.9781611974997","volume-title":"First-Order Methods in Optimization","author":"Beck","year":"2017"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-015-0871-8"},{"key":"ref61","first-page":"2803","article-title":"Finite-time error bounds for linear stochastic approximation and TD learning","volume-title":"Proc. Conf. Mach. Learn. Res.","author":"Srikant","year":"2019"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2021.3120096"},{"key":"ref63","first-page":"5192","article-title":"Near-optimal time and sample complexities for solving Markov decision processes with a generative model","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Sidford","year":"2018"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511609428"}],"container-title":["IEEE Transactions on Automatic Control"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/9\/10695785\/10496201-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9\/10695785\/10496201.pdf?arnumber=10496201","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,2]],"date-time":"2025-01-02T19:49:21Z","timestamp":1735847361000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10496201\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10]]},"references-count":64,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tac.2024.3387208","relation":{},"ISSN":["0018-9286","1558-2523","2334-3303"],"issn-type":[{"value":"0018-9286","type":"print"},{"value":"1558-2523","type":"electronic"},{"value":"2334-3303","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10]]}}}