{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T11:13:55Z","timestamp":1740136435086,"version":"3.37.3"},"reference-count":37,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["DMS-1953686","IIS-2107304","CMMI-1653435"],"award-info":[{"award-number":["DMS-1953686","IIS-2107304","CMMI-1653435"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"ONR","doi-asserted-by":"publisher","award":["1006977"],"award-info":[{"award-number":["1006977"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. on Mobile Comput."],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1109\/tmc.2023.3315120","type":"journal-article","created":{"date-parts":[[2023,9,13]],"date-time":"2023-09-13T17:46:08Z","timestamp":1694627168000},"page":"5709-5721","source":"Crossref","is-referenced-by-count":0,"title":["Teamwork Reinforcement Learning With Concave Utilities"],"prefix":"10.1109","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2525-2912","authenticated-orcid":false,"given":"Zheng","family":"Yu","sequence":"first","affiliation":[{"name":"Princeton University, Pirnceton, NJ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2194-9664","authenticated-orcid":false,"given":"Junyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6987-6463","authenticated-orcid":false,"given":"Zheng","family":"Wen","sequence":"additional","affiliation":[{"name":"DeepMind, London, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9311-9171","authenticated-orcid":false,"given":"Andrea","family":"Tacchetti","sequence":"additional","affiliation":[{"name":"DeepMind, London, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2101-9507","authenticated-orcid":false,"given":"Mengdi","family":"Wang","sequence":"additional","affiliation":[{"name":"Princeton University, Pirnceton, NJ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7774-3246","authenticated-orcid":false,"given":"Ian","family":"Gemp","sequence":"additional","affiliation":[{"name":"DeepMind, London, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2009.09.025"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.3021017"},{"article-title":"Safe, multi-agent, reinforcement learning for autonomous driving","year":"2016","author":"Shalev-Shwartz","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-27477-1_19"},{"article-title":"Variational policy gradient method for reinforcement learning with general utilities","year":"2020","author":"Zhang","key":"ref6"},{"key":"ref7","first-page":"2681","article-title":"Provably efficient maximum entropy exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hazan"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref9","first-page":"195","article-title":"Planning, learning and coordination in multiagent decision processes","volume-title":"Proc. 6th Conf. Theor. Aspects Rationality Knowl.","author":"Boutilier"},{"key":"ref10","first-page":"1039","article-title":"Nash Q-learning for general-sum stochastic games","volume":"4","author":"Hu","year":"2003","journal-title":"J. Mach. Learn. Res."},{"volume-title":"A Course in Game Theory","year":"1994","author":"Osborne","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511811654"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1287\/moor.14.1.147"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1287\/moor.19.2.434"},{"key":"ref15","first-page":"25746","article-title":"Reward is enough for convex MDPs","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Zahavy"},{"key":"ref16","first-page":"489","article-title":"Concave utility reinforcement learning: The mean-field game viewpoint","volume-title":"Proc. 21st Int. Conf. Auton. Agents Multiagent Syst.","author":"Geist"},{"issue":"10","key":"ref17","first-page":"2169","article-title":"Proto-value functions: A Laplacian framework for learning representation and control in Markov decision processes","volume":"8","author":"Mahadevan","year":"2007","journal-title":"J. Mach. Learn. Res."},{"key":"ref18","first-page":"1523","article-title":"Multiagent planning with factored MDPs","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Guestrin"},{"article-title":"Distributed planning in hierarchical factored MDPs","year":"2012","author":"Guestrin","key":"ref19"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1000"},{"article-title":"Towards minimax optimal reinforcement learning in factored Markov decision processes","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Tian","key":"ref21"},{"key":"ref22","first-page":"2976","article-title":"Variational planning for graph-based MDPs","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Cheng"},{"volume-title":"Nonlinear Programming","year":"1998","author":"Bertsekas","key":"ref23"},{"volume-title":"Numerical Optimization","year":"2006","author":"Nocedal","key":"ref24"},{"volume-title":"Constrained Optimization and Lagrange Multiplier Methods","year":"2014","author":"Bertsekas","key":"ref25"},{"key":"ref26","article-title":"On the acceleration of augmented Lagrangian method for linearly constrained optimization","volume":"3","author":"He","year":"2010","journal-title":"Optim. Online"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21236\/ADA580738"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1137\/16M1082305"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-019-01425-9"},{"volume-title":"Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers","year":"2011","author":"Boyd","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48311-5"},{"article-title":"The numerics of GANs","year":"2017","author":"Mescheder","key":"ref32"},{"article-title":"Last-iterate convergence rates for min-max optimization","year":"2019","author":"Abernethy","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1137\/21M1451944"},{"key":"ref35","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Sutton"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s40305-018-0232-4"},{"key":"ref37","article-title":"CVXOPT: A Python package for convex optimization","volume":"54","author":"Andersen","year":"2013","journal-title":"cvxopt.org"}],"container-title":["IEEE Transactions on Mobile Computing"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/7755\/10491282\/10250920-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7755\/10491282\/10250920.pdf?arnumber=10250920","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T07:34:22Z","timestamp":1725694462000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10250920\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":37,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tmc.2023.3315120","relation":{},"ISSN":["1536-1233","1558-0660","2161-9875"],"issn-type":[{"type":"print","value":"1536-1233"},{"type":"electronic","value":"1558-0660"},{"type":"electronic","value":"2161-9875"}],"subject":[],"published":{"date-parts":[[2024,5]]}}}