{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,26]],"date-time":"2025-06-26T02:28:00Z","timestamp":1750904880307,"version":"3.37.3"},"reference-count":45,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["ECCS 1931932"],"award-info":[{"award-number":["ECCS 1931932"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62203073"],"award-info":[{"award-number":["62203073"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2212582"],"award-info":[{"award-number":["2212582"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"ARL"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Contr."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1109\/tac.2024.3375248","type":"journal-article","created":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T18:57:50Z","timestamp":1710269870000},"page":"6636-6651","source":"Crossref","is-referenced-by-count":3,"title":["Distributed Multiagent Reinforcement Learning Based on Graph-Induced Local Value Functions"],"prefix":"10.1109","volume":"69","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0066-204X","authenticated-orcid":false,"given":"Gangshan","family":"Jing","sequence":"first","affiliation":[{"name":"School of Automation, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4247-0698","authenticated-orcid":false,"given":"He","family":"Bai","sequence":"additional","affiliation":[{"name":"Oklahoma State University, Stillwater, OK, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8417-5411","authenticated-orcid":false,"given":"Jemin","family":"George","sequence":"additional","affiliation":[{"name":"U.S. Army Research Laboratory, Adelphi, MD, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3474-8215","authenticated-orcid":false,"given":"Aranya","family":"Chakrabortty","sequence":"additional","affiliation":[{"name":"North Carolina State University, Raleigh, NC, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8829-9390","authenticated-orcid":false,"given":"Piyush K.","family":"Sharma","sequence":"additional","affiliation":[{"name":"U.S. Army Research Laboratory, Adelphi, MD, USA"}]}],"member":"263","reference":[{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-017-0468-y"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2021.3050419"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-022-04105-y"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-09996-w"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref8","first-page":"256","article-title":"Scalable reinforcement learning of localized policies for multi-agent networked systems","volume-title":"Proc. Learn. Dyn. Control","author":"Qu","year":"2020"},{"key":"ref9","first-page":"7825","article-title":"Multi-agent reinforcement learning in stochastic networked systems","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"Lin","year":"2021"},{"key":"ref10","first-page":"2681","article-title":"Deep decentralized multi-task multi-agent reinforcement learning under partial observability","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Omidshafiei","year":"2017"},{"key":"ref11","first-page":"25817","article-title":"Scalable multi-agent reinforcement learning through intelligent information aggregation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Nayak"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/tac.2023.3288025"},{"key":"ref13","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang","year":"2018"},{"key":"ref14","first-page":"227","article-title":"Coordinated reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"2","author":"Guestrin","year":"2002"},{"key":"ref15","first-page":"1789","article-title":"Collaborative multiagent reinforcement learning by payoff propagation","volume":"7","author":"Kok","year":"2006","journal-title":"J. Mach. Learn. Res."},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/tac.2024.3386061"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3128592"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.ifacol.2019.09.144"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2021.3074256"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2241057"},{"key":"ref22","article-title":"Diff-dac: Distributed actor-critic for average multitask deep reinforcement learning","volume-title":"Proc. Adaptive Learn. Agents Conf.","author":"Macua","year":"2018"},{"key":"ref23","first-page":"761","article-title":"Horde: A scalable real-time architecture for learning knowledge from unsupervised sensorimotor interaction","volume-title":"Proc. 10th Int. Conf. Auton. Agents Multiagent Syst.","volume":"2","author":"Sutton","year":"2011"},{"article-title":"Linear time average consensus on fixed graphs and implications for decentralized optimization and multi-agent control","year":"2014","author":"Olshevsky","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.23919\/ACC53348.2022.9867152"},{"key":"ref26","first-page":"2074","article-title":"Scalable multi-agent reinforcement learning for networked systems with average reward","volume":"33","author":"Qu","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref27","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","volume-title":"Proc. 17th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Sunehag","year":"2018"},{"key":"ref28","first-page":"12491","article-title":"Fop: Factorizing optimal joint policy of maximum-entropy multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang","year":"2021"},{"key":"ref29","first-page":"1467","article-title":"Global convergence of policy gradient methods for the linear quadratic regulator","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fazel","year":"2018"},{"issue":"21","key":"ref30","first-page":"1","article-title":"Derivative-free methods for policy optimization: Guarantees for linear quadratic systems","volume":"21","author":"Malik","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2019.2896025"},{"article-title":"Privacy-preserving distributed zeroth-order optimization","year":"2020","author":"Gratton","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2020.3024321"},{"key":"ref34","first-page":"10209","article-title":"Distributed zero-order optimization under adversarial noise","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Akhavan","year":"2021"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2021.3078100"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-015-9296-2"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-015-5484-1"},{"article-title":"Zeroth-order deterministic policy gradient","year":"2020","author":"Kumar","key":"ref38"},{"key":"ref39","first-page":"2926","article-title":"Contrasting exploration in parameter and action space: A zeroth-order optimization perspective","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Vemula","year":"2019"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysconle.2004.02.022"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2021.0014"},{"issue":"98","key":"ref42","first-page":"1","article-title":"On the theory of policy gradient methods: Optimality, approximation, and distribution shift","volume":"22","author":"Agarwal","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2019.8814952"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2021.110006"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/IPSN.2005.1440896"}],"container-title":["IEEE Transactions on Automatic Control"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/9\/10695785\/10463092-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9\/10695785\/10463092.pdf?arnumber=10463092","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,2]],"date-time":"2025-01-02T19:49:46Z","timestamp":1735847386000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10463092\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10]]},"references-count":45,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tac.2024.3375248","relation":{},"ISSN":["0018-9286","1558-2523","2334-3303"],"issn-type":[{"type":"print","value":"0018-9286"},{"type":"electronic","value":"1558-2523"},{"type":"electronic","value":"2334-3303"}],"subject":[],"published":{"date-parts":[[2024,10]]}}}