{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T15:46:26Z","timestamp":1781279186959,"version":"3.54.1"},"reference-count":57,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Rensselaer-IBM AI Research Collaboration"},{"name":"IBM AI Horizons Network"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Signal Process."],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/tsp.2023.3268475","type":"journal-article","created":{"date-parts":[[2023,5,12]],"date-time":"2023-05-12T17:50:37Z","timestamp":1683913837000},"page":"2579-2594","source":"Crossref","is-referenced-by-count":37,"title":["Towards Understanding Asynchronous Advantage Actor-Critic: Convergence and Linear Speedup"],"prefix":"10.1109","volume":"71","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8537-9862","authenticated-orcid":false,"given":"Han","family":"Shen","sequence":"first","affiliation":[{"name":"Department of Electrical, Computer, and Systems Engineering, Rensselaer Polytechnic Institute, Troy, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kaiqing","family":"Zhang","sequence":"additional","affiliation":[{"name":"Laboratory for Information &amp; Decision Systems and Computer Science &amp; Artificial Intelligence Laboratory, Massachusetts Institute of Technology, Cambridge, MA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1263-9365","authenticated-orcid":false,"given":"Mingyi","family":"Hong","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Minnesota, Minneapolis, MN, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3477-1439","authenticated-orcid":false,"given":"Tianyi","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Electrical, Computer, and Systems Engineering, Rensselaer Polytechnic Institute, Troy, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref57","first-page":"8668","article-title":"Finite-sample analysis for SARSA with linear function approximation","author":"zou","year":"0","journal-title":"Proc 33rd Int Conf Neural Inf Process Syst"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2019.2924579"},{"key":"ref56","first-page":"9340","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","author":"zhang","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2021.3122303"},{"key":"ref15","article-title":"Communication-efficient distributed reinforcement learning","author":"chen","year":"2021"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2020.2971457"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619440"},{"key":"ref52","article-title":"Non-asymptotic convergence analysis of two time-scale (natural) actor-critic algorithms","author":"xu","year":"2020"},{"key":"ref55","first-page":"3586","article-title":"Global convergence of policy gradient methods to (almost) locally optimal policies","author":"zhang","year":"2019","journal-title":"SIAM J Control Optim"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/BF02745577"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17300"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2009.07.008"},{"key":"ref17","article-title":"Pytorch implementation of a3c","author":"dgriff","year":"2018"},{"key":"ref16","first-page":"10707","article-title":"Shared experience actor-critic for multi-agent reinforcement learning","author":"christianos","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2021.3093792"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015961"},{"key":"ref51","first-page":"4358","article-title":"Improving sample complexity bounds for (natural) actor-critic algorithms","author":"xu","year":"0","journal-title":"Proc 34th Int Conf Neural Inf Process Syst"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2021.3090952"},{"key":"ref46","article-title":"Neural policy gradient methods: Global optimality and rates of convergence","author":"wang","year":"2019"},{"key":"ref45","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation.","author":"sutton","year":"0","journal-title":"Proc 12th Int Conf Neural Inf Process Syst"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TSIPN.2017.2695121"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022672621406"},{"key":"ref42","first-page":"6183","article-title":"Asynchronous coordinate descent under more realistic assumptions","author":"sun","year":"0","journal-title":"Proc 31st Int Conf Neural Inf Process Syst"},{"key":"ref41","article-title":"Accelerated methods for deep reinforcement learning","author":"stooke","year":"2019"},{"key":"ref44","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref49","first-page":"17617","article-title":"A finite time analysis of two time-scale actor critic methods","author":"wu","year":"0","journal-title":"Proc 34th Int Conf Neural Inf Process Syst"},{"key":"ref8","first-page":"2386","article-title":"On the linear convergence of policy gradient methods for finite MDPs","author":"bhandari","year":"0","journal-title":"Proc 24th Int Conf Artif Intell Statist"},{"key":"ref7","article-title":"Global optimality guarantees for policy gradient methods","author":"bhandari","year":"2022"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2020.2024"},{"key":"ref4","first-page":"1","article-title":"Reinforcement learning through asynchronous advantage actor-critic on a GPU","author":"babaeizadeh","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref3","first-page":"13320","article-title":"Gossip-based actor-learner architectures for deep reinforcement learning","author":"assran","year":"0","journal-title":"Proc 33rd Int Conf Neural Inf Process Syst"},{"key":"ref6","author":"bertsekas","year":"1989","journal-title":"Parallel and Distributed Computation Numerical Methods"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1613\/jair.806"},{"key":"ref40","article-title":"Asynchronous advantage actor critic: Non-asymptotic analysis and linear speedup","author":"shen","year":"2022"},{"key":"ref35","article-title":"Massively parallel methods for deep reinforcement learning","author":"nair","year":"2015"},{"key":"ref34","first-page":"4718","article-title":"A class of parallel doubly stochastic algorithms for large-scale learning","volume":"21","author":"mokhtari","year":"2020","journal-title":"J Mach Learn Res"},{"key":"ref37","first-page":"2074","article-title":"Scalable multi-agent reinforcement learning for networked systems with average reward","author":"qu","year":"0","journal-title":"Proc 34th Int Conf Neural Inf Process Syst"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/JSAIT.2021.3078754"},{"key":"ref31","first-page":"6820","article-title":"On the global convergence rates of softmax policy gradient methods","author":"mei","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref30","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref32","article-title":"Asynchronous methods for deep reinforcement learning.","author":"mnih","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref2","first-page":"64","article-title":"Optimality and approximation with policy gradient methods in Markov decision processes","author":"agarwal","year":"0","journal-title":"Proc 30th Conf Learn Theory"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2012.6426626"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2787979"},{"key":"ref38","first-page":"693","article-title":"Hogwild!: A lock-free approach to parallelizing stochastic gradient descent","author":"recht","year":"0","journal-title":"Proc 24th Int Conf Neural Inf Process Syst"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2241057"},{"key":"ref23","article-title":"A two-timescale framework for bilevel optimization: Complexity analysis and application to actor-critic","author":"hong","year":"2022"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1137\/S036301299731669X"},{"key":"ref25","article-title":"Actor-critic algorithms","author":"konda","year":"2002"},{"key":"ref20","first-page":"1407","article-title":"Impala: Scalable distributed deep-RL with importance weighted actor-learner architectures","author":"espeholt","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref22","article-title":"Single-timescale actor-critic provably finds globally optimal policy","author":"fu","year":"2020"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2525015"},{"key":"ref28","first-page":"5336","article-title":"Can decentralized algorithms outperform centralized algorithms? a case study for decentralized parallel stochastic gradient descent","author":"lian","year":"0","journal-title":"Proc 31st Int Conf Neural Inf Process Syst"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-023-06303-2"},{"key":"ref29","first-page":"3062","article-title":"A comprehensive linear speedup analysis for asynchronous stochastic parallel optimization from zeroth-order to first-order","author":"lian","year":"0","journal-title":"Proc 30th Int Conf Neural Inf Process Syst"}],"container-title":["IEEE Transactions on Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/78\/10040758\/10124081.pdf?arnumber=10124081","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,26]],"date-time":"2023-09-26T00:03:59Z","timestamp":1695686639000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10124081\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":57,"URL":"https:\/\/doi.org\/10.1109\/tsp.2023.3268475","relation":{},"ISSN":["1053-587X","1941-0476"],"issn-type":[{"value":"1053-587X","type":"print"},{"value":"1941-0476","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]}}}