{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T16:56:57Z","timestamp":1762102617407,"version":"build-2065373602"},"reference-count":49,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100016047","name":"Science Fund of the Republic of Serbia","doi-asserted-by":"publisher","award":["6524745"],"award-info":[{"award-number":["6524745"]}],"id":[{"id":"10.13039\/501100016047","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Fundação para a Ciência e a Tecnologia","award":["UIDB\/04111\/2020"],"award-info":[{"award-number":["UIDB\/04111\/2020"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Control Netw. Syst."],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1109\/tcns.2021.3061909","type":"journal-article","created":{"date-parts":[[2021,2,24]],"date-time":"2021-02-24T15:45:32Z","timestamp":1614181532000},"page":"1270-1280","source":"Crossref","is-referenced-by-count":13,"title":["Distributed Value Function Approximation for Collaborative Multiagent Reinforcement Learning"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9064-7059","authenticated-orcid":false,"given":"Milos S.","family":"Stankovic","sequence":"first","affiliation":[]},{"given":"Marko","family":"Beko","sequence":"additional","affiliation":[]},{"given":"Srdjan S.","family":"Stankovic","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.2991\/agi.2010.22"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2241057"},{"key":"ref33","article-title":"Fast multi-agent temporal-difference learning via homotopy stochastic primal-dual method","author":"ding","year":"2019","journal-title":"Optimization Foundations Reinforcement Learn Workshop 33rd Conf Neural Inf Process Syst"},{"article-title":"Primal-dual distributed temporal difference learning","year":"2020","author":"lee","key":"ref32"},{"key":"ref31","first-page":"1626","article-title":"Finite-time analysis of distributed TD(0) with linear function approximation on multi-agent reinforcement learning","author":"doan","year":"2019","journal-title":"Proc 36th Int Conf Mach Learn"},{"key":"ref30","first-page":"167","article-title":"Multi-agent temporal-difference learning with linear function approximation: Weak convergence under time-varying network topologies","author":"stanković","year":"2016","journal-title":"Proc Amer Control Conf"},{"key":"ref37","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","volume":"80","author":"zhang","year":"2018","journal-title":"Proc 35th Int Conf Mach Learn"},{"article-title":"A multi-agent off-policy actor-critic algorithm for distributed reinforcement learning","year":"2019","author":"suttle","key":"ref36"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029969"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2009.2037462"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2018.8619839"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2368731"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.23919\/ECC.2019.8795670"},{"article-title":"Problems in decentralized decision making and computation","year":"1984","author":"tsitsiklis","key":"ref2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2016.2633788"},{"article-title":"On convergence of some gradient-based temporal-differences algorithms for off-policy learning","year":"2017","author":"yu","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"ref21","first-page":"1125","article-title":"SBEED: Convergent reinforcement learning with nonlinear function approximation","volume":"80","author":"dai","year":"2018","journal-title":"Proc 35th Int Conf Mach Learn"},{"article-title":"A review of cooperative multi-agent deep reinforcement learning","year":"2019","author":"oroojlooyjadid","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-71682-4_5"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2585302"},{"key":"ref25","first-page":"371","article-title":"Distributed value function","author":"schneider","year":"1999","journal-title":"Proc 16th Int Conf Mach Learn"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2545098"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysconle.2020.104667"},{"journal-title":"Stochastic Approximation and Recursive Algorithms and Applications","year":"2003","author":"kushner","key":"ref40"},{"journal-title":"Reinforcement Learning An Introduction","year":"2017","author":"sutton","key":"ref12"},{"journal-title":"Neuro-Dynamic Programming","year":"1996","author":"bertsekas","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref15","first-page":"417","article-title":"Off-policy temporal-difference learning with function approximation","author":"precup","year":"2001","journal-title":"Proc 18th Int Conf Mach Learn"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref17","first-page":"289","article-title":"Off-policy learning with eligibility traces: A survey","volume":"15","author":"geist","year":"2014","journal-title":"J Mach Learn Res"},{"article-title":"Gradient temporal difference learning algorithms","year":"2011","author":"maei","key":"ref18"},{"key":"ref19","first-page":"809","article-title":"Policy evaluation with temporal differences: A survey and comparisons","volume":"15","author":"dann","year":"2014","journal-title":"J Mach Learn Res"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1137\/0325070"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1986.1104412"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2013.2275131"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2008.2009515"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2364096"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2018.03.054"},{"key":"ref49","first-page":"1","article-title":"Weak convergence properties of constrained emphatic temporal-difference learning with constant and slowly diminishing stepsize","volume":"17","author":"yu","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2012.2217338"},{"key":"ref46","first-page":"1","article-title":"An emphatic approach to the problem of off-policy temporal-difference learning","volume":"17","author":"sutton","year":"2016","journal-title":"J Mach Learn Res"},{"journal-title":"Stochastic Approximation A Dynamical Systems Viewpoint","year":"2009","author":"borkar","key":"ref45"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638519"},{"key":"ref47","first-page":"1724","article-title":"On convergence of emphatic temporal-difference learning","author":"yu","year":"2015","journal-title":"Proc 28th Conf Learn Theory"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997331639"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012995282784"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6911(97)90015-3"},{"key":"ref43","first-page":"1","article-title":"On generalized Bellman equations and temporal-difference learning","volume":"19","author":"yu","year":"2019","journal-title":"J Mach Learn Res"}],"container-title":["IEEE Transactions on Control of Network Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6509490\/9540809\/09362284.pdf?arnumber=9362284","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T11:55:08Z","timestamp":1641988508000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9362284\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9]]},"references-count":49,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tcns.2021.3061909","relation":{},"ISSN":["2325-5870","2372-2533"],"issn-type":[{"type":"electronic","value":"2325-5870"},{"type":"electronic","value":"2372-2533"}],"subject":[],"published":{"date-parts":[[2021,9]]}}}