{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T13:37:03Z","timestamp":1754487423189,"version":"3.37.3"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,17]],"date-time":"2022-05-17T00:00:00Z","timestamp":1652745600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,17]],"date-time":"2022-05-17T00:00:00Z","timestamp":1652745600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00f5o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["UIDB\/04111\/2020"],"award-info":[{"award-number":["UIDB\/04111\/2020"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,17]]},"DOI":"10.1109\/codit55151.2022.9804022","type":"proceedings-article","created":{"date-parts":[[2022,6,30]],"date-time":"2022-06-30T19:42:22Z","timestamp":1656618142000},"page":"1167-1172","source":"Crossref","is-referenced-by-count":2,"title":["Distributed Actor-Critic Learning Using Emphatic Weightings"],"prefix":"10.1109","author":[{"given":"Milo\u0161 S.","family":"Stankovi\u0107","sequence":"first","affiliation":[{"name":"University Singidunum,Belgrade,Serbia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marko","family":"Beko","sequence":"additional","affiliation":[{"name":"Instituto de Telecomunica&#x00E7;&#x00F5;es, Instituto Superior T&#x00E9;cnico, Universidade de Lisboa,Lisbon,Portugal"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Srdjan S.","family":"Stankovi\u0107","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering, University of Belgrade,Serbia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1137\/0325070"},{"key":"ref32","first-page":"1","article-title":"Weak convergence properties of constrained emphatic temporal-difference learning with constant and slowly diminishing stepsize","volume":"17","author":"yu","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"ref31","first-page":"1724","article-title":"On convergence of emphatic temporal-difference learning","author":"yu","year":"0","journal-title":"Proceedings of The 28th Conference on Learning Theory"},{"key":"ref30","article-title":"On convergence of some gradient-based temporal-differences algorithms for off-policy learning","author":"yu","year":"2017","journal-title":"ArXiv"},{"year":"2003","author":"kushner","journal-title":"Stochastic Approximation and Recursive Algorithms and Appli cations","key":"ref34"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1016\/j.automatica.2009.07.008"},{"key":"ref11","first-page":"179","article-title":"Off policy actor critic","author":"degris","year":"0","journal-title":"Proc Int Conf Machine Learning"},{"key":"ref12","first-page":"3426","article-title":"Actor-critic policy optimization in partially observable multiagent environments","author":"srinavasan","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref13","article-title":"Convergent actor-critic algorithms under off-policy training and function approximation","author":"maei","year":"2018","journal-title":"ArXiv"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/TSMCC.2007.913919"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/ICASSP.2013.6638519"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/CDC45484.2021.9683607"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1007\/978-3-319-71682-4_5"},{"key":"ref18","first-page":"1626","article-title":"Finite-time analysis of distributed TD(0) with linear function approximation on multi-agent reinforcement learning","author":"doan","year":"0","journal-title":"Proc Int Conf Machine Learning"},{"key":"ref19","article-title":"Adaptive consensusbased distributed system for multisensor multitarget tracking","author":"stankovic","year":"2021","journal-title":"IEEE Transactions on Aerospace and Electronic Systems"},{"key":"ref28","article-title":"A review of cooperative multiagent deep reinforcement learning","author":"oroojlooyjadid","year":"2019","journal-title":"ArXiv"},{"key":"ref4","first-page":"719","article-title":"Toward off policy learning control with function approximation","author":"maei","year":"0","journal-title":"Proc 7th Int Conf Machine Learning"},{"key":"ref27","article-title":"Diff-DAC: Distributed actor-critic for average multitask deep reinforcement learning","author":"macua","year":"2019","journal-title":"ArXiv"},{"key":"ref3","first-page":"1609","article-title":"A convergent O(n) algorithm for off-policy temporal-difference learning with linear function approximation","author":"sutton","year":"0","journal-title":"Proceedings of the 21st International Conference on Neural Information Processing Systems"},{"key":"ref6","first-page":"1","article-title":"An emphatic approach to the problem of off-policy temporal-difference learning","volume":"17","author":"sutton","year":"2016","journal-title":"Journal of Machine Learning Research"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1109\/TAC.2016.2545098"},{"key":"ref5","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1145\/1553374.1553501"},{"key":"ref7","first-page":"289","article-title":"Off-policy learning with eligibility traces: A survey","volume":"15","author":"geist","year":"2014","journal-title":"Journal of Machine Learning Research"},{"year":"2017","author":"sutton","journal-title":"Reinforcement Learning An Introduction","key":"ref2"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1137\/S0363012901385691"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"1307","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/CDC.2018.8619839"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/TCNS.2021.3061909"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/TCNS.2016.2633788"},{"key":"ref24","article-title":"A multiagent off-policy actor-critic algorithm for distributed reinforcement learning","author":"suttle","year":"2019","journal-title":"ArXiv"},{"key":"ref23","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","author":"zhang","year":"2018","journal-title":"ArXiv"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1109\/CDC40024.2019.9029969"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1109\/TAC.2009.2037462"}],"event":{"name":"2022 8th International Conference on Control, Decision and Information Technologies (CoDIT)","start":{"date-parts":[[2022,5,17]]},"location":"Istanbul, Turkey","end":{"date-parts":[[2022,5,20]]}},"container-title":["2022 8th International Conference on Control, Decision and Information Technologies (CoDIT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9803856\/9803880\/09804022.pdf?arnumber=9804022","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T17:58:31Z","timestamp":1686592711000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9804022\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,17]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/codit55151.2022.9804022","relation":{},"subject":[],"published":{"date-parts":[[2022,5,17]]}}}