{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T02:45:54Z","timestamp":1730342754425,"version":"3.28.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,6,25]],"date-time":"2024-06-25T00:00:00Z","timestamp":1719273600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,25]],"date-time":"2024-06-25T00:00:00Z","timestamp":1719273600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100016047","name":"Science Fund of the Republic of Serbia","doi-asserted-by":"publisher","award":["7502"],"award-info":[{"award-number":["7502"]}],"id":[{"id":"10.13039\/501100016047","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["2022.07530.CEECIND,UIDB\/04111\/2020"],"award-info":[{"award-number":["2022.07530.CEECIND,UIDB\/04111\/2020"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,25]]},"DOI":"10.23919\/ecc64448.2024.10590764","type":"proceedings-article","created":{"date-parts":[[2024,7,24]],"date-time":"2024-07-24T17:48:23Z","timestamp":1721843303000},"page":"2500-2505","source":"Crossref","is-referenced-by-count":0,"title":["Distributed Multi-Agent Gradient Based Q-Learning with Linear Function Approximation"],"prefix":"10.23919","author":[{"given":"Milo\u0161 S.","family":"Stankovi\u0107","sequence":"first","affiliation":[{"name":"Singidunum University,Belgrade,Serbia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marko","family":"Beko","sequence":"additional","affiliation":[{"name":"Universidade Lus&#x00F3;fona,COPELABS,Lisboa,Portugal"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Srdjan S.","family":"Stankovi\u0107","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering, University of Belgrade,Serbia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"volume-title":"Reinforcement learning: An introduction","year":"2017","author":"Sutton","key":"ref1"},{"volume-title":"Neuro-Dynamic Programming","year":"1996","author":"Bertsekas","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref6","first-page":"289","article-title":"Off-policy learning with eligibility traces: A survey","volume":"15","author":"Geist","year":"2014","journal-title":"Journal of Machine Learning Research"},{"key":"ref7","article-title":"On convergence of some gradient-based temporal-differences algorithms for off-policy learning","author":"Yu","year":"2017","journal-title":"arXiv"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref9","first-page":"91","article-title":"$\\text{GQ}(\\lambda)$: A general gradient algorithm for temporal difference prediction learning with eligibility traces","volume-title":"Proc. 3rd Conf. Artificial General Intelligence","author":"Maei","year":"2010"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390240"},{"key":"ref11","first-page":"719","article-title":"Toward off policy learning control with function approximation","volume-title":"Proc. Intern. Conf. Machine Learning","author":"Maei","year":"2010"},{"key":"ref12","article-title":"Performance of Q-learning with linear function approximation","author":"Chen","year":"2019","journal-title":"arXiv"},{"key":"ref13","article-title":"Regularized Q-learning","author":"Lim","year":"2022","journal-title":"arXiv"},{"key":"ref14","article-title":"A new convergent variant of Q-learning with linear function approximation","volume-title":"Proc. 34th Conf. NeurIPS","author":"Carvalho","year":"2020"},{"key":"ref15","article-title":"Performance of Q-learning with linear function approximation","author":"Lee","year":"2021","journal-title":"arXiv"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-71682-4_5"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TCNS.2021.3061909"},{"key":"ref18","first-page":"1626","article-title":"Finite-time analysis of distributed TD(0) with linear function approximation on multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Machine Learning","author":"Doan","year":"2019"},{"key":"ref19","article-title":"Primal-dual distributed temporal difference learning","author":"Lee","year":"2020","journal-title":"arXiv"},{"key":"ref20","article-title":"Fast multi-agent temporal-difference learning via homotopy stochastic primal-dual optimization","author":"Ding","year":"2020","journal-title":"arXiv"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2023.110922"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2009.2037462"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CDC40024.2019.9029969"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CDC51059.2022.9992951"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejcon.2023.100853"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2368731"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1137\/0325070"},{"volume-title":"Stochastic Approximation and Recursive Algorithms and Applications","year":"2003","author":"Kushner","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2016.2545098"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997331639"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2015.07.018"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysconle.2020.104667"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017936530646"}],"event":{"name":"2024 European Control Conference (ECC)","start":{"date-parts":[[2024,6,25]]},"location":"Stockholm, Sweden","end":{"date-parts":[[2024,6,28]]}},"container-title":["2024 European Control Conference (ECC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10590709\/10590710\/10590764.pdf?arnumber=10590764","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,25]],"date-time":"2024-07-25T05:15:12Z","timestamp":1721884512000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10590764\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,25]]},"references-count":33,"URL":"https:\/\/doi.org\/10.23919\/ecc64448.2024.10590764","relation":{},"subject":[],"published":{"date-parts":[[2024,6,25]]}}}