{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,30]],"date-time":"2025-12-30T03:24:06Z","timestamp":1767065046512,"version":"3.38.0"},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T00:00:00Z","timestamp":1734307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,16]],"date-time":"2024-12-16T00:00:00Z","timestamp":1734307200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001843","name":"Science and Engineering Research Board","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001843","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,16]]},"DOI":"10.1109\/cdc56724.2024.10885965","type":"proceedings-article","created":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T18:43:32Z","timestamp":1740595412000},"page":"387-393","source":"Crossref","is-referenced-by-count":1,"title":["Federated TD Learning in Heterogeneous Environments with Average Rewards: A Two-timescale Approach with Polyak-Ruppert Averaging"],"prefix":"10.1109","author":[{"given":"Ankur","family":"Naskar","sequence":"first","affiliation":[{"name":"Indian Institute of Science,Dept. of Computer Science and Automation,Bengaluru,India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gugan","family":"Thoppe","sequence":"additional","affiliation":[{"name":"Indian Institute of Science,Dept. of Computer Science and Automation,Bengaluru,India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abbasali","family":"Koochakzadeh","sequence":"additional","affiliation":[{"name":"Indian Institute of Science,Dept. of Computer Science and Automation,Bengaluru,India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vijay","family":"Gupta","sequence":"additional","affiliation":[{"name":"Purdue University,Dept. of Electrical and Computer Engineering,IN,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","author":"McMahan","year":"2017","journal-title":"Artificial intelligence and statistics"},{"key":"ref2","first-page":"374","article-title":"Towards federated learning at scale: System design","volume-title":"Proceedings of machine learning and systems","volume":"1","author":"Bonawitz"},{"article-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref3"},{"key":"ref4","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020","journal-title":"arXiv preprint arXiv:2005.01643"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-053018-023825"},{"key":"ref6","first-page":"332","article-title":"A general trust framework for multi-agent systems","volume-title":"Proceedings of the 20th International Conference on Autonomous Agents and MultiAgent Systems","author":"Cheng"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3358231"},{"key":"ref8","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"Sutton","year":"1999","journal-title":"Advances in neural information processing systems"},{"key":"ref9","article-title":"Discounted reinforcement learning is not an optimization problem","volume":"abs\/1910.02140","author":"Naik","year":"2019","journal-title":"CoRR"},{"key":"ref10","first-page":"1230","article-title":"Finite sample analysis of average-reward td learning and q-learning","volume":"34","author":"Zhang","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.20517\/ir.2021.02"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/AIKE.2019.00031"},{"key":"ref13","article-title":"Federated deep reinforcement learning","author":"Zhuo","year":"2019","journal-title":"arXiv preprint arXiv:1901.08277"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155494"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2023.3287499"},{"article-title":"Federated reinforcement learning with environment heterogeneity","year":"2022","author":"Jin","key":"ref16"},{"key":"ref17","article-title":"Federated TD learning with linear function approximation under environmental heterogeneity","author":"Wang","year":"2024","journal-title":"Transactions on Machine Learning Research"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1137\/0330046"},{"key":"ref19","article-title":"Stochastic approximation","author":"Ruppert","year":"1991","journal-title":"Handbook of Sequetial Analysis"},{"key":"ref20","first-page":"5438","article-title":"Finite time analysis of temporal difference learning with linear function approximation: Tail averaging and regularisation","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Patil"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6911(97)90015-3"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1214\/105051606000000448"},{"key":"ref23","first-page":"1199","article-title":"Finite sample analysis of two-timescale stochastic approximation with applications to reinforcement learning","volume-title":"Conference On Learning Theory","author":"Dalal"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5779"},{"key":"ref25","first-page":"2144","article-title":"Finite time analysis of linear two-timescale stochastic approximation with markovian noise","volume-title":"Conference on Learning Theory","author":"Kaledin"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CDC49753.2023.10384052"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref28","volume-title":"Stochastic approximation: a dynamical systems viewpoint","volume":"48","author":"Borkar","year":"2009"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12079"}],"event":{"name":"2024 IEEE 63rd Conference on Decision and Control (CDC)","start":{"date-parts":[[2024,12,16]]},"location":"Milan, Italy","end":{"date-parts":[[2024,12,19]]}},"container-title":["2024 IEEE 63rd Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10885784\/10885785\/10885965.pdf?arnumber=10885965","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,27]],"date-time":"2025-02-27T07:23:32Z","timestamp":1740641012000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10885965\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,16]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/cdc56724.2024.10885965","relation":{},"subject":[],"published":{"date-parts":[[2024,12,16]]}}}