{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T19:36:11Z","timestamp":1767987371320,"version":"3.49.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,20]],"date-time":"2024-05-20T00:00:00Z","timestamp":1716163200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,20]],"date-time":"2024-05-20T00:00:00Z","timestamp":1716163200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004147","name":"Tsinghua University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004147","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,20]]},"DOI":"10.1109\/infocom52122.2024.10621140","type":"proceedings-article","created":{"date-parts":[[2024,8,12]],"date-time":"2024-08-12T17:25:41Z","timestamp":1723483541000},"page":"811-820","source":"Crossref","is-referenced-by-count":2,"title":["Federated Offline Policy Optimization with Dual Regularization"],"prefix":"10.1109","author":[{"given":"Sheng","family":"Yue","sequence":"first","affiliation":[{"name":"BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China"}]},{"given":"Zerui","family":"Qin","sequence":"additional","affiliation":[{"name":"BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China"}]},{"given":"Xingyuan","family":"Hua","sequence":"additional","affiliation":[{"name":"Beijing Institute of Technology,School of Computer Science and Technology,Beijing,China"}]},{"given":"Yongheng","family":"Deng","sequence":"additional","affiliation":[{"name":"BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China"}]},{"given":"Ju","family":"Ren","sequence":"additional","affiliation":[{"name":"BNRist, Tsinghua University,Department of Computer Science and Technology,Beijing,China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. AISTATS","author":"McMahan"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1049\/cje.2019.10.004"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2021.3119950"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2931179"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/AIKE.2019.00031"},{"key":"ref6","article-title":"Federated deep reinforcement learning","author":"Zhuo","year":"2020"},{"key":"ref7","first-page":"1007","article-title":"Fault-tolerant federated reinforcement learning with theoretical guarantee","volume-title":"Proc. NeurIPS","volume":"34","author":"Fan"},{"key":"ref8","first-page":"10 997","article-title":"Federated reinforcement learning: Linear speedup under markovian sampling","volume-title":"Proc. ICML","volume":"162","author":"Khodadadian"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.3026589"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3390\/s20051359"},{"key":"ref11","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1561\/2200000083"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.23919\/cje.2022.00.031"},{"key":"ref14","first-page":"2613","article-title":"Double q-learning","volume":"23","author":"Hasselt","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref15","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref16","article-title":"Federated transfer reinforcement learning for autonomous driving","author":"Liang","year":"2019"},{"key":"ref17","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015"},{"key":"ref18","article-title":"Federated reinforcement distillation with proxy experience memory","author":"Cha","year":"2019"},{"key":"ref19","article-title":"Multi-task federated reinforcement learning with adversaries","author":"Anwar","year":"2021"},{"key":"ref20","article-title":"Federated offline reinforcement learning","author":"Zhou","year":"2022"},{"key":"ref21","first-page":"4443","article-title":"Distributed offline policy optimization over batch data","volume-title":"Proc. AISTATS","author":"Shen"},{"key":"ref22","article-title":"Federated ensemble-directed offline reinforcement learning","author":"Rengarajan","year":"2023"},{"key":"ref23","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume-title":"Proc. NeurIPS","author":"Kumar"},{"key":"ref24","first-page":"5774","article-title":"Offline reinforcement learning with fisher divergence critic regularization","volume-title":"Proc. ICML","author":"Kostrikov"},{"key":"ref25","article-title":"Combo: Conservative offline model-based policy optimization","volume-title":"Proc. NeurIPS","author":"Yu"},{"key":"ref26","volume-title":"Reinforcement learning: An introduction","author":"Sutton","year":"2018"},{"key":"ref27","article-title":"D4rl: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020"},{"key":"ref28","first-page":"14 129","article-title":"Mopo: Model-based offline policy optimization","volume-title":"Proc. NeurIPS","author":"Yu"},{"key":"ref29","article-title":"Model-based offline meta-reinforcement learning with regularization","volume-title":"Proc. ICLR","author":"Lin"},{"key":"ref30","article-title":"Reinforcement learning: Theory and algorithms","volume-title":"Tech. Rep","author":"Agarwal","year":"2019"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"}],"event":{"name":"IEEE INFOCOM 2024 - IEEE Conference on Computer Communications","location":"Vancouver, BC, Canada","start":{"date-parts":[[2024,5,20]]},"end":{"date-parts":[[2024,5,23]]}},"container-title":["IEEE INFOCOM 2024 - IEEE Conference on Computer Communications"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10621050\/10621073\/10621140.pdf?arnumber=10621140","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T18:30:59Z","timestamp":1737052259000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10621140\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,20]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/infocom52122.2024.10621140","relation":{},"subject":[],"published":{"date-parts":[[2024,5,20]]}}}