{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T05:13:16Z","timestamp":1768281196612,"version":"3.49.0"},"reference-count":67,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62572496"],"award-info":[{"award-number":["62572496"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62432004"],"award-info":[{"award-number":["62432004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Young Elite Scientist Sponsorship Program by China Association for Science and Technology","award":["ZB2025-218"],"award-info":[{"award-number":["ZB2025-218"]}]},{"name":"Shenzhen Science and Technology Program","award":["JCYJ20250604175500001"],"award-info":[{"award-number":["JCYJ20250604175500001"]}]},{"DOI":"10.13039\/501100001809","name":"Guoqiang Institute, Tsinghua University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Netw."],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/ton.2025.3637043","type":"journal-article","created":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T18:41:12Z","timestamp":1764787272000},"page":"2031-2046","source":"Crossref","is-referenced-by-count":0,"title":["FOVA: Offline Federated Reinforcement Learning With Mixed-Quality Data"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-3917-267X","authenticated-orcid":false,"given":"Nan","family":"Qiao","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Central South University, Changsha, Hunan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3416-8181","authenticated-orcid":false,"given":"Sheng","family":"Yue","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Technology, Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2782-183X","authenticated-orcid":false,"given":"Ju","family":"Ren","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, BNRist, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6717-461X","authenticated-orcid":false,"given":"Yaoxue","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, BNRist, Tsinghua University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"87","article-title":"AWQ: Activation-aware weight quantization for on-device LLM compression and acceleration","volume-title":"Proc. Mach. Learn. Syst.","volume":"6","author":"Lin"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3485767"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3390\/s20051359"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2024.3350198"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2019.2915983"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.2976572"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3390\/machines11100980"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2020.2999784"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.23919\/cje.2023.00.288"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref12","first-page":"17854","article-title":"DFRD: Data-free robustness distillation for heterogeneous federated learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Luo"},{"key":"ref13","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. 20th Int. Conf. Artif. Intell. Statist.","author":"McMahan"},{"key":"ref14","first-page":"96","article-title":"Reinforcement learning: Theory and algorithms","volume":"32","author":"Agarwal","year":"2019"},{"key":"ref15","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020","journal-title":"arXiv:2005.01643"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2022.03.037"},{"key":"ref17","first-page":"37157","article-title":"The blessing of heterogeneity in federated Q-learning: Linear speedup and beyond","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Woo"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3083087"},{"key":"ref19","first-page":"50530","article-title":"Momentum for the win: Collaborative federated reinforcement learning across heterogeneous environments","volume-title":"Proc. Forty-first Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref20","article-title":"Finite-time analysis of on-policy heterogeneous federated reinforcement learning","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Zhang"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.20517\/ir.2021.02"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.2986803"},{"key":"ref23","first-page":"18","article-title":"Federated reinforcement learning with environment heterogeneity","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Jin"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3250269"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM52122.2024.10621140"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.2024.2310287"},{"key":"ref27","first-page":"6154","article-title":"Federated ensemble-directed offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kalathil"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-17422-3_10"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TCE.2023.3330943"},{"key":"ref30","article-title":"Federated offline reinforcement learning: Collaborative single-policy coverage suffices","author":"Woo","year":"2024","journal-title":"arXiv:2402.05876"},{"key":"ref31","first-page":"4443","article-title":"Distributed offline policy optimization over batch data","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Shen"},{"key":"ref32","volume-title":"FOVA: Offline Federated Reinforcement Learning With Mixed-Quality Data (Supplementary File)","author":"Qiao","year":"2025"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2020.3026589"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2019.2956615"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3447036"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.23919\/cje.2023.00.264"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2024.3361876"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672268"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.23919\/cje.2021.00.326"},{"key":"ref40","article-title":"FedFormer: Contextual federation with attention in reinforcement learning","author":"Hebert","year":"2022","journal-title":"arXiv:2205.13697"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-11748-0_15"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2021.3090952"},{"key":"ref43","article-title":"Federated temporal difference learning with linear function approximation under environmental heterogeneity","author":"Wang","year":"2023","journal-title":"arXiv:2302.02212"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2023.3242734"},{"key":"ref45","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref46","article-title":"Advantage-weighted regression: Simple and scalable off-policy reinforcement learning","author":"Peng","year":"2019","journal-title":"arXiv:1910.00177"},{"key":"ref47","article-title":"Offline reinforcement learning with implicit Q-learning","author":"Kostrikov","year":"2021","journal-title":"arXiv:2110.06169"},{"key":"ref48","first-page":"11784","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kumar"},{"key":"ref49","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Fujimoto"},{"key":"ref50","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kumar"},{"key":"ref51","first-page":"1711","article-title":"Mildly conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lyu"},{"key":"ref52","first-page":"7436","article-title":"Uncertainty-based offline reinforcement learning with diversified Q-ensemble","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"An"},{"key":"ref53","first-page":"2644","article-title":"Offline reinforcement learning: Role of state aggregation and trajectory data","volume-title":"Proc. Thirty 7th Annu. Conf. Learn. Theory","author":"Jia"},{"key":"ref54","first-page":"1177","article-title":"Fitted Q-iteration by advantage weighted regression","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"21","author":"Neumann"},{"key":"ref55","article-title":"Notes on Kullback\u2013Leibler divergence and likelihood","author":"Shlens","year":"2014","journal-title":"arXiv:1404.2000"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273590"},{"key":"ref57","article-title":"D4RL: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020","journal-title":"arXiv:2004.07219"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref59","first-page":"60247","article-title":"Understanding, predicting and better resolving Q-value divergence in offline-RL","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yue"},{"key":"ref60","first-page":"2621","article-title":"Kernel continual learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Derakhshani"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA230343"},{"key":"ref62","first-page":"28954","article-title":"COMBO: Conservative offline model-based policy optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref63","article-title":"Model-based offline meta-reinforcement learning with regularization","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Lin"},{"key":"ref64","first-page":"1","article-title":"Near-optimal regret bounds for reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"21","author":"Auer"},{"key":"ref65","first-page":"2701","article-title":"Why is posterior sampling better than optimism for reinforcement learning?","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Osband"},{"key":"ref66","article-title":"REGAL: A regularization based algorithm for reinforcement learning in weakly communicating MDPs","author":"Bartlett","year":"2012","journal-title":"arXiv:1205.2661"},{"key":"ref67","article-title":"MICRO: Model-based offline reinforcement learning with a conservative Bellman operator","author":"Liu","year":"2023","journal-title":"arXiv:2312.03991"}],"container-title":["IEEE Transactions on Networking"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10723154\/11317935\/11273896.pdf?arnumber=11273896","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T22:00:18Z","timestamp":1768255218000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11273896\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":67,"URL":"https:\/\/doi.org\/10.1109\/ton.2025.3637043","relation":{},"ISSN":["2998-4157"],"issn-type":[{"value":"2998-4157","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}