{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T08:19:36Z","timestamp":1766132376773,"version":"3.48.0"},"reference-count":70,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976243"],"award-info":[{"award-number":["61976243"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172142"],"award-info":[{"award-number":["62172142"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Development Plan of Henan Province of China","award":["231111222600"],"award-info":[{"award-number":["231111222600"]}]},{"name":"Science and Technology Development Plan of Henan Province of China","award":["231100220600"],"award-info":[{"award-number":["231100220600"]}]},{"name":"Scientific and Technological Innovation Team of Colleges and Universities in Henan Province","award":["24IRTSTHN022"],"award-info":[{"award-number":["24IRTSTHN022"]}]},{"DOI":"10.13039\/501100013066","name":"Basic Research Projects in the University of Henan Province","doi-asserted-by":"publisher","award":["23ZX003"],"award-info":[{"award-number":["23ZX003"]}],"id":[{"id":"10.13039\/501100013066","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Syst. Man Cybern, Syst."],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1109\/tsmc.2025.3627478","type":"journal-article","created":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T18:36:58Z","timestamp":1762281418000},"page":"192-203","source":"Crossref","is-referenced-by-count":0,"title":["PfoPG: A Personalized Federated First-Order Policy Gradient Algorithm and Its Nonasymptotic Analysis"],"prefix":"10.1109","volume":"56","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6411-7035","authenticated-orcid":false,"given":"Junlong","family":"Zhu","sequence":"first","affiliation":[{"name":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China"}]},{"given":"Haotong","family":"Dong","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2523-1089","authenticated-orcid":false,"given":"Mingchuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China"}]},{"given":"Gaofeng","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0932-8788","authenticated-orcid":false,"given":"Ruijuan","family":"Zheng","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0044-6059","authenticated-orcid":false,"given":"Quanbo","family":"Ge","sequence":"additional","affiliation":[{"name":"School of Automation, Nanjing University of Information Science and Technology, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1572-5293","authenticated-orcid":false,"given":"Qingtao","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Information Engineering, Henan University of Science and Technology, Luoyang, China"}]}],"member":"263","reference":[{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2022.3181823"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2931179"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2021.3112691"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref7","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"12","author":"Sutton"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1613\/jair.806"},{"key":"ref9","article-title":"Variance reduction for policy-gradient methods via empirical variance minimization","author":"Kaledin","year":"2022","journal-title":"arXiv:2206.06827"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.5555\/2999611.2999647"},{"key":"ref11","first-page":"699","article-title":"Variance reduction for faster non-convex optimization","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Allen-Zhu"},{"key":"ref12","first-page":"314","article-title":"Stochastic variance reduction for nonconvex optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Reddi"},{"key":"ref13","first-page":"2613","article-title":"SARAH: A novel method for machine learning problems using stochastic recursive gradient","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Nguyen"},{"key":"ref14","first-page":"689","article-title":"SPIDER: Near-optimal non-convex optimization via stochastic path-integrated differential estimator","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Fang"},{"key":"ref15","first-page":"1","article-title":"Neural policy gradient methods: Global optimality and rates of convergence","volume-title":"Proc. 8th Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref16","first-page":"3921","article-title":"Stochastic nested variance reduction for nonconvex optimization","volume":"31","author":"Zhou","year":"2018","journal-title":"J. Mach. Learn. Res."},{"key":"ref17","first-page":"1","article-title":"Sample efficient policy gradient methods with recursive variance reduction","volume-title":"Proc. 8th Int. Conf. Learn. Represent.","author":"Xu"},{"key":"ref18","article-title":"Stochastic recursive momentum for policy gradient methods","author":"Yuan","year":"2020","journal-title":"arXiv:2003.04302"},{"key":"ref19","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref20","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref21","article-title":"Federated deep reinforcement learning","author":"Hankui Zhuo","year":"2019","journal-title":"arXiv:1901.08277"},{"key":"ref22","first-page":"10997","article-title":"Federated reinforcement learning: Linear speedup under Markovian sampling","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Khodadadian"},{"key":"ref23","first-page":"37157","article-title":"The blessing of heterogeneity in federated Q-learning: Linear speedup and beyond","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","author":"Woo"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM52122.2024.10621260"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM52122.2024.10621347"},{"key":"ref26","first-page":"18","article-title":"Federated reinforcement learning with environment heterogeneity","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Jin"},{"key":"ref27","article-title":"Improving federated learning personalization via model agnostic meta learning","author":"Jiang","year":"2019","journal-title":"arXiv:1909.12488"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.20517\/ir.2021.02"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2019.2960052"},{"key":"ref30","first-page":"6357","article-title":"Ditto: Fair and robust federated learning through personalization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/AIKE.2019.00031"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/MSN57253.2022.00087"},{"key":"ref33","article-title":"Adaptive personalized federated learning","author":"Deng","year":"2020","journal-title":"arXiv:2003.13461"},{"key":"ref34","article-title":"Three approaches for personalization with applications to federated learning","author":"Mansour","year":"2020","journal-title":"arXiv:2002.10619"},{"key":"ref35","first-page":"50530","article-title":"Momentum for the win: Collaborative federated reinforcement learning across heterogeneous environments","volume-title":"Proc. 41st Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-015-5484-1"},{"key":"ref37","first-page":"4023","article-title":"Stochastic variance-reduced policy gradient","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Papini"},{"key":"ref38","first-page":"5729","article-title":"Hessian aided policy gradient","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Shen"},{"key":"ref39","first-page":"4422","article-title":"Momentum-based policy gradient methods","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Huang"},{"key":"ref40","first-page":"10564","article-title":"Neural proximal\/trust region policy optimization attains globally optimal policy","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref41","first-page":"1002","article-title":"A decentralized policy gradient approach to multi-task reinforcement learning","volume-title":"Proc. Conf. Uncertainty Artif. Intell.","author":"Zeng"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17062"},{"key":"ref43","article-title":"Distributed policy gradient with variance reduction in multi-agent reinforcement learning","author":"Zhao","year":"2021","journal-title":"arXiv:2111.12961"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i9.21169"},{"key":"ref45","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. 20th Int. Conf. Artif. Intell. Statist.","author":"McMahan"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3320060"},{"key":"ref47","first-page":"4615","article-title":"Agnostic federated learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mohri"},{"key":"ref48","first-page":"17716","article-title":"Federated learning with partial model personalization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pillutla"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2023.3276329"},{"key":"ref50","first-page":"4519","article-title":"Tighter theory for local SGD on identical and heterogeneous data","volume-title":"Proc. Int. Conf. Artif. Intell. Statist. (AISTATS)","author":"Khaled"},{"key":"ref51","first-page":"6281","article-title":"Minibatch vs local SGD for heterogeneous distributed learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Woodworth"},{"key":"ref52","first-page":"5132","article-title":"SCAFFOLD: Stochastic controlled averaging for federated learning","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Karimireddy"},{"key":"ref53","first-page":"7057","article-title":"FedSplit: An algorithmic framework for fast federated optimization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Pathak"},{"key":"ref54","first-page":"2575","article-title":"Convergence and accuracy trade-offs in federated learning and meta-learning","volume-title":"Proc. 24th Int. Conf. Artif. Intell. Statist.","author":"Charles"},{"key":"ref55","first-page":"14606","article-title":"Linear convergence in federated learning: Tackling client heterogeneity and sparse gradients","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Mitra"},{"key":"ref56","first-page":"15750","article-title":"ProxSkip: Yes! local gradient steps provably lead to communication acceleration! Finally!","volume-title":"Proc. 39th Int. Conf. Mach. Learn.","author":"Mishchenko"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3015958"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2022.3192506"},{"key":"ref59","first-page":"1","article-title":"Enhancing clustered federated learning: Integration of strategies and improved methodologies","volume-title":"Proc. 13th Int. Conf. Learn. Represent.","author":"Guo"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3160699"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2023.3345431"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2024.3446271"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2023.3312511"},{"key":"ref64","first-page":"1","article-title":"Federated TD learning with linear function approximation under environmental heterogeneity","volume":"2024","author":"Wang","year":"2024","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2022.3214221"},{"key":"ref66","first-page":"15210","article-title":"Momentum-based variance reduction in non-convex SGD","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Cutkosky"},{"key":"ref67","first-page":"5442","article-title":"Policy optimization via importance sampling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Metelli"},{"key":"ref68","first-page":"541","article-title":"An improved convergence analysis of stochastic variance-reduced policy gradient","volume-title":"Proc. 35th Conf. Uncertainty Artif. Intell.","author":"Xu"},{"key":"ref69","first-page":"442","article-title":"Learning bounds for importance weighting","author":"Cortes","year":"2010","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref70","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lowe"}],"container-title":["IEEE Transactions on Systems, Man, and Cybernetics: Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6221021\/11301985\/11227002.pdf?arnumber=11227002","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T08:15:52Z","timestamp":1766132152000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11227002\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":70,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tsmc.2025.3627478","relation":{},"ISSN":["2168-2216","2168-2232"],"issn-type":[{"type":"print","value":"2168-2216"},{"type":"electronic","value":"2168-2232"}],"subject":[],"published":{"date-parts":[[2026,1]]}}}