{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T09:01:37Z","timestamp":1780390897195,"version":"3.54.1"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61973324"],"award-info":[{"award-number":["61973324"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2021B1515020094"],"award-info":[{"award-number":["2021B1515020094"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangdong Provincial Key Laboratory of Computational Science","award":["2020B1212060032"],"award-info":[{"award-number":["2020B1212060032"]}]},{"name":"IEEE International Conference on Acoustics"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE 
Trans. Signal Process."],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/tsp.2021.3090952","type":"journal-article","created":{"date-parts":[[2021,6,22]],"date-time":"2021-06-22T19:39:50Z","timestamp":1624390790000},"page":"3839-3853","source":"Crossref","is-referenced-by-count":20,"title":["Byzantine-Resilient Decentralized Policy Evaluation With Linear Function Approximation"],"prefix":"10.1109","volume":"69","author":[{"given":"Zhaoxian","family":"Wu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Han","family":"Shen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3477-1439","authenticated-orcid":false,"given":"Tianyi","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4222-5964","authenticated-orcid":false,"given":"Qing","family":"Ling","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33011544"},{"key":"ref38","first-page":"3518","article-title":"The hidden vulnerability of distributed learning in byzantium","author":"el mhamdi","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3154503"},{"key":"ref32","article-title":"Beyond confidence regions: Tight Bayesian ambiguity sets for robust MDPs","author":"petrik","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref31","article-title":"Policy-conditioned uncertainty sets for robust Markov decision processes","author":"tirinzoni","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref30","article-title":"A family of robust stochastic operators for reinforcement 
learning","author":"lu","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref37","first-page":"118","article-title":"Machine learning with adversaries: Byzantine tolerant gradient descent","author":"blanchard","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref36","article-title":"Phocas: Dimensional byzantine-resilient stochastic gradient descent","author":"xie","year":"2018","journal-title":"arXiv 1805 09682"},{"key":"ref35","first-page":"5650","article-title":"Byzantine-robust distributed learning: Towards optimal statistical rates","author":"yin","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref34","article-title":"Generalized byzantine-tolerant SGD","author":"xie","year":"2018","journal-title":"arXiv 1802 10116"},{"key":"ref28","first-page":"4485","article-title":"Finite-sample analysis of decentralized temporal-difference learning with linear function approximation","author":"sun","year":"2020","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref27","first-page":"2803","article-title":"Finite-time error bounds for linear stochastic approximation and TD learning","author":"srikant","year":"2019","journal-title":"Proc Conf Learn Theory"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.2995814"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.312"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1023\/A:1022192903948","article-title":"Least squares policy evaluation algorithms with linear function approximation","volume":"13","author":"nedi\u0107","year":"2003","journal-title":"Discrete Event Dyn Syst"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992701"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref24","first-page":"1347","article-title":"Linear stochastic approximation: How far 
does constant step-size and iterate averaging go?","author":"lakshminarayanan","year":"2018","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref23","first-page":"1691","article-title":"A finite time analysis of temporal difference learning with linear function approximation","author":"bhandari","year":"2018","journal-title":"Proc Conf Learn Theory"},{"key":"ref26","article-title":"Adaptive temporal difference learning with linear function approximation","author":"sun","year":"2020","journal-title":"arXiv 2002 08537"},{"key":"ref25","first-page":"1347","article-title":"Finite sample analyses for TD (0) with function approximation","author":"dalal","year":"2018","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1016\/S0022-0000(73)80033-9"},{"key":"ref51","article-title":"Byzantine-resilient decentralized TD learning with linear function approximation","author":"wu","year":"2020","journal-title":"arXiv 2009 11146"},{"key":"ref56","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1137\/130943170"},{"key":"ref54","first-page":"5330","article-title":"Can decentralized algorithms outperform centralized algorithms? 
A case study for decentralized parallel stochastic gradient descent","author":"lian","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref53","first-page":"365","article-title":"Iterative approximate byzantine consensus in arbitrary directed graphs-Part II: Synchronous and asynchronous systems","author":"vaidya","year":"2012","journal-title":"Proc ACM Symp on the Principles of Distr Computing"},{"key":"ref52","author":"br\u00e9maud","year":"2013","journal-title":"Markov Chains Gibbs Fields Monte Carlo Simulation and Queues"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.2976000"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"ref40","first-page":"4618","article-title":"Byzantine stochastic gradient descent","author":"alistarh","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TRA.2004.824698"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1137\/20M1311971"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/357172.357176"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.2973345"},{"key":"ref16","first-page":"1609","article-title":"A convergent O(n) algorithm for off-policy temporal-difference learning with linear function approximation","volume":"21","author":"sutton","year":"2008","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref18","first-page":"3041","article-title":"Finite-sample analysis of least-squares policy iteration","volume":"13","author":"lazaric","year":"2012","journal-title":"J Mach Learn 
Res"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114723"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2241057"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref8","first-page":"9672","article-title":"Multi-agent reinforcement learning via double averaging primal-dual optimization","author":"wai","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref7","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","author":"zhang","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref49","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref9","article-title":"Communication-efficient distributed reinforcement learning","author":"chen","year":"2018","journal-title":"arXiv 1812 03239"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1137\/16M1084316"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2014.2336806"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.21236\/ADA558910"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2014.2364096"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.sigpro.2021.108020"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2015.2471755"},{"key":"ref44","article-title":"Towards Byzantine-resilient learning in decentralized systems","author":"guo","year":"2020","journal-title":"arXiv 2002 08569"},{"key":"ref43","article-title":"BRIDGE: Byzantine-resilient decentralized gradient 
descent","author":"yang","year":"2019","journal-title":"arXiv 1908 08098"}],"container-title":["IEEE Transactions on Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/78\/9307529\/09462519.pdf?arnumber=9462519","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:50:32Z","timestamp":1652194232000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9462519\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":56,"URL":"https:\/\/doi.org\/10.1109\/tsp.2021.3090952","relation":{},"ISSN":["1053-587X","1941-0476"],"issn-type":[{"value":"1053-587X","type":"print"},{"value":"1941-0476","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}