{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T09:04:45Z","timestamp":1777626285279,"version":"3.51.4"},"reference-count":46,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&#x0026;D Program of China","award":["2018YFB0204300"],"award-info":[{"award-number":["2018YFB0204300"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61932001"],"award-info":[{"award-number":["61932001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906200"],"award-info":[{"award-number":["61906200"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,12,1]]},"DOI":"10.1109\/tpami.2021.3119645","type":"journal-article","created":{"date-parts":[[2021,10,15]],"date-time":"2021-10-15T16:47:15Z","timestamp":1634316435000},"page":"8812-8824","source":"Crossref","is-referenced-by-count":12,"title":["Adaptive Temporal Difference Learning With Linear Function Approximation"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5024-1900","authenticated-orcid":false,"given":"Tao","family":"Sun","sequence":"first","affiliation":[{"name":"College of Computer, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Han","family":"Shen","sequence":"additional","affiliation":[{"name":"Department of ECSE, Rensselaer Polytechnic Institute, Troy, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3477-1439","authenticated-orcid":false,"given":"Tianyi","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of ECSE, Rensselaer Polytechnic Institute, Troy, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9743-2034","authenticated-orcid":false,"given":"Dongsheng","family":"Li","sequence":"additional","affiliation":[{"name":"College of Computer, National University of Defense Technology, Changsha, Hunan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","first-page":"4486","article-title":"State aggregation learning from markov transition data","author":"duan","year":"2019","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref38","first-page":"2915","article-title":"Near optimal behavior via approximate state abstraction","author":"abel","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref33","article-title":"Local adagrad-type algorithm for stochastic convex-concave minimax problems","author":"liao","year":"2021"},{"key":"ref32","doi-asserted-by":"crossref","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1090\/mbk\/107"},{"key":"ref30","first-page":"1","article-title":"On the convergence of a class of adam-type algorithms for non-convex optimization","author":"chen","year":"2018","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/9.24227"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1287\/opre.30.1.62"},{"key":"ref35","first-page":"1","article-title":"Towards better understanding of adaptive gradient algorithms in generative adversarial nets","author":"liu","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref34","first-page":"1","article-title":"Universal stagewise learning for non-convex problems with convergence on averaged solutions","author":"chen","year":"2018","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12079"},{"key":"ref11","first-page":"1347","article-title":"Linear stochastic approximation: How far does constant step-size and iterate averaging go?","author":"lakshminarayanan","year":"2018","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref40","article-title":"Towards a unified theory of state abstraction for mdps","author":"li","year":"2006","journal-title":"Proc 9th Int Symp Artif Intell Math"},{"key":"ref12","first-page":"1691","article-title":"A finite time analysis of temporal difference learning with linear function approximation","author":"bhandari","year":"2018","journal-title":"Proc Conf Learn Theory"},{"key":"ref13","first-page":"2803","article-title":"Finite-time error bounds for linear stochastic approximation and TD learning","author":"srikant","year":"2019","journal-title":"Proc 32nd Annu Conf Learn Theory"},{"key":"ref14","first-page":"8477","article-title":"Characterizing the exact behaviors of temporal difference learning algorithms using markov jump linear system theory","author":"hu","year":"2019","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref15","first-page":"1626","article-title":"Finite-time analysis of distributed TD(0) with linear function approximation on multi-agent reinforcement learning","author":"doan","year":"2019","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref16","first-page":"2235","article-title":"Zap Q-learning","author":"devraj","year":"2017","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref17","first-page":"4706","article-title":"Finite-time performance bounds and adaptive learning rate selection for two time-scale reinforcement learning","author":"harsh gupta","year":"2019","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref18","first-page":"2529","article-title":"Momentum in reinforcement learning","author":"vieillard","year":"2020","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6021"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/9.580874"},{"key":"ref28","first-page":"1","article-title":"On the convergence of adam and beyond","author":"reddi","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"ref27","article-title":"Incorporating nesterov momentum into adam","author":"dozat","year":"2016","journal-title":"Proc Workshop Track - 10th Int Conf Learn Representations"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"ref5","first-page":"1","article-title":"Learning rates for Q-learning","volume":"5","author":"even-dar","year":"2003","journal-title":"J Mach Learn Res"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01138"},{"key":"ref8","first-page":"1609","article-title":"A convergent $o(n)$o(n) temporal-difference algorithm for off-policy learning with linear function approximation","author":"sutton","year":"2009","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997331639"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref9","first-page":"504","article-title":"Finite-sample analysis of proximal gradient TD algorithms","author":"liu","year":"2015","journal-title":"Proc Conf Uncertainty Artif Intell"},{"key":"ref1","volume":"2","author":"sutton","year":"1998","journal-title":"Introduction to Reinforcement Learning"},{"key":"ref46","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"2017","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref20","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"duchi","year":"2011","journal-title":"J Mach Learn Res"},{"key":"ref45","article-title":"OpenAI Gym","author":"brockman","year":"2016"},{"key":"ref22","first-page":"983","article-title":"On the convergence of stochastic gradient descent with adaptive stepsizes","author":"li","year":"2019","journal-title":"Proc 22nd Int Conf Artif Intell Statist"},{"key":"ref21","article-title":"Adaptive bound optimization for online convex optimization","author":"mcmahan","year":"2010","journal-title":"Proc Annu Conf Learn Theory"},{"key":"ref24","article-title":"RMSprop: Neural networks for machine learning","author":"tieleman","year":"2012"},{"key":"ref42","first-page":"1038","article-title":"Generalization in reinforcement learning: Successful examples using sparse coarse coding","author":"sutton","year":"1996","journal-title":"Proc Advances Neural Inf Process Syst"},{"key":"ref23","first-page":"6677","article-title":"Adagrad stepsizes: Sharp convergence over nonconvex landscapes","author":"ward","year":"2019","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1060.0188"},{"key":"ref26","first-page":"1","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref44","article-title":"Dynamic programming and optimal control 3rd edition, volume ii","author":"bertsekas","year":"2011"},{"key":"ref25","article-title":"ADADELTA: An adaptive learning rate method","author":"zeiler","year":"2012"},{"key":"ref43","author":"van roy","year":"1998","journal-title":"Learning and value function approximation in complex decision processes"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9940447\/09573484.pdf?arnumber=9573484","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T23:13:30Z","timestamp":1670282010000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9573484\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,1]]},"references-count":46,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2021.3119645","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,1]]}}}