{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T10:22:29Z","timestamp":1768040549524,"version":"3.49.0"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T00:00:00Z","timestamp":1648771200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T00:00:00Z","timestamp":1648771200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T00:00:00Z","timestamp":1648771200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61873022"],"award-info":[{"award-number":["61873022"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61751311"],"award-info":[{"award-number":["61751311"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61825305"],"award-info":[{"award-number":["61825305"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["4182045"],"award-info":[{"award-number":["4182045"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Syst. Man Cybern, Syst."],"published-print":{"date-parts":[[2022,4]]},"DOI":"10.1109\/tsmc.2020.3043584","type":"journal-article","created":{"date-parts":[[2021,1,13]],"date-time":"2021-01-13T18:48:36Z","timestamp":1610563716000},"page":"2042-2052","source":"Crossref","is-referenced-by-count":4,"title":["Online Sparse Temporal Difference Learning Based on Nested Optimization and Regularized Dual Averaging"],"prefix":"10.1109","volume":"52","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8275-5535","authenticated-orcid":false,"given":"Tianheng","family":"Song","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1610-6558","authenticated-orcid":false,"given":"Dazi","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3238-745X","authenticated-orcid":false,"given":"Xin","family":"Xu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref1","first-page":"1204","article-title":"Convergent temporal-difference learning with arbitrary smooth function approximation","volume-title":"Proc. 22nd Int. Conf. Neural Inf. Process. Syst.","author":"Maei"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2805298"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114724"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2894403"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2016.2543238"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2016.2544866"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/LWC.2019.2904486"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2015.2509646"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2891792"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CEC.2019.8790157"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.08.152"},{"key":"ref14","first-page":"154","article-title":"Bayes meets bellman: The Gaussian process approach to temporal difference learning","volume-title":"Proc. 20th Int. Conf. Mach. Learn.","volume":"1","author":"Engel"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2007.899161"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553504"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2015.2424233"},{"key":"ref18","first-page":"809","article-title":"Policy evaluation with temporal differences: A survey and comparison","volume":"15","author":"Dann","year":"2014","journal-title":"J. Mach. Learn. Res."},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1561\/2200000016"},{"key":"ref20","first-page":"2543","article-title":"Dual averaging methods for regularized stochastic learning and online optimization","volume":"11","author":"Xiao","year":"2010","journal-title":"J. Mach. Learn. Res."},{"key":"ref21","first-page":"2019","article-title":"Stabilizing training of generative adversarial networks through regularization","volume-title":"Proc. Adv. Neural Inf. Process. Syst. Conf.","author":"Roth"},{"key":"ref22","first-page":"3339","article-title":"A regularized framework for sparse and structured neural attention","volume-title":"Proc. 31st Int. Adv. Neural Inf. Process. Syst. Conf.","author":"Niculae"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-015-0893-y"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553442"},{"key":"ref25","first-page":"424","article-title":"Sparse Reinforcement learning via convex optimization","volume-title":"Proc. 31st Int. Conf. Mach. Learn.","author":"Qin"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-29946-9_13"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2015.36"},{"key":"ref28","article-title":"L1 regularized linear temporal difference learning","author":"Painter-Wakefield","year":"2012"},{"key":"ref29","first-page":"845","article-title":"Regularized off-policy TD-learning","volume-title":"Proc. 26th Annu. Conf. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref30","first-page":"289","article-title":"Off-policy learning with eligibility traces: A survey","volume":"15","author":"Geist","year":"2014","journal-title":"J. Mach. Learn. Res."},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-007-5038-2"},{"key":"ref32","first-page":"441","article-title":"Regularized policy iteration","volume-title":"Proc. 21st Conf. Adv. Neural Inf. Process.","author":"Farahmand"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1137\/S0363012997331639"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553501"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50013-X"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/81.563625"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1995.478953"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1162\/jmlr.2003.4.6.1107"}],"container-title":["IEEE Transactions on Systems, Man, and Cybernetics: Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221021\/9736423\/09321105.pdf?arnumber=9321105","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,10]],"date-time":"2024-01-10T00:14:24Z","timestamp":1704845664000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9321105\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4]]},"references-count":39,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tsmc.2020.3043584","relation":{},"ISSN":["2168-2216","2168-2232"],"issn-type":[{"value":"2168-2216","type":"print"},{"value":"2168-2232","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,4]]}}}