{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T19:44:23Z","timestamp":1776800663787,"version":"3.51.2"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,1,15]],"date-time":"2025-01-15T00:00:00Z","timestamp":1736899200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,1,15]],"date-time":"2025-01-15T00:00:00Z","timestamp":1736899200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62176113"],"award-info":[{"award-number":["62176113"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62172142"],"award-info":[{"award-number":["62172142"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s40747-024-01757-w","type":"journal-article","created":{"date-parts":[[2025,1,15]],"date-time":"2025-01-15T09:12:49Z","timestamp":1736932369000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Adaptive temporal-difference learning via deep neural network function approximation: a non-asymptotic analysis"],"prefix":"10.1007","volume":"11","author":[{"given":"Guoyong","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tiange","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruijuan","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuhui","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junlong","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2523-1089","authenticated-orcid":false,"given":"Mingchuan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,1,15]]},"reference":[{"key":"1757_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2024.120980","volume":"679","author":"P-A Andersen","year":"2024","unstructured":"Andersen P-A, Goodwin M, Granmo O-C (2024) Towards safe and sustainable reinforcement learning for real-time strategy games. Inf Sci 679:120980. https:\/\/doi.org\/10.1016\/j.ins.2024.120980","journal-title":"Inf Sci"},{"key":"1757_CR2","doi-asserted-by":"publisher","unstructured":"Dong L, Jiang F, Wang M, Peng,Y, Li X (2024) Deep progressive reinforcement learning-based flexible resource scheduling framework for irs and uav-assisted mec system. IEEE Trans Neural Netw Learn Syst 1\u201313. https:\/\/doi.org\/10.1109\/TNNLS.2023.3341067","DOI":"10.1109\/TNNLS.2023.3341067"},{"issue":"10","key":"1757_CR3","doi-asserted-by":"publisher","first-page":"1943","DOI":"10.1177\/01423312231225782","volume":"46","author":"Y Tao","year":"2024","unstructured":"Tao Y, Tao H, Zhuang Z, Stojanovic V, Paszke W (2024) Quantized iterative learning control of communication-constrained systems with encoding and decoding mechanism. Trans Inst Meas Control 46(10):1943\u20131954","journal-title":"Trans Inst Meas Control"},{"issue":"11","key":"1757_CR4","doi-asserted-by":"publisher","first-page":"2015","DOI":"10.1177\/01423312221142564","volume":"45","author":"S Guan","year":"2023","unstructured":"Guan S, Zhuang Z, Tao H, Chen Y, Stojanovic V, Paszke W (2023) Feedback-aided pd-type iterative learning control for time-varying systems with non-uniform trial lengths. Trans Inst Meas Control 45(11):2015\u20132026","journal-title":"Trans Inst Meas Control"},{"issue":"6","key":"1757_CR5","doi-asserted-by":"publisher","first-page":"7451","DOI":"10.1007\/s40747-023-01135-y","volume":"9","author":"Z Peng","year":"2023","unstructured":"Peng Z, Song X, Song S, Stojanovic V (2023) Hysteresis quantified control for switched reaction-diffusion systems and its application. Complex Intell Syst 9(6):7451\u20137460","journal-title":"Complex Intell Syst"},{"key":"1757_CR6","doi-asserted-by":"crossref","unstructured":"Luo G, Wang Y, Zhang H, Yuan Q, Li J (2023) Alpharoute: large-scale coordinated route planning via monte carlo tree search. In: Proceedings of the 37th AAAI conference on artificial intelligence, vol 37, pp 12058\u201312067","DOI":"10.1609\/aaai.v37i10.26422"},{"issue":"1","key":"1757_CR7","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/BF00115009","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Mach Learn 3(1):9\u201344","journal-title":"Mach Learn"},{"key":"1757_CR8","unstructured":"Tsitsiklis J, Van\u00a0Roy B (1997) Analysis of temporal-diffference learning with function approximation. In: Proceedings of the 11th international conference on neural information processing systems, vol 9"},{"key":"1757_CR9","unstructured":"Bhandari J, Russo D, Singal R (2018) A finite time analysis of temporal difference learning with linear function approximation. In: Proceedings of the 31st conference on learning theory, vol 75 . PMLR, pp 1691\u20131692"},{"key":"1757_CR10","unstructured":"Lakshminarayanan C, Szepesvari C (2018) Linear stochastic approximation: How far does constant step-size and iterate averaging go? In: Proceedings of the 21st international conference on artificial intelligence and statistics . PMLR, pp 1347\u20131355"},{"key":"1757_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.120406","volume":"228","author":"J Zhu","year":"2023","unstructured":"Zhu J, Li B, Wang L, Zhang M, Xing L, Xi J, Wu Q (2023) Provable distributed adaptive temporal-difference learning over time-varying networks. Expert Syst Appl 228:120406. https:\/\/doi.org\/10.1016\/j.eswa.2023.120406","journal-title":"Expert Syst Appl"},{"issue":"8","key":"1757_CR12","doi-asserted-by":"publisher","first-page":"4630","DOI":"10.1109\/TSMC.2024.3382986","volume":"54","author":"J Zhu","year":"2024","unstructured":"Zhu J, Mao T, Zhang M, Ge Q, Wu Q, Li K (2024) Decentralized adaptive td($$\\lambda $$) learning with linear function approximation: nonasymptotic analysis. IEEE Trans Syst Man Cybern systems 54(8):4630\u20134641. https:\/\/doi.org\/10.1109\/TSMC.2024.3382986","journal-title":"IEEE Trans Syst Man Cybern systems"},{"issue":"7540","key":"1757_CR13","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"1757_CR14","unstructured":"Wang Z, Schaul T, Hessel M, Hasselt H, Lanctot M, Freitas N (2016) Dueling network architectures for deep reinforcement learning. In: Proceedings of the 34th International Conference on Machine Learning, pp. 1995\u20132003 . PMLR"},{"key":"1757_CR15","unstructured":"Lillicrap T.P, Hunt J.J, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: Proceedings of the 4th international conference on learning representations"},{"key":"1757_CR16","unstructured":"Sutton RS (1995) Generalization in reinforcement learning: successful examples using sparse coarse coding. In: Proceedings of the 9th international conference on neural information processing systems, vol. 8"},{"key":"1757_CR17","unstructured":"Maei H, Szepesvari C, Bhatnagar S, Precup D, Silver D, Sutton RS (2009) Convergent temporal-difference learning with arbitrary smooth function approximation. In: Proceedings of the 23rd international cConference on neural information processing systems, vol 22"},{"issue":"1","key":"1757_CR18","doi-asserted-by":"publisher","first-page":"619","DOI":"10.1287\/moor.2023.1370","volume":"49","author":"Q Cai","year":"2023","unstructured":"Cai Q, Yang Z, Lee JD, Wang Z (2023) Neural temporal difference and q learning provably converge to global optima. Math Oper Res 49(1):619\u2013651","journal-title":"Math Oper Res"},{"key":"1757_CR19","unstructured":"Fan J, Wang Z, Xie Y, Yang Z (2020) A theoretical analysis of deep q-learning. In: Proceedings of the 2nd conference on learning for dynamics and control. PMLR, pp 486\u2013489"},{"key":"1757_CR20","unstructured":"Brandfonbrener D, Bruna J (2020) Geometric insights into the convergence of nonlinear td learning. In: Proceedings of the 8th International Conference on Learning Representations"},{"key":"1757_CR21","unstructured":"Xu P, Gu Q (2020) A finite-time analysis of q-learning with neural network function approximation. In: Proceedings of the 37th international conference on machine learning. PMLR, pp 10555\u201310565"},{"issue":"5","key":"1757_CR22","doi-asserted-by":"publisher","first-page":"2891","DOI":"10.1109\/TAC.2023.3234234","volume":"68","author":"S Cayci","year":"2023","unstructured":"Cayci S, Satpathi S, He N, Srikant R (2023) Sample complexity and overparameterization bounds for temporal-difference learning with neural network approximation. IEEE Trans Autom Control 68(5):2891\u20132905","journal-title":"IEEE Trans Autom Control"},{"key":"1757_CR23","unstructured":"Tian H, Paschalidis IC, Olshevsky A (2023) On the performance of temporal difference learning with neural networks. In: Proceedings of the 11th international conference on learning representations . https:\/\/openreview.net\/forum?id=6JMXLWX68Kj"},{"key":"1757_CR24","unstructured":"Ke Z, Wen Z, Zhang J (2024) An improved finite-time analysis of temporal difference learning with deep neural networks. arXiv preprint arXiv:2405.04017"},{"issue":"1","key":"1757_CR25","first-page":"1","volume":"5","author":"E Even-Dar","year":"2003","unstructured":"Even-Dar E, Mansour Y, Bartlett P (2003) Learning rates for q-learning. J Mach Learn Res 5(1):1\u201325","journal-title":"J Mach Learn Res"},{"key":"1757_CR26","doi-asserted-by":"crossref","unstructured":"Liu Y, Liu Q, Zhao H, Pan Z, Liu C (2020) Adaptive quantitative trading: an imitative deep reinforcement learning approach. In: Proceedings of the 34th AAAI conference on artificial intelligence, vol 34, pp 2128\u20132135","DOI":"10.1609\/aaai.v34i02.5587"},{"issue":"12","key":"1757_CR27","doi-asserted-by":"publisher","first-page":"5526","DOI":"10.1109\/TNNLS.2020.3042981","volume":"32","author":"Y Zhang","year":"2021","unstructured":"Zhang Y, Gao B, Guo L, Guo H, Chen H (2021) Adaptive decision-making for automated vehicles under roundabout scenarios using optimization embedded reinforcement learning. IEEE Trans Neural Netw Learn Syst 32(12):5526\u20135538","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"1757_CR28","doi-asserted-by":"crossref","unstructured":"Xiong H, Xu T, Liang Y, Zhang W (2021) Non-asymptotic convergence of adam-type reinforcement learning algorithms under markovian sampling. In: Proceedings of the 35th AAAI conference on artificial intelligence, vol 35, pp 10460\u201310468","DOI":"10.1609\/aaai.v35i12.17252"},{"key":"1757_CR29","doi-asserted-by":"crossref","unstructured":"Sun T, Shen H, Chen T, Li D (2021) Adaptive temporal difference learning with linear function approximation. IEEE Trans Pattern Anal Mach Intell 1\u201312","DOI":"10.1109\/TPAMI.2021.3119645"},{"key":"1757_CR30","unstructured":"Sun T, Li D, Wang B (2022) Finite-time analysis of adaptive temporal difference learning with deep neural networks. In: Proceedings of the 36th International Conference on Neural Information Processing Systems, vol. 35, pp. 19592\u201319604"},{"key":"1757_CR31","unstructured":"Reddi S.J, Kale S, Kumar S (2018) On the convergence of adam and beyond. In: Proceedings of the 6th international conference on learning representations"},{"key":"1757_CR32","doi-asserted-by":"crossref","unstructured":"Dalal G, Sz\u00f6r\u00e9nyi B, Thoppe G, Mannor S (2018) Finite sample analyses for td (0) with function approximation. In: Proceedings of the 32nd AAAI conference on artificial intelligence, vol 32","DOI":"10.1609\/aaai.v32i1.12079"},{"key":"1757_CR33","doi-asserted-by":"crossref","unstructured":"Jaakkola T, Jordan M, Singh S (1994) Convergence of stochastic iterative dynamic programming algorithms. In: Proceedings of the 8th international conference on neural information processing systems, vol 6","DOI":"10.1162\/neco.1994.6.6.1185"},{"key":"1757_CR34","unstructured":"Korda N, La P (2015) On td (0) with function approximation: Concentration bounds and a centered variant with exponential convergence. In: Proceedings of the 33rd international conference on machine learning. PMLR, pp 626\u2013634"},{"key":"1757_CR35","unstructured":"Narayanan C, Szepesv\u00e1ri C (2017) Finite time bounds for temporal difference learning with function approximation: Problems with some \u201cstate-of-the-art\u201d results. Technical report. Technical report"},{"key":"1757_CR36","unstructured":"Allen-Zhu Z, Li Y, Song Z (2019) A convergence theory for deep learning via over-parameterization. In: Proceedings of the 36th international conference on machine learning. PMLR, pp 242\u2013252"},{"key":"1757_CR37","unstructured":"Cao Y, Gu Q (2019) Generalization bounds of stochastic gradient descent for wide and deep neural networks. In: Proceedings of the 33rd international conference on neural information processing systems, vol 32"},{"key":"1757_CR38","unstructured":"Arora S, Du S, Hu W, Li Z, Wang R (2019) Fine-grained analysis of optimization and generalization for overparameterized two-layer neural networks. In: Proceedings of the 38th international conference on machine learning. PMLR, pp 322\u2013332"},{"key":"1757_CR39","unstructured":"Neyshabur B, Li Z, Bhojanapalli S, LeCun Y, Srebro N (2018) Towards understanding the role of over-parametrization in generalization of neural networks. arXiv preprint arXiv:1805.12076"},{"issue":"7","key":"1757_CR40","first-page":"2121","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi J, Hazan E, Singer Y (2011) Adaptive subgradient methods for online learning and stochastic optimization. J Mach Learn Res 12(7):2121\u20132159","journal-title":"J Mach Learn Res"},{"key":"1757_CR41","unstructured":"Kingma DP, Ba J (2015) Adam: a method for stochastic optimization. In: Proceedings of the 3rd international conference on learning representations"},{"key":"1757_CR42","unstructured":"Luo L, Xiong Y, Liu Y, Sun X (2019) Adaptive gradient methods with dynamic bound of learning rate. In: Proceedings of the 7th international conference on learning representations"},{"key":"1757_CR43","unstructured":"Zhuang J, Tang T, Ding Y, Tatikonda S.C, Dvornek N, Papademetris X, Duncan J (2020) Adabelief optimizer: adapting stepsizes by the belief in observed gradients. In: Proceedings of the 34th international conference on neural information processing systems, vol 33, pp 18795\u201318806"},{"key":"1757_CR44","volume-title":"Dynamic programming and optimal control","author":"DP Bertsekas","year":"2017","unstructured":"Bertsekas DP (2017) Dynamic programming and optimal control, 4th edn. Athena Scientific, Belmont","edition":"4"},{"key":"1757_CR45","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"1757_CR46","unstructured":"Wang G, Lu S, Giannakis G, Tesauro G, Sun J (2020) Decentralized td tracking with linear function approximation and its finite-time analysis. In: Proceedings of the 34th international conference on neural information processing systems, vol 33, pp 13762\u201313772"},{"key":"1757_CR47","unstructured":"Graves A (2013) Generating sequences with recurrent neural networks. arXiv preprint arXiv:1308.0850"},{"key":"1757_CR48","doi-asserted-by":"crossref","unstructured":"Robbins H, Monro S (1951) A stochastic approximation method. The annals of mathematical statistics, 400\u2013407","DOI":"10.1214\/aoms\/1177729586"},{"key":"1757_CR49","unstructured":"Wilson A.C, Roelofs R, Stern M, Srebro N, Recht B (2017) The marginal value of adaptive gradient methods in machine learning. In: Proceedings of the 31st international conference on neural information processing systems, vol 30, pp 1\u201311"},{"key":"1757_CR50","unstructured":"Keskar NS, Socher R (2017) Improving generalization performance by switching from adam to sgd. arXiv preprint arXiv:1712.07628"},{"issue":"1","key":"1757_CR51","doi-asserted-by":"publisher","first-page":"298","DOI":"10.1137\/20M1311971","volume":"3","author":"TT Doan","year":"2021","unstructured":"Doan TT, Maguluri ST, Romberg J (2021) Finite-time performance of distributed temporal-difference learning with linear function approximation. SIAM J Math Data Sci 3(1):298\u2013320","journal-title":"SIAM J Math Data Sci"},{"key":"1757_CR52","doi-asserted-by":"crossref","unstructured":"Chen Z, Zhang S, Doan TT, Clarke J-P, Maguluri ST (2022) Finite-sample analysis of nonlinear stochastic approximation with applications in reinforcement learning. arXiv preprint arXiv:1905.11425","DOI":"10.1016\/j.automatica.2022.110623"},{"key":"1757_CR53","doi-asserted-by":"crossref","unstructured":"Cao Y, Gu Q (2020) Generalization error bounds of gradient descent for learning over-parameterized deep relu networks. In: Proceedings of the 34th AAAI conference on artificial intelligence, vol 34, pp 3349\u20133356","DOI":"10.1609\/aaai.v34i04.5736"},{"issue":"5","key":"1757_CR54","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TSMC.1983.6313077","volume":"SMC13","author":"AG Barto","year":"1983","unstructured":"Barto AG, Sutton RS, Anderson CW (1983) Neuronlike adaptive elements that can solve difficult learning control problems. IEEE Trans Syst Man Cybern SMC13(5):834\u2013846","journal-title":"IEEE Trans Syst Man Cybern"},{"key":"1757_CR55","unstructured":"Moore AW (1990) Efficient memory-based learning for robot control. Technical report, University of Cambridge"},{"key":"1757_CR56","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W (2016) Openai gym. arXiv preprint arXiv:1606.01540"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01757-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-024-01757-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01757-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,7]],"date-time":"2025-02-07T16:32:17Z","timestamp":1738945937000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-024-01757-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,15]]},"references-count":56,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["1757"],"URL":"https:\/\/doi.org\/10.1007\/s40747-024-01757-w","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,15]]},"assertion":[{"value":"11 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there are no Conflict of interest regarding the publication of this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"150"}}