{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T08:39:55Z","timestamp":1772786395830,"version":"3.50.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2022,12,13]],"date-time":"2022-12-13T00:00:00Z","timestamp":1670889600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,12,13]],"date-time":"2022-12-13T00:00:00Z","timestamp":1670889600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62173347"],"award-info":[{"award-number":["62173347"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2023,10]]},"DOI":"10.1007\/s11063-022-11096-x","type":"journal-article","created":{"date-parts":[[2022,12,13]],"date-time":"2022-12-13T07:02:55Z","timestamp":1670914975000},"page":"5515-5537","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["An Adaptive Updating Method of Target Network Based on Moment Estimates for Deep Reinforcement Learning"],"prefix":"10.1007","volume":"55","author":[{"given":"Miaoping","family":"Sun","sequence":"first","affiliation":[]},{"given":"Zequan","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9886-176X","authenticated-orcid":false,"given":"Xunhua","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Xiaohong","family":"Nian","sequence":"additional","affiliation":[]},{"given":"Hongyun","family":"Xiong","sequence":"additional","affiliation":[]},{"given":"Haibo","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,12,13]]},"reference":[{"issue":"7","key":"11096_CR1","doi-asserted-by":"publisher","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton GE, Osindero S, Teh Y-W (2006) A fast learning algorithm for deep belief nets. Neural Comput 18(7):1527\u20131554. https:\/\/doi.org\/10.1162\/neco.2006.18.7.1527","journal-title":"Neural Comput"},{"issue":"3","key":"11096_CR2","doi-asserted-by":"publisher","first-page":"2827","DOI":"10.1007\/s11063-020-10231-w","volume":"51","author":"M Afif","year":"2020","unstructured":"Afif M, Ayachi R, Said Y, Atri M (2020) Deep learning based application for indoor scene recognition. Neural Process Lett 51(3):2827\u20132837. https:\/\/doi.org\/10.1007\/s11063-020-10231-w","journal-title":"Neural Process Lett"},{"key":"11096_CR3","doi-asserted-by":"publisher","DOI":"10.1155\/2018\/7068349","author":"A Voulodimos","year":"2018","unstructured":"Voulodimos A, Doulamis N, Doulamis A, Protopapadakis E (2018) Deep learning for computer vision: a brief review. Comput Intell Neurosci. https:\/\/doi.org\/10.1155\/2018\/7068349","journal-title":"Comput Intell Neurosci"},{"issue":"5","key":"11096_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3178115","volume":"9","author":"Z Zhang","year":"2018","unstructured":"Zhang Z et al (2018) Deep learning for environmentally robust speech recognition: an overview of recent developments. ACM Transact Intell Syst Technol (TIST) 9(5):1\u201328. https:\/\/doi.org\/10.1145\/3178115","journal-title":"ACM Transact Intell Syst Technol (TIST)"},{"issue":"1","key":"11096_CR5","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1109\/JAS.2020.1003381","volume":"8","author":"Z Li","year":"2021","unstructured":"Li Z, Li S, Luo X (2021) An overview of calibration technology of industrial robots. IEEE\/CAA J Autom Sinica 8(1):23\u201336","journal-title":"IEEE\/CAA J Autom Sinica"},{"issue":"11","key":"11096_CR6","doi-asserted-by":"publisher","first-page":"5931","DOI":"10.1109\/TII.2019.2909142","volume":"15","author":"H Lu","year":"2019","unstructured":"Lu H et al (2019) Rnn for solving perturbed time-varying underdetermined linear system with double bound limits on residual errors and state variables. IEEE Transact Ind Inf 15(11):5931\u20135942. https:\/\/doi.org\/10.1109\/TII.2019.2909142","journal-title":"IEEE Transact Ind Inf"},{"issue":"3","key":"11096_CR7","doi-asserted-by":"publisher","first-page":"621","DOI":"10.1109\/TNNLS.2013.2281663","volume":"25","author":"D Liu","year":"2013","unstructured":"Liu D, Wei Q (2013) Policy iteration adaptive dynamic programming algorithm for discrete-time nonlinear systems. IEEE Transact Neural Netw Learn Syst 25(3):621\u2013634. https:\/\/doi.org\/10.1109\/TNNLS.2013.2281663","journal-title":"IEEE Transact Neural Netw Learn Syst"},{"issue":"2","key":"11096_CR8","doi-asserted-by":"publisher","first-page":"418","DOI":"10.1109\/TNNLS.2013.2280013","volume":"25","author":"D Liu","year":"2013","unstructured":"Liu D, Wang D, Li H (2013) Decentralized stabilization for a class of continuous-time nonlinear interconnected systems using online learning optimal control approach. IEEE Transact Neural Netw Learn Syst 25(2):418\u2013428. https:\/\/doi.org\/10.1109\/TNNLS.2013.2280013","journal-title":"IEEE Transact Neural Netw Learn Syst"},{"issue":"3","key":"11096_CR9","doi-asserted-by":"publisher","first-page":"840","DOI":"10.1109\/TCYB.2015.2492242","volume":"46","author":"Q Wei","year":"2015","unstructured":"Wei Q, Liu D, Lin H (2015) Value iteration adaptive dynamic programming for optimal control of discrete-time nonlinear systems. IEEE Transact Cybern 46(3):840\u2013853. https:\/\/doi.org\/10.1109\/TCYB.2015.2492242","journal-title":"IEEE Transact Cybern"},{"issue":"3","key":"11096_CR10","doi-asserted-by":"publisher","first-page":"1709","DOI":"10.1007\/s11063-019-10127-4","volume":"53","author":"Y Huang","year":"2021","unstructured":"Huang Y, Gu C, Guan X (2021) Integrating classical control into reinforcement learning policy. Neural Process Lett 53(3):1709\u20131722. https:\/\/doi.org\/10.1007\/s11063-019-10127-4","journal-title":"Neural Process Lett"},{"issue":"7540","key":"11096_CR11","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533. https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"issue":"6","key":"11096_CR12","doi-asserted-by":"publisher","first-page":"701","DOI":"10.7641\/CTA.2016.60173","volume":"33","author":"D Zhao","year":"2016","unstructured":"Zhao D et al (2016) Review of deep reinforcement learning and discussions on the development of computer go. Control Theor & Appl 33(6):701\u2013717. https:\/\/doi.org\/10.7641\/CTA.2016.60173","journal-title":"Control Theor & Appl"},{"key":"11096_CR13","doi-asserted-by":"publisher","unstructured":"Wan L, Lan X, Zhang H, Zheng N (2019) A review of deep reinforcement learning theory and application. Pattern Recognit Artifici Intell 32(1), 67\u201381 https:\/\/doi.org\/10.16451\/j.cnki.issn1003-6059.201901009","DOI":"10.16451\/j.cnki.issn1003-6059.201901009"},{"issue":"7782","key":"11096_CR14","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals O et al (2019) Grandmaster level in starcraft ii using multi-agent reinforcement learning. Nature 575(7782):350\u2013354. https:\/\/doi.org\/10.1038\/s41586-019-1724-z","journal-title":"Nature"},{"issue":"7676","key":"11096_CR15","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D et al (2017) Mastering the game of go without human knowledge. Nature 550(7676):354\u2013359. https:\/\/doi.org\/10.1038\/nature24270","journal-title":"Nature"},{"key":"11096_CR16","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625","author":"BR Kiran","year":"2021","unstructured":"Kiran BR et al (2021) Deep reinforcement learning for autonomous driving: a survey. IEEE Transact Intell Transport Syst. https:\/\/doi.org\/10.1109\/TITS.2021.3054625","journal-title":"IEEE Transact Intell Transport Syst"},{"key":"11096_CR17","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1016\/j.neucom.2019.01.087","volume":"345","author":"F Li","year":"2019","unstructured":"Li F, Jiang Q, Zhang S, Wei M, Song R (2019) Robot skill acquisition in assembly process using deep reinforcement learning. Neurocomputing 345:92\u2013102. https:\/\/doi.org\/10.1016\/j.neucom.2019.01.087","journal-title":"Neurocomputing"},{"issue":"12","key":"11096_CR18","doi-asserted-by":"publisher","first-page":"4335","DOI":"10.1007\/s10489-019-01510-8","volume":"49","author":"T Kobayashi","year":"2019","unstructured":"Kobayashi T (2019) Student-t policy in reinforcement learning to acquire global optimum of robot control. Appl Intell 49(12):4335\u20134347. https:\/\/doi.org\/10.1007\/s10489-019-01510-8","journal-title":"Appl Intell"},{"issue":"3","key":"11096_CR19","doi-asserted-by":"publisher","first-page":"2513","DOI":"10.1007\/s11063-020-10220-z","volume":"51","author":"J Zhao","year":"2020","unstructured":"Zhao J (2020) Neural network-based optimal tracking control of continuous-time uncertain nonlinear system via reinforcement learning. Neural Process Lett 51(3):2513\u20132530. https:\/\/doi.org\/10.1007\/s11063-020-10220-z","journal-title":"Neural Process Lett"},{"key":"11096_CR20","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1016\/j.neunet.2020.12.023","volume":"136","author":"T Kobayashi","year":"2021","unstructured":"Kobayashi T, Ilboudo WEL (2021) T-soft update of target network for deep reinforcement learning. Neural Netw 136:63\u201371. https:\/\/doi.org\/10.1016\/j.neunet.2020.12.023","journal-title":"Neural Netw"},{"key":"11096_CR21","doi-asserted-by":"publisher","unstructured":"Ruder S (2016) An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747https:\/\/doi.org\/10.48550\/arXiv.1609.04747","DOI":"10.48550\/arXiv.1609.04747"},{"key":"11096_CR22","doi-asserted-by":"publisher","unstructured":"Lillicrap TP et al (2015) Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971https:\/\/doi.org\/10.48550\/arXiv.1509.02971","DOI":"10.48550\/arXiv.1509.02971"},{"key":"11096_CR23","unstructured":"Fujimoto S, van Hoof H, Meger D, Dy J, Krause A (eds) Addressing function approximation error in actor-critic methods. (eds Dy, J. & Krause, A.) In: Proceedings of the 35th international conference on machine learning, Vol. 80 of proceedings of machine learning research, 1587\u20131596 (PMLR, 2018). https:\/\/proceedings.mlr.press\/v80\/fujimoto18a.html"},{"key":"11096_CR24","doi-asserted-by":"publisher","unstructured":"Kingma DP, Ba J Adam A (2014) Method for stochastic optimization. arXiv preprint arXiv:1412.6980https:\/\/doi.org\/10.48550\/arXiv.1412.6980","DOI":"10.48550\/arXiv.1412.6980"},{"issue":"7\u20139","key":"11096_CR25","doi-asserted-by":"publisher","first-page":"1180","DOI":"10.1016\/j.neucom.2007.11.026","volume":"71","author":"J Peters","year":"2008","unstructured":"Peters J, Schaal S (2008) Natural actor-critic. Neurocomputing 71(7\u20139):1180\u20131190. https:\/\/doi.org\/10.1016\/j.neucom.2007.11.026","journal-title":"Neurocomputing"},{"key":"11096_CR26","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction (MIT press)"},{"issue":"5","key":"11096_CR27","doi-asserted-by":"publisher","first-page":"823","DOI":"10.1103\/PhysRev.36.823","volume":"36","author":"GE Uhlenbeck","year":"1930","unstructured":"Uhlenbeck GE, Ornstein LS (1930) On the theory of the brownian motion. Phys Rev 36(5):823. https:\/\/doi.org\/10.1103\/PhysRev.36.823","journal-title":"Phys Rev"},{"key":"11096_CR28","unstructured":"Lowe R et al (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. Adv Neural Inform Process Syst 30"},{"key":"11096_CR29","doi-asserted-by":"publisher","unstructured":"Zhang J, He T, Sra S, Jadbabaie A (2019) Why gradient clipping accelerates training: a theoretical justification for adaptivity. arXiv preprint arXiv:1905.11881https:\/\/doi.org\/10.48550\/arXiv.1905.11881","DOI":"10.48550\/arXiv.1905.11881"},{"key":"11096_CR30","unstructured":"Coumans E, Bai Y (2016) Pybullet, a python module for physics simulation for games, robotics and machine learning"},{"key":"11096_CR31","doi-asserted-by":"publisher","unstructured":"Brockman G et al (2016) Openai gym. arXiv preprint arXiv:1606.01540https:\/\/doi.org\/10.48550\/arXiv.1606.01540","DOI":"10.48550\/arXiv.1606.01540"},{"key":"11096_CR32","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3115240","author":"H Ge","year":"2021","unstructured":"Ge H et al (2021) Multi-agent transfer reinforcement learning with multi-view encoder for adaptive traffic signal control. IEEE Transac Intell Transp Syst. https:\/\/doi.org\/10.1109\/TITS.2021.3115240","journal-title":"IEEE Transac Intell Transp Syst"},{"issue":"7","key":"11096_CR33","doi-asserted-by":"publisher","first-page":"1121","DOI":"10.3390\/electronics9071121","volume":"9","author":"W Kong","year":"2020","unstructured":"Kong W, Zhou D, Yang Z, Zhao Y, Zhang K (2020) Uav autonomous aerial combat maneuver strategy generation with observation error based on state-adversarial deep deterministic policy gradient and inverse reinforcement learning. Electronics 9(7):1121. https:\/\/doi.org\/10.3390\/electronics9071121","journal-title":"Electronics"},{"key":"11096_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.114896","volume":"176","author":"L Huang","year":"2021","unstructured":"Huang L, Fu M, Qu H, Wang S, Hu S (2021) A deep reinforcement learning-based method applied for solving multi-agent defense and attack problems. Expert Syst Appl 176:114896. https:\/\/doi.org\/10.1016\/j.eswa.2021.114896","journal-title":"Expert Syst Appl"},{"issue":"8","key":"11096_CR35","doi-asserted-by":"publisher","first-page":"1537","DOI":"10.3390\/sym13081537","volume":"13","author":"Z Zhu","year":"2021","unstructured":"Zhu Z, Xie N, Zong K, Chen L (2021) Building a connected communication network for uav clusters using de-maddpg. Symmetry 13(8):1537. https:\/\/doi.org\/10.3390\/sym13081537","journal-title":"Symmetry"},{"key":"11096_CR36","doi-asserted-by":"publisher","DOI":"10.1016\/j.measurement.2020.108253","volume":"167","author":"AT Khan","year":"2021","unstructured":"Khan AT, Li S, Cao X (2021) Control framework for cooperative robots in smart home using bio-inspired neural network. Measurement 167:108253","journal-title":"Measurement"},{"issue":"2","key":"11096_CR37","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1109\/JAS.2020.1003048","volume":"7","author":"AH Khan","year":"2020","unstructured":"Khan AH, Cao X, Li S, Katsikis VN, Liao L (2020) Bas-adam: an adam based approach to improve the performance of beetle antennae search optimizer. IEEE\/CAA J Autom Sin 7(2):461\u2013471","journal-title":"IEEE\/CAA J Autom Sin"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11096-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-022-11096-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11096-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,29]],"date-time":"2023-09-29T16:11:41Z","timestamp":1696003901000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-022-11096-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,13]]},"references-count":37,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2023,10]]}},"alternative-id":["11096"],"URL":"https:\/\/doi.org\/10.1007\/s11063-022-11096-x","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"value":"1370-4621","type":"print"},{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,13]]},"assertion":[{"value":"25 November 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 December 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}