{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T22:36:12Z","timestamp":1740177372339,"version":"3.37.3"},"reference-count":32,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176259","61976215"],"award-info":[{"award-number":["62176259","61976215"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013058","name":"Key Research and Development Program of Jiangsu Province","doi-asserted-by":"publisher","award":["BE2022095"],"award-info":[{"award-number":["BE2022095"]}],"id":[{"id":"10.13039\/501100013058","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cogn. Dev. Syst."],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1109\/tcds.2023.3242274","type":"journal-article","created":{"date-parts":[[2023,2,6]],"date-time":"2023-02-06T19:14:21Z","timestamp":1675710861000},"page":"2190-2201","source":"Crossref","is-referenced-by-count":1,"title":["Alternated Greedy-Step Deterministic Policy Gradient"],"prefix":"10.1109","volume":"15","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5327-1088","authenticated-orcid":false,"given":"Xuesong","family":"Wang","sequence":"first","affiliation":[{"name":"Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, the Xuzhou Key Laboratory of Artificial Intelligence and Big Data, and the School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8963-6374","authenticated-orcid":false,"given":"Jiazhi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, the Xuzhou Key Laboratory of Artificial Intelligence and Big Data, and the School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6698-479X","authenticated-orcid":false,"given":"Yang","family":"Gu","sequence":"additional","affiliation":[{"name":"Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, the Xuzhou Key Laboratory of Artificial Intelligence and Big Data, and the School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5123-5043","authenticated-orcid":false,"given":"Longyang","family":"Huang","sequence":"additional","affiliation":[{"name":"Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, the Xuzhou Key Laboratory of Artificial Intelligence and Big Data, and the School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7587-8027","authenticated-orcid":false,"given":"Kun","family":"Yu","sequence":"additional","affiliation":[{"name":"Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, the Xuzhou Key Laboratory of Artificial Intelligence and Big Data, and the School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2022-9999","authenticated-orcid":false,"given":"Yuhu","family":"Cheng","sequence":"additional","affiliation":[{"name":"Engineering Research Center of Intelligent Control for Underground Space, Ministry of Education, the Xuzhou Key Laboratory of Artificial Intelligence and Big Data, and the School of Information and Control Engineering, China University of Mining and Technology, Xuzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/TNN.1998.712192"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1109\/TCDS.2019.2926477"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/tnnls.2021.3140042"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/TCDS.2019.2928820"},{"issue":"3","key":"ref5","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/TCDS.2016.2628817"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/ICMLC.2010.38"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/TCDS.2020.3034452"},{"key":"ref9","article-title":"Playing Atari with deep reinforcement learning","author":"Mnih","year":"2013","journal-title":"arXiv: 1312.5602"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1038\/nature14236"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/ADPRL.2013.6614994"},{"key":"ref12","first-page":"240","article-title":"Averaged-DQN: variance reduction and stabilization for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Anschel"},{"key":"ref13","first-page":"1628","article-title":"Estimating the maximum expected value through Gaussian approximation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Carlo"},{"key":"ref14","first-page":"2613","article-title":"Double Q-learning","volume-title":"Proc. Int. Conf. Neural Inf. Process.","author":"Hasselt"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref16","article-title":"Maxmin Q-learning: Controlling the estimation bias of Q-learning","author":"Lan","year":"2020","journal-title":"arXiv: 2002.06487"},{"key":"ref17","first-page":"10246","article-title":"On the estimation bias in double Q-learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ren"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.24963\/ijcai.2017\/483"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1609\/aaai.v35i12.17334"},{"key":"ref20","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Rep.","author":"Lillicrap"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/ICTAI50040.2020.00068"},{"key":"ref22","article-title":"Parameter-free deterministic reduction of the estimation bias in continuous control","author":"Saglam","year":"2021","journal-title":"arXiv:2109.11788"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/ICTAI52525.2021.00123"},{"key":"ref24","first-page":"1587","article-title":"Addressing function approximation error in actor\u2013critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref25","article-title":"Learning pessimism for robust and efficient off-policy reinforcement learning","author":"Cetin","year":"2021","journal-title":"arXiv:2110.03375"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1609\/aaai.v36i7.20732"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1109\/TNNLS.2019.2959129"},{"key":"ref28","first-page":"5556","article-title":"Controlling overestimation bias with truncated mixture of continuous distributional quantile critics","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kuznetsov"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1109\/IJCNN.2019.8852397"},{"key":"ref30","article-title":"A novel greedy-step bellman optimality equation for efficient value propagation","author":"Wang","year":"2021","journal-title":"arXiv:2102.11717"},{"key":"ref31","first-page":"1","article-title":"Issues in using function approximation for reinforcement learning","volume-title":"Proc. Connectionist Models Summer School Hillsdale","author":"Thrun"},{"key":"ref32","first-page":"1861","article-title":"Soft actor\u2013critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"}],"container-title":["IEEE Transactions on Cognitive and Developmental Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7274989\/10360134\/10036940.pdf?arnumber=10036940","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:23:54Z","timestamp":1705019034000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10036940\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":32,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tcds.2023.3242274","relation":{},"ISSN":["2379-8920","2379-8939"],"issn-type":[{"type":"print","value":"2379-8920"},{"type":"electronic","value":"2379-8939"}],"subject":[],"published":{"date-parts":[[2023,12]]}}}