{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T14:45:47Z","timestamp":1773153947100,"version":"3.50.1"},"reference-count":88,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100008254","name":"Startup Project of Doctor Scientific Research from Jiangxi University of Science and Technology","doi-asserted-by":"publisher","award":["2022205200100595"],"award-info":[{"award-number":["2022205200100595"]}],"id":[{"id":"10.13039\/501100008254","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Automat. Sci. Eng."],"published-print":{"date-parts":[[2024,1]]},"DOI":"10.1109\/tase.2023.3234961","type":"journal-article","created":{"date-parts":[[2023,1,11]],"date-time":"2023-01-11T22:14:44Z","timestamp":1673475284000},"page":"976-988","source":"Crossref","is-referenced-by-count":3,"title":["Continuous Control With Swarm Intelligence Based Value Function Approximation"],"prefix":"10.1109","volume":"21","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4365-0148","authenticated-orcid":false,"given":"Bi","family":"Wang","sequence":"first","affiliation":[{"name":"Faculty of Information Engineering, Jiangxi University of Science and Technology, Ganzhou, China"}]},{"given":"Xuelian","family":"Li","sequence":"additional","affiliation":[{"name":"School of Foreign Studies, Nanjing University of Posts and Telecommunications, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5239-9816","authenticated-orcid":false,"given":"Yang","family":"Chen","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7198-4199","authenticated-orcid":false,"given":"Jianqing","family":"Wu","sequence":"additional","affiliation":[{"name":"Faculty of Information Engineering, Jiangxi University of Science and Technology, Ganzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6183-4162","authenticated-orcid":false,"given":"Bowen","family":"Zeng","sequence":"additional","affiliation":[{"name":"Faculty of Information Engineering, Jiangxi University of Science and Technology, Ganzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6106-6121","authenticated-orcid":false,"given":"Junfu","family":"Chen","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Nanjing University of Aeronautics and Astronautics, Nanjing, China"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Artificial Intelligence: A Modern Approach","author":"Russell","year":"2009"},{"key":"ref2","volume-title":"Reinforcement Learning: An 
Introduction","author":"Sutton","year":"2018"},{"issue":"3","key":"ref3","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"key":"ref4","first-page":"2001","article-title":"Benchmarking deep reinforcement learning for continuous control","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","volume":"3","author":"Duan"},{"key":"ref5","article-title":"Efficient memory-based learning for robot control","author":"Moore","year":"1990"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2020.3047924"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2020.3024725"},{"key":"ref10","first-page":"8224","article-title":"Sample-efficient reinforcement learning with stochastic ensemble value expansion","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Buckman"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1137\/17M1144854"},{"key":"ref12","first-page":"3599","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref13","first-page":"1634","article-title":"Smooth exploration for robotic reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Raffin"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6059"},{"key":"ref15","first-page":"2776","article-title":"Bridging the gap between value and policy based reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Nachum"},{"key":"ref16","article-title":"Value-based reinforcement learning for continuous control robotic manipulation in multi-task sparse reward settings","volume":"abs\/2107.13356","author":"Rammohan","year":"2021","journal-title":"CoRR"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143955"},{"key":"ref18","first-page":"1","article-title":"Continuous deep Q-learning with model-based acceleration","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Gu"},{"key":"ref19","first-page":"1","article-title":"Hamilton\u2013Jacobi deep Q-learning for deterministic continuous-time systems with Lipschitz continuous controls","volume":"22","author":"Kim","year":"2021","journal-title":"J. Mach. Learn. Res."},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.compchemeng.2020.107143"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-8971-8_25"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-015-5522-z"},{"key":"ref23","first-page":"11404","article-title":"Batch value-function approximation with only realizability","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Xie"},{"key":"ref24","first-page":"1942","article-title":"Particle swarm optimization","volume-title":"Proc. IEEE Int. Conf. Neural Netw.","volume":"4","author":"Eberhart"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.4018\/IJSIR.2016070102"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2018.11.008"},{"key":"ref27","first-page":"7450","article-title":"Constrained cross-entropy method for safe reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Wen"},{"key":"ref28","first-page":"3198","article-title":"Risk-sensitive reinforcement learning with function approximation: A debiasing approach","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Fei"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.1996.506586"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2007.368199"},{"key":"ref31","first-page":"605","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. 31st Int. Conf. Mach. Learn. (ICML)","volume":"1","author":"Silver"},{"key":"ref32","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume":"abs\/1509","author":"Lillicrap","year":"2015","journal-title":"CoRR"},{"key":"ref33","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-94042-7_8"},{"key":"ref35","first-page":"1","article-title":"Gradient monitored reinforcement learning","volume":"abs\/2005.12108","author":"Hameed","year":"2020","journal-title":"CoRR"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3103132"},{"key":"ref37","first-page":"1386","article-title":"Improving stochastic policy gradients in continuous control with deep reinforcement learning using the beta distribution","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","volume":"2","author":"Chou"},{"key":"ref38","article-title":"Reproducibility of benchmarked deep reinforcement learning tasks for continuous control","volume":"abs\/1708.04133","author":"Islam","year":"2017","journal-title":"CoRR"},{"key":"ref39","first-page":"21","article-title":"Truly proximal policy optimization","volume-title":"Proc. 31th Conf. Uncertainty Artif. Intell.","author":"Wang"},{"key":"ref40","article-title":"Continuous-action reinforcement learning for playing racing games: Comparing SPG to PPO","author":"Holubar","year":"2020","journal-title":"arXiv:2001.05270"},{"key":"ref41","first-page":"4476","article-title":"Learning and planning in complex action spaces","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Hubert"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-022-10034-z"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3102831"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2022.01.047"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553476"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2011.5967381"},{"key":"ref48","article-title":"Discrete sequential prediction of continuous actions for deep RL","author":"Metz","year":"2017","journal-title":"arXiv:1705.05035"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11798"},{"key":"ref50","article-title":"Input convex neural networks","author":"Amos","year":"2016","journal-title":"arXiv:1609.07152"},{"key":"ref51","first-page":"7224","article-title":"Value iteration in continuous actions, states and time","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Lutter"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2020.109421"},{"key":"ref53","first-page":"3078","article-title":"Model-based reinforcement learning for continuous control with posterior sampling","volume-title":"Proc. 38th Int. Conf. Mach. 
Learn.","author":"Fan"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.04.015"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3017461"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3045087"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.amc.2021.126537"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICSMC.2008.4811430"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/IRC.2019.00121"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1162\/106365602320169811"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1080\/09528130903065497"},{"key":"ref62","article-title":"Neural architecture evolution in deep reinforcement learning for continuous control","author":"Franke","year":"2019","journal-title":"arXiv:1910.12824"},{"key":"ref63","first-page":"1754","article-title":"Genetic-gated networks for deep reinforcement","volume-title":"Proc. Adv. Neural Inf. Process. Syst., Annu. Conf. Neural Inf. Process. Syst.","author":"Chang"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44851-9_5"},{"key":"ref65","first-page":"949","article-title":"Natural evolution strategies","volume":"15","author":"Wierstra","year":"2014","journal-title":"J. Mach. Learn. Res."},{"key":"ref66","article-title":"Evolution strategies as a scalable alternative to reinforcement learning","author":"Salimans","year":"2017","journal-title":"arXiv:1703.03864"},{"key":"ref67","first-page":"5816","article-title":"Collaborative evolutionary reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Khadka"},{"key":"ref68","first-page":"4171","article-title":"Logarithmic regret for reinforcement learning with linear function approximation","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"He"},{"key":"ref69","first-page":"2254","article-title":"Combining pessimism with optimism for robust and efficient model-based deep reinforcement learning","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Curi"},{"key":"ref70","first-page":"1188","article-title":"Evolution-guided policy gradient in reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Khadka"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.3390\/make1020035"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2015.08.152"},{"key":"ref73","first-page":"275","article-title":"Locally persistent exploration in continuous control tasks with sparse rewards","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Amin"},{"key":"ref74","first-page":"2753","article-title":"Exploration: A study of count-based exploration for deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Tang"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511813603"},{"key":"ref76","article-title":"Weak convergence of particle swarm optimization","author":"Bruned","year":"2018","journal-title":"arXiv:1811.04924"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1137\/S1052623497331063"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1137\/120880811"},{"key":"ref79","first-page":"4896","article-title":"On the local minima of the empirical risk","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Jin"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1049\/joe.2018.9178"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1002\/nme.1646"},{"key":"ref82","first-page":"2976","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"5","author":"Haarnoja"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1016\/j.swevo.2018.03.011"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1145\/3449726.3459475"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref86","first-page":"1131","article-title":"Reconciling \u03bb-returns with experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Daley"},{"key":"ref87","first-page":"3","article-title":"Rectifier nonlinearities improve neural network acoustic models","volume-title":"Proc. 30th Int. Conf. Mach. Learn.","volume":"28","author":"Maas"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1117\/3.682726.p64"}],"container-title":["IEEE Transactions on Automation Science and Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8856\/10381526\/10015150.pdf?arnumber=10015150","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T17:25:57Z","timestamp":1729531557000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10015150\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1]]},"references-count":88,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tase.2023.3234961","relation":{},"ISSN":["1545-5955","1558-3783"],"issn-type":[{"value":"1545-5955","type":"print"},{"value":"1558-3783","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1]]}}}