{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T14:29:27Z","timestamp":1773930567151,"version":"3.50.1"},"reference-count":45,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Emerg. Top. Comput. Intell."],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1109\/tetci.2022.3140375","type":"journal-article","created":{"date-parts":[[2022,1,20]],"date-time":"2022-01-20T20:25:37Z","timestamp":1642710337000},"page":"1324-1334","source":"Crossref","is-referenced-by-count":11,"title":["Optimal Actor-Critic Policy With Optimized Training Datasets"],"prefix":"10.1109","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1039-3744","authenticated-orcid":false,"given":"Chayan","family":"Banerjee","sequence":"first","affiliation":[{"name":"School of Engineering, University of Newcastle, Callaghan, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2033-4249","authenticated-orcid":false,"given":"Zhiyong","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Engineering, University of Newcastle, Callaghan, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8566-0870","authenticated-orcid":false,"given":"Nasimul","family":"Noman","sequence":"additional","affiliation":[{"name":"School of Information and Physical Sciences, University of Newcastle, Callaghan, NSW, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2972-7417","authenticated-orcid":false,"given":"Mohsen","family":"Zamani","sequence":"additional","affiliation":[{"name":"School of Engineering, University of Newcastle, Callaghan, NSW, Australia"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/IRC.2019.00121"},{"key":"ref38","first-page":"3341","article-title":"Collaborative evolutionary reinforcement learning","author":"khadka","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593986"},{"key":"ref32","first-page":"651","article-title":"Scalable deep reinforcement learning for vision-based robotic manipulation","author":"kalashnikov","year":"0","journal-title":"Proc Conf Robot Learn"},{"key":"ref31","article-title":"Way off-policy batch deep reinforcement learning of implicit human preferences in dialog","author":"jaques","year":"2019"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3057023"},{"key":"ref37","first-page":"1188","article-title":"Evolution-guided policy gradient in reinforcement learning","author":"khadka","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref36","article-title":"Deep neuroevolution: Genetic algorithms are a competitive alternative for training deep neural networks for reinforcement learning","author":"such","year":"2017"},{"key":"ref35","article-title":"Policy optimization by genetic distillation","author":"gangwani","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref34","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"levine","year":"2020"},{"key":"ref10","article-title":"Q-Prop: Sample-efficient policy gradient with an off-policy critic","author":"gu","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2019.2916183"},{"key":"ref11","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref12","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref13","first-page":"5279","article-title":"Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation","volume":"30","author":"wu","year":"0","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref14","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017"},{"key":"ref15","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref17","article-title":"Playing Atari with deep reinforcement learning","author":"mnih","year":"2013"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref19","first-page":"179","article-title":"Off-policy actor-critic","author":"degris","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_2"},{"key":"ref4","first-page":"1","article-title":"Data efficient reinforcement learning for legged robots","author":"yang","year":"0","journal-title":"Proc Conf Robot Learn"},{"key":"ref27","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref3","article-title":"Learning to walk via deep reinforcement learning","author":"haarnoja","year":"2018"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1126\/science.aau6249"},{"key":"ref29","article-title":"D4RL: Datasets for deep data-driven reinforcement learning","author":"fu","year":"2020"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref8","first-page":"1008","article-title":"Actor-critic algorithms","volume":"12","author":"konda","year":"0","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwz190"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197020"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2950779"},{"key":"ref1","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2015.2421338"},{"key":"ref45","article-title":"Stable baselines","author":"hill","year":"2018"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"ref42","article-title":"GADAM: Genetic-evolutionary ADAM for deep neural network optimization","author":"zhang","year":"2018"},{"key":"ref24","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume":"80","author":"fujimoto","year":"0","journal-title":"Mach Learn Res"},{"key":"ref41","doi-asserted-by":"crossref","DOI":"10.35784\/acs-2019-18","article-title":"Using GA for evolving weights in neural networks","volume":"15","author":"hameed","year":"2019","journal-title":"Appl Comput Sci"},{"key":"ref23","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1119\/1.10903"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2018.2821369"},{"key":"ref43","article-title":"Mixture density networks","author":"bishop","year":"1994"},{"key":"ref25","article-title":"Sample efficient actor-critic with experience replay","author":"wang","year":"2016"}],"container-title":["IEEE Transactions on Emerging Topics in Computational Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7433297\/9965775\/09687093.pdf?arnumber=9687093","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,23]],"date-time":"2023-01-23T22:44:04Z","timestamp":1674513844000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9687093\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12]]},"references-count":45,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tetci.2022.3140375","relation":{},"ISSN":["2471-285X"],"issn-type":[{"value":"2471-285X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12]]}}}