{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,22]],"date-time":"2026-07-22T09:44:46Z","timestamp":1784713486628,"version":"3.55.0"},"reference-count":54,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T00:00:00Z","timestamp":1709251200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T00:00:00Z","timestamp":1709251200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,3,1]],"date-time":"2024-03-01T00:00:00Z","timestamp":1709251200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1109\/tnnls.2022.3174051","type":"journal-article","created":{"date-parts":[[2022,5,19]],"date-time":"2022-05-19T20:22:44Z","timestamp":1652991764000},"page":"3121-3129","source":"Crossref","is-referenced-by-count":49,"title":["Improved Soft Actor-Critic: Mixing Prioritized Off-Policy Samples With On-Policy Experiences"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1039-3744","authenticated-orcid":false,"given":"Chayan","family":"Banerjee","sequence":"first","affiliation":[{"name":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2033-4249","authenticated-orcid":false,"given":"Zhiyong","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Engineering, The University of Newcastle, Callaghan, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8566-0870","authenticated-orcid":false,"given":"Nasimul","family":"Noman","sequence":"additional","affiliation":[{"name":"School of Information and Physical Sciences, The University of Newcastle, Callaghan, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"issue":"1","key":"ref1","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref2","first-page":"735","article-title":"Continuous-discrete reinforcement learning for hybrid control in robotics","volume-title":"Proc. Conf. Robot Learn.","author":"Neunert"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073602"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593722"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2808266"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2020.2978037"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2020.109081"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.adhoc.2020.102082"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2019.108759"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref12","first-page":"449","article-title":"A distributional perspective on reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Bellemare"},{"key":"ref13","first-page":"1008","article-title":"Actor-critic algorithms","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Konda"},{"key":"ref14","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mnih"},{"key":"ref15","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. NIPS","volume":"99","author":"Sutton"},{"key":"ref16","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref17","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref18","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Silver"},{"key":"ref19","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv:1509.02971"},{"key":"ref20","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref21","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref22","article-title":"Equivalence between policy gradients and soft Q-learning","author":"Schulman","year":"2017","journal-title":"arXiv:1704.06440"},{"key":"ref23","article-title":"Sample efficient actor-critic with experience replay","author":"Wang","year":"2016","journal-title":"arXiv:1611.01224"},{"key":"ref24","article-title":"Hindsight experience replay","author":"Andrychowicz","year":"2017","journal-title":"arXiv:1707.01495"},{"key":"ref25","first-page":"12623","article-title":"Curriculum-guided hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Fang"},{"key":"ref26","first-page":"1","article-title":"DHER: Hindsight experience replay for dynamic goals","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Fang"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/DEVLRN.2019.8850705"},{"key":"ref28","first-page":"4851","article-title":"Remember and forget for experience replay","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Novati"},{"key":"ref29","first-page":"8545","article-title":"Off-policy actor-critic with shared experience replay","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schmitt"},{"key":"ref30","first-page":"1407","article-title":"IMPALA: Scalable distributed deep-RL with importance weighted actor-learner architectures","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Espeholt"},{"key":"ref31","first-page":"2525","article-title":"Not all samples are created equal: Deep learning with importance sampling","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Katharopoulos"},{"key":"ref32","article-title":"Prioritized experience replay","author":"Schaul","year":"2015","journal-title":"arXiv:1511.05952"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref34","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01054-6_1"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"ref37","article-title":"Distributed prioritized experience replay","author":"Horgan","year":"2018","journal-title":"arXiv:1803.00933"},{"key":"ref38","first-page":"1133","article-title":"Reconciling $\\lambda$\n-returns with experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Daley"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/666"},{"key":"ref40","article-title":"Prioritized sequence experience replay","author":"Brittain","year":"2019","journal-title":"arXiv:1905.12726"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/SMC.2017.8122622"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/337"},{"key":"ref43","first-page":"2112","article-title":"Sample-efficient deep reinforcement learning via episodic backward update","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Lee"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2939174"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2019.xv.011"},{"key":"ref47","article-title":"Improving exploration in soft-actor-critic with normalizing flows policies","author":"Ward","year":"2019","journal-title":"arXiv:1906.02771"},{"key":"ref48","article-title":"Band-limited soft actor critic model","author":"Campo","year":"2020","journal-title":"arXiv:2006.11431"},{"key":"ref49","article-title":"Boosting soft actor-critic: Emphasizing recent experience without forgetting the past","author":"Wang","year":"2019","journal-title":"arXiv:1906.04009"},{"key":"ref50","article-title":"Experience replay with likelihood-free importance weights","author":"Sinha","year":"2020","journal-title":"arXiv:2006.13169"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref52","article-title":"OpenAI gym","author":"Brockman","year":"2016","journal-title":"arXiv:1606.01540"},{"key":"ref53","volume-title":"PyTorch Implementation of Soft-Actor-Critic-and-Extensions","author":"Dittert","year":"2020"},{"key":"ref54","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv:1412.6980"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10454107\/09778268.pdf?arnumber=9778268","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T00:33:09Z","timestamp":1709339589000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9778268\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3]]},"references-count":54,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2022.3174051","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3]]}}}