{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T14:29:19Z","timestamp":1773930559128,"version":"3.50.1"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,11,24]],"date-time":"2022-11-24T00:00:00Z","timestamp":1669248000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,11,24]],"date-time":"2022-11-24T00:00:00Z","timestamp":1669248000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,11,24]]},"DOI":"10.1109\/anzcc56036.2022.9966956","type":"proceedings-article","created":{"date-parts":[[2022,12,5]],"date-time":"2022-12-05T18:38:04Z","timestamp":1670265484000},"page":"69-74","source":"Crossref","is-referenced-by-count":2,"title":["Physics Informed Intrinsic Rewards in Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Jiazhou","family":"Jiang","sequence":"first","affiliation":[{"name":"University of Newcastle,School of Engineering,Callaghan,NSW,Australia,2308"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Minyue","family":"Fu","sequence":"additional","affiliation":[{"name":"University of Newcastle,School of Engineering,Callaghan,NSW,Australia,2308"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyong","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Newcastle,School of Engineering,Callaghan,NSW,Australia,2308"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"0"},{"key":"ref11","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"0"},{"key":"ref12","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017"},{"key":"ref13","article-title":"Sample efficient actor-critic with experience replay","author":"wang","year":"2016"},{"key":"ref14","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0"},{"key":"ref15","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref17","article-title":"Large-scale study of curiosity-driven learning","author":"burda","year":"2018"},{"key":"ref18","article-title":"Vime: Variational information maximizing exploration","volume":"29","author":"houthooft","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref19","article-title":"Exploration by random network distillation","author":"burda","year":"2018"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202244"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2021.101360"},{"key":"ref6","article-title":"Actor-critic algorithms","volume":"12","author":"konda","year":"1999","journal-title":"Advances in neural information processing systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref8","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"International Conference on Machine Learning"},{"key":"ref7","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref2","first-page":"1","article-title":"Benchmarking safe exploration in deep reinforcement learning","volume":"7","author":"ray","year":"2019"},{"key":"ref9","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015"},{"key":"ref1","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013"},{"key":"ref20","article-title":"Unifying count-based exploration and intrinsic motivation","volume":"29","author":"bellemare","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRev.36.823"},{"key":"ref21","article-title":"Openai gym","author":"brockman","year":"2016"},{"key":"ref24","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015"},{"key":"ref23","article-title":"Spinning Up in Deep Reinforcement Learning","author":"achiam","year":"2018"},{"key":"ref25","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018"}],"event":{"name":"2022 Australian & New Zealand Control Conference (ANZCC)","location":"Gold Coast, Australia","start":{"date-parts":[[2022,11,24]]},"end":{"date-parts":[[2022,11,25]]}},"container-title":["2022 Australian &amp; New Zealand Control Conference (ANZCC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9966854\/9966858\/09966956.pdf?arnumber=9966956","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T13:52:09Z","timestamp":1698328329000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9966956\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,24]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/anzcc56036.2022.9966956","relation":{},"subject":[],"published":{"date-parts":[[2022,11,24]]}}}