{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:37:07Z","timestamp":1765546627027,"version":"3.28.0"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/ijcnn48605.2020.9207681","type":"proceedings-article","created":{"date-parts":[[2020,9,30]],"date-time":"2020-09-30T00:40:33Z","timestamp":1601426433000},"page":"1-8","source":"Crossref","is-referenced-by-count":5,"title":["Noisy Importance Sampling Actor-Critic: An Off-Policy Actor-Critic With Experience Replay"],"prefix":"10.1109","author":[{"given":"Norman","family":"Tasfi","sequence":"first","affiliation":[]},{"given":"Miriam","family":"Capretz","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"article-title":"Off-policy actor-critic","year":"2012","author":"degris","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2009.05.011"},{"article-title":"High-dimensional continuous control using generalized advantage estimation","year":"2015","author":"schulman","key":"ref12"},{"key":"ref13","volume":"33","author":"gumbel","year":"1948","journal-title":"Statistical Theory of Extreme Values and Some Practical Applications A Series of Lectures"},{"key":"ref14","first-page":"3086","article-title":"A* sampling","author":"maddison","year":"2014","journal-title":"Advances in neural information processing systems"},{"article-title":"Categorical reparameterization with gumbel-softmax","year":"2016","author":"jang","key":"ref15"},{"article-title":"Off-policy policy search","year":"2000","author":"meuleau","key":"ref16"},{"key":"ref17","first-page":"1000","article-title":"On a connection between importance sampling and the likelihood ratio policy gradient","author":"jie","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref18","first-page":"1","article-title":"Guided policy search","author":"levine","year":"2013","journal-title":"International Conference on Machine Learning"},{"article-title":"Sample efficient actor-critic with experience replay","year":"2016","author":"wang","key":"ref19"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"year":"2018","key":"ref3","article-title":"Openai five"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1007\/BF00992698","article-title":"Q-learning","volume":"8","author":"watkins","year":"1992","journal-title":"Machine Learning"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref8","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref9","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref20","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref22","first-page":"441","article-title":"Bias in natural actor-critic algorithms","author":"thomas","year":"2014","journal-title":"International Conference on Machine Learning"},{"key":"ref21","article-title":"Safe and efficient off-policy reinforcement learning","author":"munos","year":"2016","journal-title":"CoRR"},{"article-title":"Openai gym","year":"2016","author":"brockman","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref26","article-title":"Openai baselines","author":"dhariwal","year":"2017","journal-title":"GitHub repository GitHub"},{"article-title":"Neural networks for machine learning lecture 6a overview of mini-batch gradient descent","year":"2012","author":"hinton","key":"ref25"}],"event":{"name":"2020 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2020,7,19]]},"location":"Glasgow, United Kingdom","end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9200848\/9206590\/09207681.pdf?arnumber=9207681","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T21:56:48Z","timestamp":1656453408000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9207681\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/ijcnn48605.2020.9207681","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}