{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T10:25:15Z","timestamp":1763202315869,"version":"3.28.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,8,1]],"date-time":"2019-08-01T00:00:00Z","timestamp":1564617600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1109\/cig.2019.8848046","type":"proceedings-article","created":{"date-parts":[[2019,9,27]],"date-time":"2019-09-27T01:49:14Z","timestamp":1569548954000},"page":"1-8","source":"Crossref","is-referenced-by-count":11,"title":["Combining Experience Replay with Exploration by Random Network Distillation"],"prefix":"10.1109","author":[{"given":"Francesco","family":"Sovrano","sequence":"first","affiliation":[]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2019.8848046"},{"key":"ref30","article-title":"Impala: Scalable distributed deep-rl with importance weighted actor-learner architectures","author":"espeholt","year":"2018","journal-title":"arXiv preprint arXiv 1802 01561"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2019.2899159"},{"key":"ref11","article-title":"Crawling in rogue&#x2019;s dungeons with (partitioned) a3c","author":"asperti","year":"2018","journal-title":"arXiv preprint arXiv 1804 
08685"},{"key":"ref12","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv preprint arXiv 1312 5602"},{"year":"0","key":"ref13","article-title":"Tensorflow&#x2019;s l2 loss"},{"key":"ref14","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref15","article-title":"Accelerated methods for deep reinforcement learning","author":"stooke","year":"2018","journal-title":"arXiv preprint arXiv 1803 02811"},{"key":"ref16","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"journal-title":"Memory approaches to reinforcement learning in non-Markovian domains","year":"1992","author":"lin","key":"ref17"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"article-title":"Openai baselines","year":"2017","author":"dhariwal","key":"ref28"},{"key":"ref4","article-title":"Prioritized experience replay","author":"schaul","year":"2015","journal-title":"arXiv preprint arXiv 1511 05952"},{"article-title":"Exploration by random network distillation","year":"2018","author":"burda","key":"ref27"},{"key":"ref3","article-title":"Reinforcement learning with unsupervised auxiliary tasks","author":"jaderberg","year":"2016","journal-title":"arXiv preprint arXiv 1611 05397"},{"key":"ref6","article-title":"Sample efficient actor-critic with experience replay","author":"wang","year":"2016","journal-title":"arXiv preprint arXiv 1611 01224"},{"key":"ref29","first-page":"693","article-title":"Hogwild: A lock-free approach 
to parallelizing stochastic gradient descent","author":"recht","year":"2011","journal-title":"Advances in neural information processing systems"},{"key":"ref5","first-page":"1471","article-title":"Unifying count-based exploration and intrinsic motivation","author":"bellemare","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref8","article-title":"Never forget: Balancing exploration and exploitation via learning optical flow","author":"yang","year":"2019","journal-title":"arXiv preprint arXiv 1901 08486"},{"key":"ref7","article-title":"Self-imitation learning","author":"oh","year":"2018","journal-title":"arXiv preprint arXiv 1806 05635"},{"key":"ref2","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv preprint arXiv 1707 06347"},{"key":"ref9","article-title":"Deep reinforcement learning and sub-problem decomposition using hierarchical architectures in partially observable environments","author":"sovrano","year":"2018","journal-title":"Master&#x2019;s thesis"},{"key":"ref1","article-title":"Exploration by random network distillation","author":"burda","year":"2018","journal-title":"arXiv preprint arXiv 1810 12894"},{"key":"ref20","first-page":"2721","article-title":"Count-based exploration with neural density models","author":"ostrovski","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70"},{"key":"ref22","first-page":"1281","article-title":"Intrinsically motivated reinforcement learning","author":"chentanez","year":"2005","journal-title":"Advances in neural information processing systems"},{"key":"ref21","first-page":"2753","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","author":"tang","year":"2017","journal-title":"Advances in neural information processing 
systems"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s12064-011-0142-z"},{"key":"ref23","first-page":"222","article-title":"A possibility for implementing curiosity and boredom in model-building neural controllers","author":"schmidhuber","year":"1991","journal-title":"Proc of the International Conference on Simulation of Adaptive Behavior From Animals to Animats"},{"key":"ref26","article-title":"Distributed prioritized experience replay","author":"horgan","year":"2018","journal-title":"arXiv preprint arXiv 1803 00933"},{"key":"ref25","first-page":"833","article-title":"An intrinsic reward mechanism for efficient exploration","author":"şimşek","year":"2006","journal-title":"Proceedings of the 23rd International Conference on Machine Learning"}],"event":{"name":"2019 IEEE Conference on Games (CoG)","start":{"date-parts":[[2019,8,20]]},"location":"London, United Kingdom","end":{"date-parts":[[2019,8,23]]}},"container-title":["2019 IEEE Conference on Games (CoG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8844551\/8847948\/08848046.pdf?arnumber=8848046","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T14:50:49Z","timestamp":1658155849000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8848046\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,8]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/cig.2019.8848046","relation":{},"subject":[],"published":{"date-parts":[[2019,8]]}}}