{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T13:40:21Z","timestamp":1730209221573,"version":"3.28.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,8,1]],"date-time":"2020-08-01T00:00:00Z","timestamp":1596240000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,8]]},"DOI":"10.1109\/cog47356.2020.9231802","type":"proceedings-article","created":{"date-parts":[[2020,10,21]],"date-time":"2020-10-21T18:05:50Z","timestamp":1603303550000},"page":"447-454","source":"Crossref","is-referenced-by-count":1,"title":["Obstacle Tower Without Human Demonstrations: How Far a Deep Feed-Forward Network Goes with Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Marco","family":"Pleines","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jenia","family":"Jitsev","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mike","family":"Preuss","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Frank","family":"Zimmer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","article-title":"Seed rl: Scalable and efficient deep-rl with accelerated central inference","author":"espeholt","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref32","first-page":"1406","article-title":"IMPALA: scalable distributed deep-rl with importance weighted actor-learner architectures","volume":"80","author":"espeholt","year":"2018","journal-title":"Proceedings of the 35th International Conference on Machine Learning ser Proceedings of Machine Learning Research"},{"key":"ref31","first-page":"1792","article-title":"Visualizing and understanding atari agents","author":"greydanus","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-28954-6_10"},{"article-title":"Competing in the obstacle tower challenge","year":"2019","author":"nichol","key":"ref10"},{"key":"ref11","article-title":"PPO dash: Improving generalization in deep reinforcement learning","author":"booth","year":"2019","journal-title":"arXiv 1907 06704"},{"journal-title":"Pattern Recognition and Machine Learning (Information Science and Statistics)","year":"2006","author":"bishop","key":"ref12"},{"key":"ref13","article-title":"A study on overfitting in deep reinforcement learning","author":"zhang","year":"2018","journal-title":"arXiv 1907 06704"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2019.2901021"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-42716-4"},{"key":"ref16","article-title":"Illuminating generalization in deep reinforcement learning through procedural level generation","author":"justesen","year":"2018","journal-title":"arXiv 1806 10729"},{"key":"ref17","first-page":"1282","article-title":"Quantifying generalization in reinforcement learning","volume":"97","author":"cobbe","year":"2019","journal-title":"Proceedings of the 36th International Conference on Machine Learning ICML 2019 9-15 June 2019 Long Beach California USA ser Proceedings of Machine Learning Research"},{"key":"ref18","article-title":"Leveraging procedural generation to benchmark reinforcement learning","author":"cobbe","year":"0","journal-title":"arXiv 1912 01588"},{"key":"ref19","first-page":"12469","article-title":"Generalization of reinforcement learners with working and episodic memory","volume":"32","author":"fortunato","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref4","article-title":"Dota 2 with large scale deep reinforcement learning","author":"berner","year":"2019","journal-title":"arXiv 1912 06680"},{"key":"ref27","first-page":"4131","article-title":"Action branching architectures for deep reinforcement learning","author":"tavakoli","year":"2018","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1126\/science.aau6249"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in starcraft ii using multi-agent reinforcement learning","author":"vinyals","year":"2019","journal-title":"Nature"},{"key":"ref29","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems 30 Annual Conference on Neural Information Processing Systems 2017 4-9 December 2017"},{"key":"ref5","article-title":"Emergent tool use from multi-agent autocurricula","author":"baker","year":"2019","journal-title":"arXiv 1909 07528"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/373"},{"key":"ref7","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2018.2877047"},{"article-title":"Announcing the obstacle tower challenge winners and open source release","year":"2019","author":"juliani","key":"ref9"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref20","first-page":"13978","article-title":"Generalization in reinforcement learning with selective noise injection and information bottleneck","volume":"32","author":"igl","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref22","first-page":"12627","article-title":"Sim-to-real via sim-to-sim: Data-efficient robotic grasping via randomized-to-canonical adaptation networks","author":"james","year":"2019","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref21","article-title":"Network randomization: A simple technique for generalization in deep reinforcement learning","author":"lee","year":"2019","journal-title":"arXiv 1910 05396"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"article-title":"Minimalistic gridworld environment for openai gym","year":"2018","author":"chevalier-boisvert","key":"ref23"},{"key":"ref26","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref25"}],"event":{"name":"2020 IEEE Conference on Games (CoG)","start":{"date-parts":[[2020,8,24]]},"location":"Osaka, Japan","end":{"date-parts":[[2020,8,27]]}},"container-title":["2020 IEEE Conference on Games (CoG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9222389\/9231525\/09231802.pdf?arnumber=9231802","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T15:45:26Z","timestamp":1656344726000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9231802\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/cog47356.2020.9231802","relation":{},"subject":[],"published":{"date-parts":[[2020,8]]}}}