{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:29:04Z","timestamp":1730266144377,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/ijcnn48605.2020.9206982","type":"proceedings-article","created":{"date-parts":[[2020,9,30]],"date-time":"2020-09-30T00:40:33Z","timestamp":1601426433000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Novelty-Guided Reinforcement Learning via Encoded Behaviors"],"prefix":"10.1109","author":[{"given":"Rajkumar","family":"Ramamurthy","sequence":"first","affiliation":[{"name":"Fraunhofer IAIS,Sankt Augustin,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rafet","family":"Sifa","sequence":"additional","affiliation":[{"name":"Fraunhofer IAIS,Sankt Augustin,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Max","family":"Lubbering","sequence":"additional","affiliation":[{"name":"Fraunhofer IAIS,Sankt Augustin,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christian","family":"Bauckhage","sequence":"additional","affiliation":[{"name":"Fraunhofer Center for ML,Sankt Augustin,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","article-title":"Learning from demonstration for shaping through inverse reinforcement learning","author":"bener suay","year":"2016","journal-title":"Proc Int Conf Autonomous Agents and Multiagent Systems"},{"article-title":"Incentivizing Exploration In Reinforcement Learning With Deep Predictive Models","year":"2015","author":"stadie","key":"ref32"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.1038\/nature24270","article-title":"Mastering the Game of Go without Human Knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref30","article-title":"Deterministic Policy Gradient Algorithms","author":"silver","year":"2014","journal-title":"Proc 7th Int Conf Machine Learning"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref34","article-title":"Exploration: A study of Count-Based Exploration for Deep Reinforcement Learning","author":"tang","year":"2017","journal-title":"Proc Advances in NIPS 7"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2001576.2001606"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-015-9283-7"},{"article-title":"K-sparse Autoencoders","year":"2013","author":"makhzani","key":"ref12"},{"article-title":"Learning to Navigate in Complex Environments","year":"2016","author":"mirowski","key":"ref13"},{"key":"ref14","doi-asserted-by":"crossref","DOI":"10.1038\/nature14236","article-title":"Human-level Control through Deep Reinforcement Learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref15","article-title":"Policy Invariance under Reward Transformations: Theory and Application to Reward Shaping","author":"ng","year":"1999","journal-title":"Proc Int Conf Machine Learning"},{"key":"ref16","article-title":"Algorithms for Inverse Reinforcement Learning","author":"ng","year":"2000","journal-title":"Proc 7th Int Conf Machine Learning"},{"article-title":"Count-Based Exploration with Neural Density Models","year":"2017","author":"ostrovski","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"article-title":"Self- Supervised Exploration via Disagreement","year":"2019","author":"pathak","key":"ref19"},{"key":"ref28","article-title":"Trust Region Policy Optimization","author":"schulman","year":"2015","journal-title":"Proc Int Conf Machine Learning"},{"article-title":"Improving Exploration in Evolution Strategies for Deep Reinforcement Learning via a Population of Novelty- Seeking Agents","year":"2017","author":"conti","key":"ref4"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TAMD.2010.2056368"},{"article-title":"Unifying Count-Based Exploration and Intrinsic Motivation","year":"2016","author":"bellemare","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"article-title":"Directly Estimating the Variance of the ?-return using Temporal-Difference Methods","year":"2018","author":"sherstan","key":"ref29"},{"article-title":"Diversity is All You Need: Learning Skills without a Reward function","year":"2018","author":"eysenbach","key":"ref5"},{"key":"ref8","article-title":"Vime: Variational information maximizing explo- ration","author":"houthooft","year":"2016","journal-title":"Advances in NIPS"},{"article-title":"Emergence of Locomotion Behaviours in Rich Environments","year":"2017","author":"heess","key":"ref7"},{"article-title":"Multi-step Reinforcement Learning: A Unifying Algorithm","year":"2017","author":"de asis","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1162\/EVCO_a_00025"},{"article-title":"Concrete problems in AI Safety","year":"2016","author":"amodei","key":"ref1"},{"key":"ref20","article-title":"A Need for Speed: Adapting Agent Action Speed to Improve Task Learning from Non-Expert Humans","author":"peng","year":"2016","journal-title":"Proc Int Conf Autonomous Agents and Multiagent Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01424-7_1"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-30484-3_48"},{"key":"ref24","article-title":"Evolutionsstrategie: Optimierung technischer Systeme nach Prinzipien der biologischen Evolution","author":"rechenberg","year":"1971","journal-title":"PhD thesis MS thesis"},{"key":"ref23","article-title":"Learning to Drive a Bicycle Using Reinforcement Learning and Shaping","author":"randl\u00f8v","year":"1998","journal-title":"Proc 7th Int Conf Machine Learning"},{"article-title":"Evolution Strategies as a Scalable Alternative to Reinforcement Learning","year":"2017","author":"salimans","key":"ref26"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-81283-5_8"}],"event":{"name":"2020 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2020,7,19]]},"location":"Glasgow, UK","end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9200848\/9206590\/09206982.pdf?arnumber=9206982","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T21:53:04Z","timestamp":1656453184000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9206982\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/ijcnn48605.2020.9206982","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}