{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:57:18Z","timestamp":1774540638575,"version":"3.50.1"},"reference-count":39,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,8,17]],"date-time":"2021-08-17T00:00:00Z","timestamp":1629158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,8,17]],"date-time":"2021-08-17T00:00:00Z","timestamp":1629158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,8,17]],"date-time":"2021-08-17T00:00:00Z","timestamp":1629158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,8,17]]},"DOI":"10.1109\/cog52621.2021.9618983","type":"proceedings-article","created":{"date-parts":[[2021,12,7]],"date-time":"2021-12-07T15:53:06Z","timestamp":1638892386000},"page":"01-08","source":"Crossref","is-referenced-by-count":8,"title":["Policy Fusion for Adaptive and Customizable Reinforcement Learning Agents"],"prefix":"10.1109","author":[{"given":"Alessandro","family":"Sestini","sequence":"first","affiliation":[]},{"given":"Alexander","family":"Kuhnle","sequence":"additional","affiliation":[]},{"given":"Andrew D.","family":"Bagdanov","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Prox-imal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref38","author":"chevalier-boisvert","year":"2018","journal-title":"Gym-Miniworld Environment for Openai Gym"},{"key":"ref33","article-title":"Combining multiple correlated reward and shaping signals by measuring confidence","volume":"28","author":"brys","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"ref32","article-title":"Action guidance: Getting the best of sparse rewards and shaped rewards for real-time strategy games","author":"huang","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.3390\/electronics9091363"},{"key":"ref30","article-title":"Mcp: Learning composable hierarchical control with multiplicative compositional policies","author":"peng","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref37","article-title":"Winning isn't everything: Enhancing game development with intelligent agents","author":"zhao","year":"0","journal-title":"IEEE Transactions on Games 2020"},{"key":"ref36","article-title":"Generative adversarial nets","author":"goodfellow","year":"0","journal-title":"Advances in Neural Information Processing Systems 2014"},{"key":"ref35","year":"2021","journal-title":"Designer-centered reinforcementlearning"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2014.6889732"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref11","article-title":"Discrete and continuous action representation for practical RL in video games","author":"delalleau","year":"0","journal-title":"Proceedings of AAAI-20 Workshop on Reinforcement Learning in Games"},{"key":"ref12","article-title":"Deep reinforcement learning for navigation in AAA video games","author":"alonso","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2019.8848080"},{"key":"ref14","article-title":"Deepcrawl: Deep reinforcement learning for turn based strategy games","author":"sestini","year":"0","journal-title":"Proceedings of AIIDE Workshop on Experimental AI in Games 2019"},{"key":"ref15","article-title":"Deep policy networks for NPC behaviors that adapt to changing design parameters in roguelike games","author":"sestini","year":"0","journal-title":"Proceedings of AAAI Workshop on Reinforcement Learning in Games"},{"key":"ref16","article-title":"Leveraging procedural generation to benchmark reinforcement learning","author":"cobbe","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref17","article-title":"Procedural content generation: from automatically generating game levels to increasing generality in machine learning","author":"risi","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref18","article-title":"Illuminating generalization in deep reinforcement learning through procedural level generation","author":"justesen","year":"0","journal-title":"Proceedings of NIPS Workshop on Deep Reinforcement Learning"},{"key":"ref19","article-title":"Learning robust rewards with adverserial inverse reinforcement learning","author":"fu","year":"0","journal-title":"2018 International Conference on Learning Representations"},{"key":"ref4","article-title":"Concrete problems in AI safety","author":"amodei","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459670"},{"key":"ref3","article-title":"It's unwieldy and it takes a lot of time. Challenges and opportunities for creating agents in commercial games","author":"jacob","year":"0","journal-title":"16th AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment 2020"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2010.66"},{"key":"ref6","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"0","journal-title":"International Conference on Machine Learning 2000"},{"key":"ref5","first-page":"103","article-title":"A framework for behavioural cloning","author":"bain","year":"1995","journal-title":"Machine Intelligence 15"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TTE.2020.2991079"},{"key":"ref8","author":"berner","year":"2019","journal-title":"Dota 2 with large scale deep reinforcement learning"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in StarCraft II using multi-agent reinforcement learning","volume":"575","author":"vinyals","year":"2019","journal-title":"Nature"},{"key":"ref2","year":"2021","journal-title":"MotoGP21"},{"key":"ref1","year":"2021","journal-title":"Source of Madness"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03157-9"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CoG52621.2021.9619084"},{"key":"ref22","article-title":"Deep reinforcement learning from human preferences","author":"christiano","year":"0","journal-title":"Advances in Neural Information Processing Systems 2017"},{"key":"ref21","first-page":"783","article-title":"Extrapolating beyond suboptimal demonstrations via inverse reinforcement learning from observations","author":"brown","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref24","article-title":"Inverse reinforcement learning for video games","author":"tucker","year":"0","journal-title":"Proceedings of NIPS Workshop on Deep Reinforcement Learning 2018"},{"key":"ref23","article-title":"Reward learning from human preferences and demonstrations in Atari","author":"ibarz","year":"0","journal-title":"Advances in Neural Information Processing Systems 2018"},{"key":"ref26","first-page":"56","article-title":"Ensemble methods for reinforcement learning with function approximation","author":"fau\u00dfer","year":"0","journal-title":"International Workshop on Multiple Classifier Systems"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2008.920231"}],"event":{"name":"2021 IEEE Conference on Games (CoG)","location":"Copenhagen, Denmark","start":{"date-parts":[[2021,8,17]]},"end":{"date-parts":[[2021,8,20]]}},"container-title":["2021 IEEE Conference on Games (CoG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9618888\/9618891\/09618983.pdf?arnumber=9618983","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T12:53:35Z","timestamp":1652187215000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9618983\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,17]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/cog52621.2021.9618983","relation":{},"subject":[],"published":{"date-parts":[[2021,8,17]]}}}