{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T13:56:49Z","timestamp":1772114209388,"version":"3.50.1"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,8,17]],"date-time":"2021-08-17T00:00:00Z","timestamp":1629158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,8,17]],"date-time":"2021-08-17T00:00:00Z","timestamp":1629158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,8,17]],"date-time":"2021-08-17T00:00:00Z","timestamp":1629158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,8,17]]},"DOI":"10.1109\/cog52621.2021.9619084","type":"proceedings-article","created":{"date-parts":[[2021,12,7]],"date-time":"2021-12-07T20:53:06Z","timestamp":1638910386000},"page":"1-8","source":"Crossref","is-referenced-by-count":5,"title":["Demonstration-Efficient Inverse Reinforcement Learning in Procedurally Generated Environments"],"prefix":"10.1109","author":[{"given":"Alessandro","family":"Sestini","sequence":"first","affiliation":[]},{"given":"Alexander","family":"Kuhnle","sequence":"additional","affiliation":[]},{"given":"Andrew D.","family":"Bagdanov","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","article-title":"Prox-imal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref32","article-title":"High-fidelity image generation with fewer labels","author":"lu?i?","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref31","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref30","article-title":"Deepcrawl: Deep reinforcement learning for turn based strategy games","author":"sestini","year":"0","journal-title":"Proceedings of AIIDE Workshop on Experimental AI in Games"},{"key":"ref36","article-title":"Unsupervised representation learning with deep convolutional generative adversarial networks","author":"radford","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref35","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref34","article-title":"Adversarial recovery of agent rewards from latent spaces of the limit order book","author":"roa-vicens","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref10","article-title":"Leveraging procedural generation to benchmark reinforcement learning","author":"cobbe","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref13","article-title":"Maximum entropy inverse reinforcement learning","author":"ziebart","year":"0","journal-title":"National Conference on Artificial Intelligence"},{"key":"ref14","article-title":"A connection between generative adversarial networks, inverse reinforcement learning, and energy-based models","author":"finn","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref15","first-page":"103","article-title":"A framework for behavioural cloning","volume":"15","author":"bain","year":"1995","journal-title":"Machine Intelligence"},{"key":"ref16","article-title":"A game-theoretic approach to apprenticeship learning","author":"syed","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"ref17","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"0","journal-title":"International Conference on Artificial Intelligence and Statistics"},{"key":"ref18","article-title":"SQIL: Imitation learning via reinforcement learning with sparse rewards","author":"reddy","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref19","article-title":"Expert-level Atari imitation learning from demonstrations only","author":"cai","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref28","article-title":"Minimalistic gridworld environment for openAIgym","author":"chevalier-boisvert","year":"2019","journal-title":"Github Repository"},{"key":"ref4","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/339"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in StarCraft II using multi-agent reinforcement learning","volume":"575","author":"vinyals","year":"2019","journal-title":"Nature"},{"key":"ref6","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/373"},{"key":"ref5","article-title":"Concrete problems in AI safety","author":"amodei","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref8","article-title":"Guided cost learning: Deep inverse optimal control via policy optimization","author":"finn","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref7","article-title":"Learning robust rewards with adverserial inverse reinforcement learning","author":"fu","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref2","article-title":"Dota 2 with large scale deep reinforcement learning","author":"open","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref9","article-title":"Inverse reinforcement learning for video games","author":"tucker","year":"0","journal-title":"Proceedings of NIPS Workshop on Deep Reinforcement Learning"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/1597735.1597738"},{"key":"ref22","article-title":"Reward learning from human preferences and demonstrations in Atari","author":"ibarz","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref21","article-title":"Generative adversarial imitation learning","author":"ho","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref24","article-title":"Procedural content generation: from automatically generating game levels to increasing generality in machine learning","author":"risi","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref23","article-title":"Deep reinforcement learning from human preferences","author":"christiano","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref26","article-title":"The nethack learning environment","author":"k\u00fcttler","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref25","article-title":"Illuminating generalization in deep reinforcement learning through procedural level generation","author":"justesen","year":"0","journal-title":"Proceedings of NIPS Workshop on Deep Reinforcement Learning"}],"event":{"name":"2021 IEEE Conference on Games (CoG)","location":"Copenhagen, Denmark","start":{"date-parts":[[2021,8,17]]},"end":{"date-parts":[[2021,8,20]]}},"container-title":["2021 IEEE Conference on Games (CoG)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9618888\/9618891\/09619084.pdf?arnumber=9619084","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:53:32Z","timestamp":1652201612000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9619084\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,17]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/cog52621.2021.9619084","relation":{},"subject":[],"published":{"date-parts":[[2021,8,17]]}}}