{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T05:46:19Z","timestamp":1749015979558,"version":"3.37.3"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100009148","name":"Queen Mary University of London","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100009148","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Games"],"published-print":{"date-parts":[[2022,9]]},"DOI":"10.1109\/tg.2021.3113644","type":"journal-article","created":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T20:39:22Z","timestamp":1632170362000},"page":"522-532","source":"Crossref","is-referenced-by-count":7,"title":["Student-Initiated Action Advising via Advice Novelty"],"prefix":"10.1109","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0400-0043","authenticated-orcid":false,"given":"Ercument","family":"Ilhan","sequence":"first","affiliation":[{"name":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jeremy","family":"Gow","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1958-0212","authenticated-orcid":false,"given":"Diego","family":"Perez","sequence":"additional","affiliation":[{"name":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K."}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Mastering Chess and Shogi by self-play with a general reinforcement learning algorithm","volume-title":"Comput. Res. Repository","author":"Silver","year":"2017"},{"article-title":"AlphaStar: Mastering the real-time strategy game Starcraft II","year":"2019","author":"Vinyals","key":"ref2"},{"key":"ref3","article-title":"Dota 2 with large scale deep reinforcement learning","volume-title":"Comput. Res. Repository","author":"Berner","year":"2019"},{"key":"ref4","first-page":"39:1","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1109\/MSP.2017.2743240","article-title":"A brief survey of deep reinforcement learning","volume-title":"Comput. Res. Repository","author":"Arulkumaran","year":"2017"},{"key":"ref6","article-title":"Benchmarking bonus-based exploration methods on the arcade learning environment","volume-title":"Comput. Res. Repository","author":"Taga","year":"2019"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-019-09430-0"},{"key":"ref8","first-page":"1040","article-title":"Learning from demonstration","volume-title":"Proc. Adv. Neural Inf. Process. Syst. 9","author":"Schaal","year":"1996"},{"key":"ref9","first-page":"1053","article-title":"Teaching on a budget: Agents advising agents in reinforcement learning","volume-title":"Proc. Int. Conf. Auton. Agents Multi-Agent Syst.","author":"Torrey","year":"2013"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref11","article-title":"ZPD teaching strategies for deep reinforcement learning from demonstrations","volume-title":"Comput. Res. Repository","author":"Seita","year":"2019"},{"article-title":"Active learning literature survey","year":"2009","author":"Settles","key":"ref12"},{"key":"ref13","first-page":"1100","article-title":"Simultaneously learning and advising in multiagent reinforcement learning","volume-title":"Proc. 16th Conf. Auton. Agents Multi-Agent Syst.","author":"da Silva","year":"2017"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CIG.2019.8847988"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6036"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05849-4"},{"article-title":"Playing Atari with deep reinforcement learning","volume-title":"Comput. Res. Repository","author":"Mnih","key":"ref17"},{"key":"ref18","article-title":"Exploration by random network distillation","volume-title":"7th Int. Conf. Learn. Representations, ICLR 2019","author":"Burda","year":"2019"},{"key":"ref19","article-title":"Teacher-student framework: A reinforcement learning approach","volume-title":"AAMAS Workshop Auton. Robots Multirobot Syst.","author":"Zimmer","year":"2014"},{"key":"ref20","article-title":"Interactive teaching strategies for agent training","volume-title":"Proc. 25th Int. Joint Conf. Artif. Intell.","author":"Amir","year":"2014"},{"key":"ref21","first-page":"2315","article-title":"Theoretically-grounded policy advice from multiple teachers in reinforcement learning settings with applications to negative transfer","volume-title":"Proc. 25th Int. Joint Conf. Artif. Intell.","author":"Zhan","year":"2016"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.3390\/make1010002"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016128"},{"key":"ref24","article-title":"Learning hierarchical teaching in cooperative multiagent reinforcement learning","volume-title":"Comput. Res. Repository","author":"Kim","year":"2019"},{"key":"ref25","first-page":"629","article-title":"Action advising with advice imitation in deep reinforcement learning","volume-title":"Proc. 20th Conf. Auton. Agents Multi-Agent Syst.","author":"Ilhan","year":"2021"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref28","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. 33nd Int. Conf. Mach. Learn.","author":"Wang","year":"2016"},{"key":"ref29","article-title":"Noisy networks for exploration","volume-title":"Proc. 6th Int. Conf. Learn. Representations","author":"Fortunato","year":"2018"},{"key":"ref30","article-title":"Prioritized experience replay","volume-title":"Proc. 4th Int. Conf. Learn. Representations","author":"Schaul","year":"2016"},{"key":"ref31","article-title":"MinAtar: An Atari-inspired testbed for more efficient reinforcement learning experiments","volume-title":"Comput. Res. Repository","author":"Young","year":"2019"},{"key":"ref32","first-page":"2052","article-title":"Off-Policy deep reinforcement learning without exploration","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2019"},{"key":"ref33","first-page":"11761","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume-title":"Proc. Adv. Neural Inf. Process. Syst. 32: Annu. Conf. Neural Inf. Process. Syst.","author":"Kumar","year":"2019"}],"container-title":["IEEE Transactions on Games"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7782673\/9891715\/09541003.pdf?arnumber=9541003","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T23:43:55Z","timestamp":1705016635000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9541003\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9]]},"references-count":33,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tg.2021.3113644","relation":{},"ISSN":["2475-1502","2475-1510"],"issn-type":[{"type":"print","value":"2475-1502"},{"type":"electronic","value":"2475-1510"}],"subject":[],"published":{"date-parts":[[2022,9]]}}}