{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:10:11Z","timestamp":1740100211289,"version":"3.37.3"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076238,61902402"],"award-info":[{"award-number":["62076238,61902402"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2019AAA010340X"],"award-info":[{"award-number":["2019AAA010340X"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100015803","name":"CCF-Tencent Open Research Fund","doi-asserted-by":"publisher","award":["RAGR20200104"],"award-info":[{"award-number":["RAGR20200104"]}],"id":[{"id":"10.13039\/100015803","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002367","name":"Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["XDA27000000"],"award-info":[{"award-number":["XDA27000000"]}],"id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9534251","type":"proceedings-article","created":{"date-parts":[[2021,9,22]],"date-time":"2021-09-22T20:32:37Z","timestamp":1632342757000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["Learning to Play Hard Exploration Games Using Graph-Guided Self-Navigation"],"prefix":"10.1109","author":[{"given":"Enmin","family":"Zhao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Renye","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kai","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lijuan","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junliang","family":"Xing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","first-page":"3875","article-title":"Self-imitation learning","author":"oh","year":"0","journal-title":"International Conference on Machine Learning"},{"journal-title":"OpenAI Baselines","year":"2017","author":"dhariwal","key":"ref32"},{"key":"ref31","first-page":"1","article-title":"Contingency-aware exploration in reinforcement learning","author":"choi","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2011.6116281"},{"key":"ref34","first-page":"2721","article-title":"Count-based exploration with neural density models","author":"ostrovski","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref10","article-title":"Learning montezuma's revenge from a single demonstration","author":"salimans","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref11","first-page":"3223","article-title":"Deep Q-learning from demonstrations","author":"hester","year":"0","journal-title":"AAAI Conference on Artificial Intelligence"},{"key":"ref12","first-page":"2474","article-title":"Policy optimization with demonstrations","author":"kang","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref13","first-page":"3360","article-title":"EMI: Exploration with mutual information","author":"kim","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1006\/ceps.1999.1020"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1093\/oxfordhb\/9780195399820.013.0010","article-title":"Curiosity and motivation","author":"silvia","year":"2012","journal-title":"The Oxford Handbook of Human Motivation"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref17","first-page":"1","article-title":"Episodic curiosity through reachability","author":"savinov","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref18","first-page":"1","author":"gregor","year":"2017","journal-title":"Variational Intrinsic Control"},{"key":"ref19","first-page":"1471","article-title":"Unifying count-based exploration and intrinsic motivation","author":"bellemare","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref28","first-page":"1","article-title":"Exploration by random network distillation","author":"burda","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref4","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref27","first-page":"3309","article-title":"Deeply aggrevated: Differentiable imitation learning for sequential prediction","author":"sun","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref3","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","author":"wang","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref6","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03157-9"},{"key":"ref5","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref8","first-page":"2753","article-title":"Exploration: A study of count-based exploration for deep rein-forcement learning","author":"tang","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref7","first-page":"1","article-title":"Sample efficient actor-critic with experience replay","author":"wang","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref2","first-page":"4026","article-title":"Deep exploration via bootstrapped DQN","author":"osband","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref9","first-page":"2935","article-title":"Playing hard exploration games by watching youtube","author":"aytar","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"volodymyr","year":"2015","journal-title":"Nature"},{"key":"ref20","article-title":"Deep curiosity search: Intra-life exploration improves performance on challenging deep reinforcement learning problems","author":"stanton","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref22","first-page":"1449","article-title":"A game-theoretic approach to apprenticeship learning","author":"syed","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref24","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"0","journal-title":"International Conference on Artificial Intelligence and Statistics"},{"key":"ref23","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"0","journal-title":"Advances in neural information processing systems"},{"key":"ref26","first-page":"2058","article-title":"Learning to search better than your teacher","author":"chang","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref25","article-title":"Reinforcement and imitation learning via interactive no-regret learning","author":"ross","year":"2014","journal-title":"ArXiv Preprint"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09534251.pdf?arnumber=9534251","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T23:22:17Z","timestamp":1673306537000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9534251\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9534251","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}