{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:09:07Z","timestamp":1740100147722,"version":"3.37.3"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,1]],"date-time":"2021-07-01T00:00:00Z","timestamp":1625097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,1]],"date-time":"2021-07-01T00:00:00Z","timestamp":1625097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,1]],"date-time":"2021-07-01T00:00:00Z","timestamp":1625097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61273300"],"award-info":[{"award-number":["61273300"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7]]},"DOI":"10.1109\/cacre52464.2021.9501321","type":"proceedings-article","created":{"date-parts":[[2021,8,10]],"date-time":"2021-08-10T20:55:19Z","timestamp":1628628919000},"page":"414-422","source":"Crossref","is-referenced-by-count":4,"title":["Towards Efficient Exploration in Unknown Spaces: A Novel Hierarchical Approach Based on Intrinsic Rewards"],"prefix":"10.1109","author":[{"given":"Yukai","family":"Song","sequence":"first","affiliation":[]},{"given":"Yue","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Junjie","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Cong","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Long","family":"Qin","sequence":"additional","affiliation":[]},{"given":"Quanjun","family":"Yin","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Improving exploration in evolution strategies for deep reinforcement learning via a population of novelty-seeking agents","author":"conti","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref38","article-title":"Prox-imal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref33","article-title":"Emi: Exploration with mutual information","author":"kim","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref32","article-title":"Exploration by random network distillation","author":"burda","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref31","article-title":"Latent world models for intrinsically motivated exploration","author":"ermolov","year":"2020","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref37","first-page":"507","article-title":"Agent57: Outperforming the atari human benchmark","author":"badia","year":"0","journal-title":"Int Conference on Machine Learning"},{"key":"ref36","article-title":"Vime: Variational information maximizing exploration","author":"houthooft","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref35","article-title":"Variational intrinsic control","author":"gregor","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref34","article-title":"Exploration in model-based reinforcement learning by empirically estimating learning progress","author":"lopes","year":"2012","journal-title":"Neural Information Processing Systems (NIPS)"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1109\/TCYB.2014.2314294"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1162\/neco.1997.9.8.1735"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/IROS.2017.8206050"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/RCAR.2016.7784001"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1002\/9780470479216.corpsy0467"},{"key":"ref14","first-page":"6","article-title":"What is intrinsic motivation? a typology of computational approaches","volume":"1","author":"oudeyer","year":"2009","journal-title":"Frontiers in Neurorobotics"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1007\/s10514-012-9298-8"},{"key":"ref16"},{"key":"ref17","doi-asserted-by":"crossref","DOI":"10.29007\/3b2l","article-title":"Learning exploration policies for navigation","author":"chen","year":"2019","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/LRA.2019.2891991"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/ICRA.2019.8793769"},{"key":"ref28"},{"key":"ref4","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref27"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/CVPR.2016.90"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/TAMD.2010.2051031"},{"key":"ref29","article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","author":"stadie","year":"2015","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1038\/s41586-020-03157-9"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1016\/j.artint.2015.05.002"},{"key":"ref7","article-title":"Hi-erarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation","author":"kulkarni","year":"2016","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1371\/journal.pone.0157428"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/TNNLS.2019.2927869"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1371\/journal.pone.0203339"},{"key":"ref20"},{"key":"ref22","article-title":"Unifying count-based exploration and intrinsic motivation","author":"bellemare","year":"2016","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1109\/DEVLRN.2011.6037367"},{"doi-asserted-by":"publisher","key":"ref42","DOI":"10.1109\/CIRA.1997.613851"},{"key":"ref24","first-page":"1","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","volume":"30","author":"tang","year":"0","journal-title":"31st Conference on Neural Information Processing Systems (NIPS)"},{"doi-asserted-by":"publisher","key":"ref41","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref23","first-page":"2721","article-title":"Count-based exploration with neural density models","author":"ostrovski","year":"0","journal-title":"Int Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1609\/aaai.v34i04.5955"},{"doi-asserted-by":"publisher","key":"ref43","DOI":"10.1109\/IROS.2011.6095018"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.29007\/shbv","article-title":"Contingency-aware exploration in reinforcement learning","author":"choi","year":"2018","journal-title":"ArXiv Preprint"}],"event":{"name":"2021 6th International Conference on Automation, Control and Robotics Engineering (CACRE)","start":{"date-parts":[[2021,7,15]]},"location":"Dalian, China","end":{"date-parts":[[2021,7,17]]}},"container-title":["2021 6th International Conference on Automation, Control and Robotics Engineering (CACRE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9501256\/9501282\/09501321.pdf?arnumber=9501321","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,7]],"date-time":"2023-01-07T02:20:28Z","timestamp":1673058028000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9501321\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/cacre52464.2021.9501321","relation":{},"subject":[],"published":{"date-parts":[[2021,7]]}}}