{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:39:41Z","timestamp":1730266781706,"version":"3.28.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,6,30]],"date-time":"2024-06-30T00:00:00Z","timestamp":1719705600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,30]],"date-time":"2024-06-30T00:00:00Z","timestamp":1719705600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,30]]},"DOI":"10.1109\/ijcnn60899.2024.10651127","type":"proceedings-article","created":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T17:35:05Z","timestamp":1725903305000},"page":"1-9","source":"Crossref","is-referenced-by-count":0,"title":["Effective State Space Exploration with Phase State Graph Generation and Goal-based Path Planning"],"prefix":"10.1109","author":[{"given":"Sinuo","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence Jilin University,Changchun,China"}]},{"given":"Jifeng","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence Jilin University,Changchun,China"}]},{"given":"Xinqi","family":"Du","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence Jilin University,Changchun,China"}]},{"given":"Zhejian","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence Jilin University,Changchun,China"}]},{"given":"Yang","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence Jilin University,Changchun,China"}]},{"given":"Hechang","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence Jilin University,Changchun,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","first-page":"521","article-title":"Skill discovery for exploration and planning using deep skill graphs","volume-title":"International Conference on Machine Learning","author":"Bagaria"},{"key":"ref3","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref4","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"International Conference on machine learning","author":"Silver"},{"key":"ref5","first-page":"27 730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"Dynamic planning in open-ended dialogue using reinforcement learning","year":"2022","author":"Cohen","key":"ref6"},{"article-title":"Sub-policy adaptation for hierarchical reinforcement learning","year":"2019","author":"Li","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053393"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3453160"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2020.106694"},{"key":"ref11","first-page":"17 811","article-title":"Offline meta-reinforcement learning with online self-supervision","volume-title":"International Conference on Machine Learning","author":"Pong"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3027152"},{"article-title":"Hypernetwork-ppo for continual reinforcement learning","volume-title":"Deep Reinforcement Learning Workshop NeurIPS 2022","author":"Sch\u00f6pf","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-30493-5_52"},{"key":"ref15","first-page":"9301","article-title":"Recomposing the reinforcement learning building blocks with hypernetworks","volume-title":"International Conference on Machine Learning","author":"Sarafian"},{"key":"ref16","first-page":"1113","article-title":"Learning latent plans from play","volume-title":"Conference on robot learning","author":"Lynch"},{"key":"ref17","first-page":"5842","article-title":"Composable planning with attributes","volume-title":"International Conference on Machine Learning","author":"Zhang"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_1"},{"article-title":"Hierarchical cooperative multi-agent reinforcement learning with skill discovery","year":"2019","author":"Yang","key":"ref19"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CoG52621.2021.9619124"},{"key":"ref21","article-title":"Generating levels and playing super mario bros. with deep reinforcement learning using various techniques for level generation and deep q-networks for playing","volume-title":"Master\u2019s thesis","author":"Engelsvoll","year":"2020"},{"article-title":"Diversity is all you need: Learning skills without a reward function","year":"2018","author":"Eysenbach","key":"ref22"},{"key":"ref23","article-title":"Diversity-driven exploration strategy for deep reinforcement learning","volume":"31","author":"Hong","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref24","first-page":"37 011","article-title":"Heterogeneous skill learning for multi-agent tasks","volume":"35","author":"Liu","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3544585.3544600"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TSSC.1968.300136"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1090\/qam\/102435"},{"key":"ref28","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International Conference on machine learning","author":"Haarnoja"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref29"},{"article-title":"Large-scale study of curiosity-driven learning","year":"2018","author":"Burda","key":"ref30"}],"event":{"name":"2024 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2024,6,30]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,7,5]]}},"container-title":["2024 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10649807\/10649898\/10651127.pdf?arnumber=10651127","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T05:36:44Z","timestamp":1725946604000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10651127\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,30]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/ijcnn60899.2024.10651127","relation":{},"subject":[],"published":{"date-parts":[[2024,6,30]]}}}