{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:27:07Z","timestamp":1740122827322,"version":"3.37.3"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,6,9]],"date-time":"2022-06-09T00:00:00Z","timestamp":1654732800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,9]],"date-time":"2022-06-09T00:00:00Z","timestamp":1654732800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["51935005"],"award-info":[{"award-number":["51935005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Basic Scientific Research Projects","award":["JCKY20200603C010"],"award-info":[{"award-number":["JCKY20200603C010"]}]},{"name":"State Scholarship Fund from China Scholarship Council","award":["202006120405"],"award-info":[{"award-number":["202006120405"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s11063-022-10912-8","type":"journal-article","created":{"date-parts":[[2022,6,9]],"date-time":"2022-06-09T17:05:54Z","timestamp":1654794354000},"page":"839-855","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Variational Diversity Maximization for Hierarchical Skill Discovery"],"prefix":"10.1007","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1761-3669","authenticated-orcid":false,"given":"Yingnan","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Peng","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Xianglong","family":"Tang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,6,9]]},"reference":[{"key":"10912_CR1","unstructured":"Achiam J, Edwards H, Amodei D, Abbeel P (2018) Variational option discovery algorithms. CoRR arxiv: abs\/1807.10299"},{"key":"10912_CR2","doi-asserted-by":"crossref","unstructured":"Bacon P, Harb J, Precup D (2017) The option-critic architecture. In: Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence, p 1726\u20131734. San Francisco, California","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"10912_CR3","unstructured":"Bagaria A, Senthil JK, Konidaris G (2021) Skill discovery for exploration and planning using deep skill graphs. In: International Conference on Machine Learning, p 521\u2013531. PMLR"},{"key":"10912_CR4","unstructured":"Burda Y, Edwards H, Pathak D, Storkey AJ, Darrell T, Efros AA (2019) Large-scale study of curiosity-driven learning. In: 7th International Conference on Learning Representations. New Orleans, LA"},{"key":"10912_CR5","first-page":"271","volume-title":"Advances in neural information processing systems 5","author":"P Dayan","year":"1992","unstructured":"Dayan P, Hinton GE (1992) Feudal reinforcement learning. Advances in neural information processing systems 5. Denver, Colorado, pp 271\u2013278"},{"key":"10912_CR6","doi-asserted-by":"crossref","unstructured":"Deci EL, Ryan RM (2010) Intrinsic motivation. The corsini encyclopedia of psychology pp. 1\u20132","DOI":"10.1002\/9780470479216.corpsy0467"},{"key":"10912_CR7","unstructured":"Eysenbach B, Gupta A, Ibarz J, Levine S (2019) Diversity is all you need: Learning skills without a reward function. In: 7th International Conference on Learning Representations. New Orleans, LA"},{"key":"10912_CR8","unstructured":"Florensa C, Duan Y, Abbeel P (2017) Stochastic neural networks for hierarchical reinforcement learning. arXiv preprint arXiv:1704.03012"},{"key":"10912_CR9","unstructured":"Frans K, Ho J, Chen X, Abbeel P, Schulman J (2018) Meta learning shared hierarchies. In: 6th International Conference on Learning Representations. Vancouver, BC"},{"key":"10912_CR10","unstructured":"Gregor K, Rezende DJ, Wierstra D (2016) Variational intrinsic control. arXiv preprint arXiv:1611.07507"},{"key":"10912_CR11","unstructured":"Ha DR, Schmidhuber J (2018) World models. ArXiv: abs\/1803.10122"},{"key":"10912_CR12","unstructured":"Haarnoja T, Zhou A, Hartikainen K, Tucker G, Ha S, Tan J, Kumar V, Zhu H, Gupta A, Abbeel P et al (2019) Soft actor-critic algorithms and applications"},{"key":"10912_CR13","unstructured":"Houthooft R, Chen X, Duan Y, Schulman J, Turck FD, Abbeel P (2016) VIME: variational information maximizing exploration. In: Advances in Neural Information Processing Systems 29, p 1109\u20131117. Barcelona, Spain"},{"key":"10912_CR14","unstructured":"Kim J, Park S, Kim G (2021) Unsupervised skill discovery with bottleneck option learning. In: International Conference on Machine Learning, p 5572\u20135582. PMLR"},{"key":"10912_CR15","unstructured":"Kingma DP, Welling M (2014) Auto-encoding variational bayes. In: 2nd International Conference on Learning Representations. Banff, AB"},{"key":"10912_CR16","unstructured":"Li Y (2017) Deep reinforcement learning: An overview. arXiv preprint arXiv:1701.07274"},{"key":"10912_CR17","unstructured":"Mahmood AR, Korenkevych D, Vasan G, Ma W, Bergstra J (2018) Benchmarking reinforcement learning algorithms on real-world robots. arXiv preprint arXiv:1809.07731"},{"issue":"7540","key":"10912_CR18","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529","journal-title":"Nature"},{"key":"10912_CR19","unstructured":"Moerland TM, Broekens J, Jonker CM (2020) Model-based reinforcement learning: A survey. arXiv preprint arXiv:2006.16712"},{"key":"10912_CR20","unstructured":"Nachum, O., Gu, S., Lee, H., Levine, S (2018) Data-efficient hierarchical reinforcement learning. In: Advances in Neural Information Processing Systems 31, p 3307\u20133317. Montr\u00e9al, Canada"},{"key":"10912_CR21","unstructured":"Nachum O, Gu S, Lee H, Levine S (2019) Near-optimal representation learning for hierarchical reinforcement learning. In: 7th International Conference on Learning Representations. New Orleans, LA"},{"issue":"2","key":"10912_CR22","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/s10846-017-0468-y","volume":"86","author":"AS Polydoros","year":"2017","unstructured":"Polydoros AS, Nalpantidis L (2017) Survey of model-based reinforcement learning: Applications on robotics. J Intell Robotic Syst 86(2):153\u2013173","journal-title":"J Intell Robotic Syst"},{"key":"10912_CR23","unstructured":"Schulman J, Levine S, Abbeel P, Jordan MI, Moritz P (2015) Trust region policy optimization. In: Proceedings of the 32nd International Conference on Machine Learning, p 1889\u20131897. Lille, France"},{"issue":"7676","key":"10912_CR24","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T, Baker L, Lai M, Bolton A et al (2017) Mastering the game of go without human knowledge. Nature 550(7676):354\u2013359","journal-title":"Nature"},{"key":"10912_CR25","unstructured":"Sohn K, Lee H, Yan X (2015) Learning structured output representation using deep conditional generative models. In: Advances in Neural Information Processing Systems 28, p 3483\u20133491. Montreal, Quebec"},{"issue":"4","key":"10912_CR26","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1145\/122344.122377","volume":"2","author":"RS Sutton","year":"1991","unstructured":"Sutton RS (1991) Dyna, an integrated architecture for learning, planning, and reacting. SIGART Bull 2(4):160\u2013163. https:\/\/doi.org\/10.1145\/122344.122377","journal-title":"SIGART Bull"},{"key":"10912_CR27","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: An introduction. MIT press, Cambridge, Massachusetts"},{"issue":"1\u20132","key":"10912_CR28","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton RS, Precup D, Singh SP (1999) Between mdps and semi-mdps: A framework for temporal abstraction in reinforcement learning. Artif Intell 112(1\u20132):181\u2013211","journal-title":"Artif Intell"},{"key":"10912_CR29","unstructured":"Vezhnevets AS, Osindero S, Schaul T, Heess N, Jaderberg M, Silver D, Kavukcuoglu K (2017) Feudal networks for hierarchical reinforcement learning. In: Proceedings of the 34th International Conference on Machine Learning, p 3540\u20133549. Sydney, NSW"},{"key":"10912_CR30","unstructured":"Wulfmeier M, Rao D, Hafner R, Lampe T, Abdolmaleki A, Hertweck T, Neunert M, Tirumala D, Siegel N, Heess N, et al (2021) Data-efficient hindsight off-policy option learning. In: International Conference on Machine Learning, p 11340\u201311350. PMLR"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-10912-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-022-10912-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-10912-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,6]],"date-time":"2023-03-06T14:25:02Z","timestamp":1678112702000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-022-10912-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,9]]},"references-count":30,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["10912"],"URL":"https:\/\/doi.org\/10.1007\/s11063-022-10912-8","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"type":"print","value":"1370-4621"},{"type":"electronic","value":"1573-773X"}],"subject":[],"published":{"date-parts":[[2022,6,9]]},"assertion":[{"value":"27 May 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 June 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}