{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T05:55:04Z","timestamp":1773035704578,"version":"3.50.1"},"reference-count":66,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62306115"],"award-info":[{"award-number":["62306115"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476087"],"award-info":[{"award-number":["62476087"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62306242"],"award-info":[{"award-number":["62306242"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1109\/tnnls.2025.3613857","type":"journal-article","created":{"date-parts":[[2025,10,14]],"date-time":"2025-10-14T17:40:51Z","timestamp":1760463651000},"page":"907-921","source":"Crossref","is-referenced-by-count":0,"title":["Unsupervised Skill Discovery Through Skill Regions Differentiation"],"prefix":"10.1109","volume":"37","author":[{"given":"Ting","family":"Xiao","sequence":"first","affiliation":[{"name":"Key Laboratory of Smart Manufacturing in Energy Chemical Process, Ministry of Education, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiakun","family":"Zheng","sequence":"additional","affiliation":[{"name":"East China University of Science and Technology, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rushuai","family":"Yang","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Sai Kung, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6040-3002","authenticated-orcid":false,"given":"Kang","family":"Xu","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6114-8453","authenticated-orcid":false,"given":"Qiaosheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai AI Laboratory, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peng","family":"Liu","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3759-2041","authenticated-orcid":false,"given":"Zhe","family":"Wang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Smart Manufacturing in Energy Chemical Process, Ministry of Education, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8379-9385","authenticated-orcid":false,"given":"Chenjia","family":"Bai","sequence":"additional","affiliation":[{"name":"China Telecom, Institute of Artificial Intelligence (TeleAI), Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref2","first-page":"25476","article-title":"Mastering Atari games with limited data","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ye"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00610-y"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2022.3185159"},{"key":"ref5","first-page":"8387","article-title":"Temporal difference learning for model predictive control","volume-title":"Proc. Int. Conf. Machin. Learn.","author":"Hansen"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abk2822"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126620"},{"key":"ref8","first-page":"31120","article-title":"Reward design with language models","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Kwon"},{"key":"ref9","first-page":"2058","article-title":"ManiSkill2: A unified benchmark for generalizable manipulation skills","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Gu"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2021.08.002"},{"key":"ref11","article-title":"GPT-4 technical report","volume-title":"arXiv:2303.08774","author":"Achiam","year":"2023"},{"key":"ref12","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Ouyang"},{"key":"ref13","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv:2307.09288"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref15","article-title":"V-JEPA: Latent video prediction for visual representation learning","volume-title":"Trans. Mach. Learn. Res.","author":"Bardes"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3497510"},{"key":"ref17","first-page":"655","article-title":"Where are we in the search for an artificial visual cortex for embodied intelligence?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Majumdar"},{"key":"ref18","first-page":"892","article-title":"R3M: A universal visual representation for robot manipulation","volume-title":"Proc. Conf. Robot Learn.","author":"Nair"},{"key":"ref19","first-page":"1","article-title":"URLB: Unsupervised reinforcement learning benchmark","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Laskin"},{"key":"ref20","article-title":"Variational intrinsic control","author":"Gregor","year":"2016","journal-title":"arXiv:1611.07507"},{"key":"ref21","first-page":"18459","article-title":"Behavior from the void: Unsupervised active pre-training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref22","first-page":"6974","article-title":"Understanding the limitations of variational mutual information estimators","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Song"},{"key":"ref23","article-title":"Diversity is all you need: Learning skills without a reward function","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Eysenbach"},{"key":"ref24","first-page":"39183","article-title":"Behavior contrastive learning for unsupervised skill discovery","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","author":"Yang"},{"key":"ref25","first-page":"30289","article-title":"METRA: Scalable unsupervised RL with metric-aware abstraction","volume-title":"Proc. 12th Int. Conf. Learn. Represent.","author":"Park"},{"key":"ref26","first-page":"9679","article-title":"Lipschitz-constrained unsupervised skill discovery","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Park"},{"key":"ref27","first-page":"27225","article-title":"Controllability-aware unsupervised skill discovery","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Park"},{"key":"ref28","first-page":"34478","article-title":"Unsupervised reinforcement learning with contrastive intrinsic control","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Laskin"},{"key":"ref29","first-page":"28598","article-title":"Mastering the unsupervised reinforcement learning benchmark from pixels","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rajeswar"},{"key":"ref30","first-page":"13207","article-title":"Dynamics-aware unsupervised discovery of skills","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Sharma"},{"key":"ref31","first-page":"1317","article-title":"Explore, discover and learn: Unsupervised discovery of state-covering skills","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Campos"},{"key":"ref32","first-page":"39034","article-title":"Unsupervised skill discovery via recurrent skill training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Jiang"},{"key":"ref33","first-page":"28226","article-title":"Learning to discover skills with guidance","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kim"},{"key":"ref34","first-page":"6736","article-title":"APS: Active pretraining with successor features","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu"},{"key":"ref35","article-title":"Efficient exploration via state marginal matching","author":"Lee","year":"2020","journal-title":"arXiv:1906.05274"},{"key":"ref36","first-page":"25061","article-title":"Learning more skills through optimistic exploration","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Strouse"},{"key":"ref37","first-page":"27727","article-title":"Choreographer: Learning and adapting skills in imagination","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Mazzaglia"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref39","first-page":"5062","article-title":"Self-supervised exploration via disagreement","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pathak"},{"key":"ref40","first-page":"4003","article-title":"Exploration by random network distillation","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Burda"},{"key":"ref41","first-page":"11920","article-title":"Reinforcement learning with prototypical representations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yarats"},{"key":"ref42","article-title":"Unsupervised skill-discovery and skill-learning in minecraft","author":"Nieto","year":"2021","journal-title":"arXiv:2107.08398"},{"key":"ref43","first-page":"301","article-title":"Rethinking mutual information for language conditioned skill discovery on imitation learning","volume-title":"Proc. 34th Int. Conf. Automated Planning Scheduling","volume":"34","author":"Ju"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i12.29217"},{"key":"ref45","first-page":"13352","article-title":"EUCLID: Towards efficient unsupervised reinforcement learning with multi-choice dynamics model","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Yuan"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1312.6114"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1561\/2200000056"},{"key":"ref48","first-page":"4767","article-title":"Multi-task reinforcement learning with soft modularization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yang"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553441"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.08.009"},{"key":"ref51","first-page":"1479","article-title":"Unifying count-based exploration and intrinsic motivation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Bellemare"},{"key":"ref52","first-page":"2721","article-title":"Count-based exploration with neural density models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ostrovski"},{"key":"ref53","first-page":"29304","article-title":"Deep reinforcement learning at the edge of the statistical precipice","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Agarwal"},{"key":"ref54","article-title":"DeepMind control suite","author":"Tassa","year":"2018","journal-title":"arXiv:1801.00690"},{"key":"ref55","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. ICLR (Poster)","author":"Lillicrap"},{"key":"ref56","first-page":"10062","article-title":"Mastering visual continuous control: Improved data-augmented reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Yarats"},{"key":"ref57","first-page":"10711","article-title":"Mastering Atari with discrete world models","volume-title":"Proc. ICLR","author":"Hafner"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2025.3548821"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1287\/moor.2022.1309"},{"key":"ref60","first-page":"1582","article-title":"Reward-free model-based reinforcement learning with linear function approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref61","first-page":"1182","article-title":"Deep variational information bottleneck","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Alemi"},{"key":"ref62","first-page":"17007","article-title":"Dynamic bottleneck for robust self-supervised exploration","volume-title":"Proc. NIPS","volume":"34","author":"Bai"},{"key":"ref63","first-page":"1283","article-title":"Provably efficient exploration in policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"1","author":"Cai"},{"key":"ref64","first-page":"5084","article-title":"Is pessimism provably efficient for offline RL","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jin"},{"key":"ref65","article-title":"On function approximation in reinforcement learning: Optimism in the face of large state spaces","author":"Yang","year":"2020","journal-title":"arXiv:2011.04622"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7503.003.0011"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5962385\/11372199\/11203282.pdf?arnumber=11203282","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T21:07:14Z","timestamp":1770671234000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11203282\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":66,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2025.3613857","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2]]}}}