{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T12:26:17Z","timestamp":1770467177510,"version":"3.49.0"},"reference-count":48,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,11,22]]},"DOI":"10.1109\/humanoids58906.2024.10769879","type":"proceedings-article","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T18:54:29Z","timestamp":1733252069000},"page":"926-933","source":"Crossref","is-referenced-by-count":3,"title":["Unsupervised Skill Discovery for Robotic Manipulation through Automatic Task Generation"],"prefix":"10.1109","author":[{"given":"Paul","family":"Jansonnie","sequence":"first","affiliation":[{"name":"NAVER LABS Europe, 6 chemin de Maupertuis,Meylan,France,38240"}]},{"given":"Bingbing","family":"Wu","sequence":"additional","affiliation":[{"name":"NAVER LABS Europe, 6 chemin de Maupertuis,Meylan,France,38240"}]},{"given":"Julien","family":"Perez","sequence":"additional","affiliation":[{"name":"EPITA Research Laboratory (LRE),Le Kremlin-Bic&#x00EA;tre,France,FR-94276"}]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[{"name":"TU Darmstadt,Department of Computer Science,Germany"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Learning by playing - solving sparse reward tasks from scratch","author":"Riedmiller","year":"2018"},{"key":"ref2","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"Vecerik","year":"2018"},{"key":"ref3","article-title":"Dataefficient deep reinforcement learning for dexterous manipulation","author":"Popov","year":"2017","journal-title":"arXiv preprint arXiv:1704.03073"},{"key":"ref4","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Conference on Robot Learning","author":"Yu"},{"key":"ref5","article-title":"Hindsight experience replay","author":"Andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref7","article-title":"Diversity is all you need: Learning skills without a reward function","author":"Eysenbach","year":"2019","journal-title":"ICLR"},{"key":"ref8","article-title":"Dynamics-aware unsupervised skill discovery","author":"Sharma","year":"2020"},{"key":"ref9","article-title":"Variational option discovery algorithms","author":"Achiam","year":"2018","journal-title":"arXiv preprint arXiv:1807.10299"},{"key":"ref10","article-title":"Variational intrinsic control","author":"Gregor","year":"2016","journal-title":"arXiv preprint arXiv:1611.07507"},{"key":"ref11","article-title":"Unsupervised control through non-parametric discriminative rewards","author":"Warde-Farley","year":"2019","journal-title":"ICLR"},{"key":"ref12","article-title":"Skewfit: State-covering self-supervised reinforcement learning","author":"Pong","year":"2019","journal-title":"arXiv preprint arXiv:1903.03698"},{"key":"ref13","article-title":"Lipschitz-constrained unsupervised skill discovery","volume-title":"International Conference on Learning Representations","author":"Park"},{"key":"ref14","first-page":"27225","article-title":"Controllabilityaware unsupervised skill discovery","volume-title":"Proceedings of the 40th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"202","author":"Park"},{"key":"ref15","first-page":"34478","article-title":"Unsupervised reinforcement learning with contrastive intrinsic control","volume":"35","author":"Laskin","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref16","first-page":"1317","article-title":"Explore, discover and learn: Unsupervised discovery of state-covering skills","volume-title":"International Conference on Machine Learning.","author":"Campos"},{"key":"ref17","article-title":"Intrinsic motivation and automatic curricula via asymmetric self-play","volume-title":"International Conference on Learning Representations","author":"Sukhbaatar"},{"key":"ref18","article-title":"Learning goal embeddings via self-play for hierarchical reinforcement learning","author":"Sukhbaatar","year":"2018","journal-title":"arXiv preprint arXiv:1811.09083"},{"key":"ref19","article-title":"Asymmetric self-play for automatic goal discovery in robotic manipulation","volume":"abs\/2101.04882","author":"OpenAI","year":"2021","journal-title":"ArXiv"},{"key":"ref20","article-title":"Mcp: Learning composable hierarchical control with multiplicative compositional policies","author":"Peng","year":"2019"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45622-8_16"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"ref23","article-title":"The option keyboard: Combining skills in reinforcement learning","volume":"abs\/2106.13105","author":"Barreto","year":"2021","journal-title":"CoRR"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3171915"},{"key":"ref25","article-title":"Reverse curriculum generation for reinforcement learning","volume-title":"Conference on Robot Learning","author":"Florensa"},{"key":"ref26","article-title":"Learning montezuma\u2019s revenge from a single demonstration","author":"Salimans","year":"2018","journal-title":"arXiv preprint arXiv:1812.03381"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2934906"},{"key":"ref28","article-title":"Automatic curriculum learning through value disagreement","author":"Zhang","year":"2020","journal-title":"arXiv preprint arXiv:2006.09641"},{"key":"ref29","first-page":"835","article-title":"Teacher algorithms for curriculum learning of deep rl in continuously parameterized environments","volume-title":"Conference on Robot Learning","author":"Portelas"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2006.890271"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2012.05.008"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref33","article-title":"Exploration by random network distillation","volume-title":"International Conference on Learning Representations","author":"Burda"},{"key":"ref34","article-title":"Go-explore: a new approach for hard-exploration problems","author":"Ecoffet","year":"2019","journal-title":"arXiv preprint arXiv:1901.10995"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"580","DOI":"10.1038\/s41586-020-03157-9","article-title":"First return, then explore","volume":"590","author":"Ecoffet","year":"2020","journal-title":"Nature"},{"key":"ref36","article-title":"Self-imitation learning","author":"Oh","year":"2018","journal-title":"arXiv preprint arXiv:1806.05635"},{"key":"ref37","article-title":"Paired openended trailblazer (poet): Endlessly generating increasingly complex and diverse learning environments and their solutions","author":"Wang","year":"2019","journal-title":"arXiv preprint arXiv:1901.01753"},{"key":"ref38","first-page":"9940","article-title":"Enhanced poet: Open-ended reinforcement learning through unbounded invention of learning challenges and their solutions","volume-title":"International Conference on Machine Learning","author":"Wang"},{"key":"ref39","article-title":"It takes four to tango: Multiagent self-play for automatic curriculum generation","author":"Du","year":"2022","journal-title":"arXiv preprint arXiv:2202.10608"},{"key":"ref40","first-page":"13049","article-title":"Emergent complexity and zero-shot transfer via unsupervised environment design","volume":"33","author":"Dennis","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref41","article-title":"Evolving curricula with regretbased environment design","author":"Parker-Holder","year":"2022"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.010"},{"key":"ref43","article-title":"Active task randomization: Learning visuomotor skills for sequential manipulation by proposing feasible and novel tasks","author":"Fang","year":"2022","journal-title":"arXiv preprint arXiv:2211.06134"},{"key":"ref44","first-page":"1884","article-title":"Replay-guided adversarial environment design","volume":"34","author":"Jiang","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref45","article-title":"panda-gym: Open-Source Goal-Conditioned Environments for Robotic Learning","volume-title":"4th Robot Learning Workshop: Self-Supervised and Lifelong Learning at NeurIPS","author":"Gallou\u00e9dec"},{"key":"ref46","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint arXiv:1707.06347"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"issue":"268","key":"ref48","first-page":"1","article-title":"Stable-baselines3: Reliable reinforcement learning implementations","volume":"22","author":"Raffin","year":"2021","journal-title":"Journal of Machine Learning Research"}],"event":{"name":"2024 IEEE-RAS 23rd International Conference on Humanoid Robots (Humanoids)","location":"Nancy, France","start":{"date-parts":[[2024,11,22]]},"end":{"date-parts":[[2024,11,24]]}},"container-title":["2024 IEEE-RAS 23rd International Conference on Humanoid Robots (Humanoids)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10769770\/10769590\/10769879.pdf?arnumber=10769879","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,10]],"date-time":"2025-01-10T19:56:48Z","timestamp":1736539008000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10769879\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/humanoids58906.2024.10769879","relation":{},"subject":[],"published":{"date-parts":[[2024,11,22]]}}}