{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,18]],"date-time":"2026-01-18T23:21:09Z","timestamp":1768778469710,"version":"3.49.0"},"reference-count":38,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Plan of China","award":["2020AAA0108902"],"award-info":[{"award-number":["2020AAA0108902"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2024,1]]},"DOI":"10.1109\/lra.2023.3339400","type":"journal-article","created":{"date-parts":[[2023,12,5]],"date-time":"2023-12-05T18:29:12Z","timestamp":1701800952000},"page":"867-874","source":"Crossref","is-referenced-by-count":5,"title":["Sketch RL: Interactive Sketch Generation for Long-Horizon Tasks via Vision-Based Skill Predictor"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1361-4824","authenticated-orcid":false,"given":"Zhenyang","family":"Lin","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0022-7219","authenticated-orcid":false,"given":"Yurou","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2148-1846","authenticated-orcid":false,"given":"Zhiyong","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"issue":"1","key":"ref2","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"ref4","first-page":"3303","article-title":"Data-efficient hierarchical reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Nachum","year":"2018"},{"key":"ref5","first-page":"1","article-title":"Diversity is all you need: Learning skills without a reward function","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Eysenbach","year":"2018"},{"key":"ref6","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Vezhnevets","year":"2017"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196619"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196958"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812140"},{"key":"ref10","article-title":"Neural probabilistic motor primitives for humanoid control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Merel","year":"2018"},{"key":"ref11","article-title":"Fast policy learning through imitation and reinforcement","volume-title":"Proc. Uncertainty Artif. Intell.","author":"Cheng","year":"2018"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref13","article-title":"Parrot: Data-driven behavioral priors for reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Singh","year":"2020"},{"key":"ref14","article-title":"AWAC: Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2020"},{"key":"ref15","article-title":"Continual state representation learning for reinforcement learning using generative replay","author":"Caselles-Dupré","year":"2018"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.074"},{"key":"ref17","first-page":"17321","article-title":"Long-horizon visual planning with goal-conditioned hierarchical predictors","volume":"33","author":"Pertsch","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref18","first-page":"11340","article-title":"Data-efficient hindsight off-policy option learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wulfmeier","year":"2021"},{"key":"ref19","article-title":"Learning to coordinate manipulation skills via skill behavior diversification","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lee","year":"2020"},{"key":"ref20","first-page":"2181","article-title":"Learning and retrieval from prior data for skill-based imitation learning","volume-title":"Proc. 6th Conf. Robot Learn.","volume":"205","author":"Nasiriany","year":"2023"},{"key":"ref21","article-title":"Learning an embedding space for transferable robot skills","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hausman","year":"2018"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392474"},{"key":"ref23","first-page":"1113","article-title":"Learning latent plans from play","volume-title":"Proc. Conf. Robot Learn.","author":"Lynch"},{"key":"ref24","first-page":"166","article-title":"Modular multitask reinforcement learning with policy sketches","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Andreas","year":"2017"},{"key":"ref25","first-page":"1235","article-title":"Multi-modal imitation learning from unstructured demonstrations using generative adversarial nets","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Hausman","year":"2017"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3146589"},{"key":"ref27","first-page":"471","article-title":"Modeling long-horizon tasks as sequential interaction landscapes","volume-title":"Proc. Conf. Robot Learn.","author":"Pirk","year":"2021"},{"key":"ref28","first-page":"148","article-title":"Sornet: Spatial object-centric representations for sequential manipulation","volume-title":"Proc. 5th Conf. Robot Learn.","author":"Yuan","year":"2022"},{"key":"ref29","first-page":"892","article-title":"R3m: A universal visual representation for robot manipulation","volume-title":"Proc. 6th Conf. Robot Learn.","volume":"205","author":"Nair","year":"2023"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"331","DOI":"10.1016\/S0927-0507(05)80172-0","article-title":"Markov decision processes","volume":"2","author":"Puterman","year":"1990","journal-title":"Handbooks in Operations Res. and Manage. Sci."},{"key":"ref31","article-title":"Social attention for autonomous decision-making in dense traffic","author":"Leurent","year":"2019"},{"key":"ref32","first-page":"3630","article-title":"Matching networks for one shot learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Vinyals","year":"2016"},{"key":"ref33","article-title":"A simple neural attentive meta-learner","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mishra","year":"2018"},{"key":"ref34","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref35","article-title":"robosuite: A modular simulation framework and benchmark for robot learning","author":"Zhu","year":"2020"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1177\/027836499501400103"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1504\/IJAPR.2016.079733"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/10320104\/10342749.pdf?arnumber=10342749","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T19:29:34Z","timestamp":1734982174000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10342749\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1]]},"references-count":38,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/lra.2023.3339400","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1]]}}}