{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T02:28:54Z","timestamp":1730255334321,"version":"3.28.0"},"reference-count":49,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,30]]},"DOI":"10.1109\/icra48506.2021.9561017","type":"proceedings-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:28:35Z","timestamp":1634689715000},"page":"4459-4466","source":"Crossref","is-referenced-by-count":1,"title":["Distilling a Hierarchical Policy for Planning and Control via Representation and Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Jung-Su","family":"Ha","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Young-Jin","family":"Park","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyeok-Joo","family":"Chae","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Soon-Seo","family":"Park","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Han-Lim","family":"Choi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","first-page":"3604","article-title":"A disentangled recognition and nonlinear dynamics model for unsupervised learning","author":"fraccaro","year":"2017","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref38","first-page":"2101","article-title":"Structured inference networks for nonlinear state space models","author":"krishnan","year":"2017","journal-title":"AAAI Conference on Artificial Intelligence"},{"article-title":"Path integral networks: End-to-end differentiable optimal control","year":"2017","author":"okada","key":"ref33"},{"key":"ref32","first-page":"2154","article-title":"Value iteration networks","author":"tamar","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref31","first-page":"8941","article-title":"Adaptive path-integral autoencoders: Representation learning and planning for dynamical systems","author":"ha","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2856915"},{"key":"ref37","article-title":"Deep variational bayes filters: Unsupervised learning of state space models from raw data","author":"karl","year":"2017","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref36","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref35"},{"key":"ref34","first-page":"8299","article-title":"Differentiable MPC for end-to-end planning and control","author":"amos","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.218"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989202"},{"key":"ref29","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref20","first-page":"3303","article-title":"Data-efficient hierarchical reinforcement learning","author":"nachum","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref22","article-title":"Latent space policies for hierarchical reinforcement learning","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref21","article-title":"Neural probabilistic motor primitives for humanoid control","author":"merel","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref24","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","author":"hafner","year":"2019","journal-title":"ICML"},{"article-title":"Dynamics-aware unsupervised discovery of skills","year":"2019","author":"sharma","key":"ref23"},{"key":"ref26","first-page":"6259","article-title":"Hallucinative topological memory for zero-shot visual planning","author":"liu","year":"2020","journal-title":"International Conference on Machine Learning"},{"article-title":"Planning with goal-conditioned policies","year":"2019","author":"nasiriany","key":"ref25"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2012.VIII.045"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201311"},{"key":"ref40","first-page":"3483","article-title":"Learning structured output representation using deep conditional generative models","author":"sohn","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref12","article-title":"Mcp: Learning composable hierarchical control with multiplicative compositional policies","author":"peng","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref13","article-title":"Hierarchical visuomotor control of humanoids","author":"merel","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref14","article-title":"Composing complex skills by learning transition policies with proximity reward induction","author":"lee","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref15","article-title":"Self-consistent trajectory autoencoder: Hierarchical reinforcement learning with trajectory embeddings","author":"co-reyes","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref16","article-title":"Meta-reinforcement learning of structured exploration strategies","author":"gupta","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref17","article-title":"Diversity is all you need: Learning skills without a reward function","author":"eysenbach","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref18","article-title":"Stochastic neural networks for hierarchical reinforcement learning","author":"florensa","year":"2017","journal-title":"International Conference on Learning Representations"},{"key":"ref19","article-title":"Learning an embedding space for transferable robot skills","author":"hausman","year":"2018","journal-title":"International Conference on Learning Representations"},{"article-title":"Model-based reinforcement learning for atari","year":"2019","author":"kaiser","key":"ref4"},{"key":"ref3","first-page":"2455","article-title":"Recurrent world models facilitate policy evolution","author":"ha","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IEMBS.2003.1279744"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511546877"},{"article-title":"Reinforcement learning and control as probabilistic inference: Tutorial and review","year":"2018","author":"levine","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/nn1309"},{"key":"ref49","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume":"8","author":"ziebart","year":"2008","journal-title":"AAAI"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2008.4739438"},{"article-title":"Auto-encoding variational bayes","year":"2013","author":"kingma","key":"ref46"},{"key":"ref45","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"abadi","year":"0","journal-title":"Journal of SoftWare 2015"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1177\/0278364918790369"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553508"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/s10955-016-1446-7"},{"key":"ref41","article-title":"Probabilistic planning with sequential monte carlo methods","author":"piche","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref44","first-page":"5","article-title":"Bullet physics library","volume":"15","author":"coumans","year":"2013","journal-title":"Open Source Bulletphysics Org"},{"article-title":"Learning latent plans from play","year":"2019","author":"lynch","key":"ref43"}],"event":{"name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2021,5,30]]},"location":"Xi'an, China","end":{"date-parts":[[2021,6,5]]}},"container-title":["2021 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9560720\/9560666\/09561017.pdf?arnumber=9561017","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:47:08Z","timestamp":1652197628000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9561017\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,30]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/icra48506.2021.9561017","relation":{},"subject":[],"published":{"date-parts":[[2021,5,30]]}}}