{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T18:41:26Z","timestamp":1730227286987,"version":"3.28.0"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,9,1]]},"DOI":"10.1109\/icac55051.2022.9911100","type":"proceedings-article","created":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T20:25:08Z","timestamp":1665433508000},"page":"1-6","source":"Crossref","is-referenced-by-count":3,"title":["Abstract Demonstrations and Adaptive Exploration for Efficient and Stable Multi-step Sparse Reward Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Xintong","family":"Yang","sequence":"first","affiliation":[{"name":"Cardiff University,School of Engineering,Cardiff,UK"}]},{"given":"Ze","family":"Ji","sequence":"additional","affiliation":[{"name":"Cardiff University,School of Engineering,Cardiff,UK"}]},{"given":"Jing","family":"Wu","sequence":"additional","affiliation":[{"name":"Cardiff University,School of Computer Science and Informatics,Cardiff,UK"}]},{"given":"Yu-Kun","family":"Lai","sequence":"additional","affiliation":[{"name":"Cardiff University,School of Computer Science and Informatics,Cardiff,UK"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1631\/FITEE.1900533"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref12","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv preprint arXiv 1509 02971"},{"key":"ref13","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018","journal-title":"arXiv preprint arXiv 1812 09111"},{"article-title":"Minimalistic gridworld environment for openai gym","year":"2018","author":"chevalier-boisvert","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-89177-0_2"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1129"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(71)90010-5"},{"key":"ref18","first-page":"5020","article-title":"Sub-goal trees a framework for goal-based reinforcement learning","author":"jurgenson","year":"2020","journal-title":"ICML"},{"key":"ref19","first-page":"166","article-title":"Modular multitask reinforcement learning with policy sketches","author":"andreas","year":"2017","journal-title":"ICML"},{"key":"ref4","article-title":"Automatic task decomposition and state abstraction from demonstration","author":"cobo","year":"2012","journal-title":"Georgia Institute of Technology"},{"journal-title":"Monitoring the execution of temporal plans for robotic systems","year":"2012","author":"levine","key":"ref3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"ref5","article-title":"Hierarchical reinforcement learning with universal policies for multistep robotic manipulation","author":"yang","year":"2021","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"article-title":"Reinforcement learning: An introduction","year":"2018","author":"sutton","key":"ref8"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-100819-063206"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2016.XII.002"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12412"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref22","first-page":"4654","article-title":"TACO: Learning task decomposition via temporal alignment for control","volume":"80","author":"shiarlis","year":"2018","journal-title":"ICML ser PMLR"},{"key":"ref21","first-page":"418","article-title":"Ddco: Discovery of deep continuous options for robot learning from demonstrations","author":"krishnan","year":"2017","journal-title":"CoRL"},{"key":"ref24","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"NIPS"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/671"},{"key":"ref25","first-page":"8565","article-title":"Universal asymptotic optimality of polyak momentum","author":"scieur","year":"2020","journal-title":"ICML"}],"event":{"name":"2022 27th International Conference on Automation and Computing (ICAC)","start":{"date-parts":[[2022,9,1]]},"location":"Bristol, United Kingdom","end":{"date-parts":[[2022,9,3]]}},"container-title":["2022 27th International Conference on Automation and Computing (ICAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9911058\/9911059\/09911100.pdf?arnumber=9911100","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,4]],"date-time":"2022-11-04T01:27:12Z","timestamp":1667525232000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9911100\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,1]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/icac55051.2022.9911100","relation":{},"subject":[],"published":{"date-parts":[[2022,9,1]]}}}