{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T21:56:52Z","timestamp":1757541412466,"version":"3.28.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,5]],"date-time":"2023-12-05T00:00:00Z","timestamp":1701734400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,5]],"date-time":"2023-12-05T00:00:00Z","timestamp":1701734400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,5]]},"DOI":"10.1109\/icar58858.2023.10406318","type":"proceedings-article","created":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T18:27:00Z","timestamp":1706812020000},"page":"499-505","source":"Crossref","is-referenced-by-count":1,"title":["Learning Complicated Manipulation Skills Via Deterministic Policy with Limited Demonstrations"],"prefix":"10.1109","author":[{"given":"Haofeng","family":"Liu","sequence":"first","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering"}]},{"given":"Jiayi","family":"Tan","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering"}]},{"given":"Yiwen","family":"Chen","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering"}]},{"given":"Marcelo H","family":"Ang","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref3","article-title":"Continuous control with deep 
reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"Computer Science"},{"key":"ref4","first-page":"19960","article-title":"Policy learning using weak supervision","volume":"34","author":"Wang","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref5","article-title":"Integrating behavior cloning and reinforcement learning for improved performance in dense and sparse reward environments","author":"Goecks","year":"2019","journal-title":"arXiv preprint"},{"key":"ref6","article-title":"Reinforcement learning from demonstration through shaping","volume-title":"Twenty-fourth international joint conference on artificial intelligence","author":"Brys","year":"2015"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00942"},{"key":"ref8","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics","author":"Ross","year":"2011"},{"key":"ref9","article-title":"Behavior regularized offline reinforcement learning","author":"Wu","year":"2019","journal-title":"arXiv preprint"},{"key":"ref10","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"International Conference on Machine Learning","author":"Fujimoto"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-021-05961-4"},{"key":"ref12","article-title":"D4rl: Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020","journal-title":"arXiv preprint"},{"key":"ref13","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"Vecerik","year":"2017","journal-title":"arXiv 
preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"issue":"1","key":"ref15","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref17","article-title":"Tractable inference for complex stochastic processes","author":"Boyen","year":"2013","journal-title":"arXiv preprint"},{"key":"ref18","article-title":"What matters for on-policy deep actor-critic methods? a large-scale study","volume-title":"International conference on learning representations","author":"Andrychowicz","year":"2021"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref20","article-title":"Stabilizing off-policy q-learning via bootstrapping error reduction","volume":"32","author":"Kumar","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref21","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020","journal-title":"arXiv preprint"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/tnn.1998.712192"},{"key":"ref23","article-title":"Query-efficient imitation learning for end-to-end autonomous driving","author":"Zhang","year":"2016","journal-title":"arXiv preprint"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/tits.2021.3054625"},{"key":"ref25","article-title":"Soft actor-critic algorithms and applications","author":"Haarnoja","year":"2018","journal-title":"arXiv preprint"},{"key":"ref26","article-title":"Leveraging Demonstrations for Deep Reinforcement Learning on Robotics Problems with Sparse Rewards","author":"Vecerik","year":"2017","journal-title":"arXiv e-prints"},{"key":"ref27","first-page":"1587","article-title":"Addressing function approximation error 
in actor-critic methods","volume-title":"International Conference on Machine Learning","author":"Fujimoto"},{"key":"ref28","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv preprint"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2018.xiv.049"},{"volume-title":"Openai gym","year":"2016","author":"Brockman","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.54.2084"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.5220\/0007724500590066"},{"key":"ref33","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning","author":"Haarnoja"}],"event":{"name":"2023 21st International Conference on Advanced Robotics (ICAR)","start":{"date-parts":[[2023,12,5]]},"location":"Abu Dhabi, United Arab Emirates","end":{"date-parts":[[2023,12,8]]}},"container-title":["2023 21st International Conference on Advanced Robotics (ICAR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10406297\/10406219\/10406318.pdf?arnumber=10406318","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,2]],"date-time":"2024-02-02T18:26:26Z","timestamp":1706898386000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10406318\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,5]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/icar58858.2023.10406318","relation":{},"subject":[],"published":{"date-parts":[[2023,12,5]]}}}