{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T09:54:06Z","timestamp":1730195646891,"version":"3.28.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,15]]},"DOI":"10.1109\/aim55361.2024.10637217","type":"proceedings-article","created":{"date-parts":[[2024,8,22]],"date-time":"2024-08-22T17:52:35Z","timestamp":1724349155000},"page":"697-702","source":"Crossref","is-referenced-by-count":0,"title":["Solving Challenging Tasks with Manipulation Policy with Limited Demonstrations"],"prefix":"10.1109","author":[{"given":"Haofeng","family":"Liu","sequence":"first","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering"}]},{"given":"Jiayi","family":"Tan","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering"}]},{"given":"Yanchun","family":"Cheng","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering"}]},{"given":"Yiwen","family":"Chen","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering"}]},{"given":"Haiyue","family":"Zhu","sequence":"additional","affiliation":[{"name":"Agency for Science, Technology and Research (A*STAR)"}]},{"given":"Marcelo H","family":"Ang","sequence":"additional","affiliation":[{"name":"National University of Singapore,Department of Mechanical Engineering"}]}],"member":"263","reference":[{"key":"ref1","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume":"34","author":"Fujimoto","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref2","article-title":"Learning complicated manipulation skills via deterministic policy with limited demonstrations","author":"Haofeng","year":"2023","journal-title":"arXiv:2303.16469"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref5","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"Computer ence"},{"key":"ref6","first-page":"19960","article-title":"Policy learning using weak supervision","volume":"34","author":"Wang","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref7","article-title":"Integrating behavior cloning and reinforcement learning for improved performance in dense and sparse reward environments","author":"Goecks","year":"2019","journal-title":"arXiv:1910.04281"},{"article-title":"Reinforcement learning from demonstration through shaping","volume-title":"Twenty-fourth international joint conference on artificial intelligence","author":"Brys","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00942"},{"key":"ref10","first-page":"627","article-title":"A reduction 
of imitation learning and structured prediction to no-regret online learning","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics","author":"Ross"},{"key":"ref11","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"International Conference on Machine Learning","author":"Fujimoto"},{"key":"ref12","article-title":"Challenges of real-world reinforcement learning","author":"Dulac-Arnold","year":"2019","journal-title":"arXiv:1904.12901"},{"key":"ref13","article-title":"D4rl Datasets for deep data-driven reinforcement learning","author":"Fu","year":"2020","journal-title":"arXiv:2004.07219"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"issue":"1","key":"ref16","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref17","article-title":"Tractable inference for complex stochastic processes","author":"Boyen","year":"2013","journal-title":"arXiv preprint arXiv:1301.7362"},{"key":"ref18","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International Conference on Machine Learning","author":"Fujimoto"},{"key":"ref19","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv preprint arXiv:1509.02971"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"key":"ref21","article-title":"Leveraging Demonstrations for Deep Reinforcement Learning on Robotics Prob lems with Sparse Rewards","author":"Vecerik","year":"2017","journal-title":"arXiv e-prints, p"},{"key":"ref22","article-title":"Behavior regularized offline reinforcement learning","author":"Wu","year":"2019","journal-title":"arXiv preprint arXiv:1911.11361"},{"key":"ref23","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020","journal-title":"arXiv preprint arXiv:2005.01643"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2018.xiv.049"},{"article-title":"Openai gym","year":"2016","author":"Brockman","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.54.2084"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.5220\/0007724500590066"},{"key":"ref28","first-page":"7436","article-title":"Uncertainty-based offline reinforcement learning with diversified q-ensemble","volume":"34","author":"An","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref29","first-page":"15084","article-title":"Decision transformer: Reinforcement learning via sequence modeling","volume":"34","author":"Chen","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref30","article-title":"Awac: Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2020","journal-title":"arXiv preprint arXiv:2006.09359"},{"key":"ref31","article-title":"Offline reinforcement learning with implicit q-learning","author":"Kostrikov","year":"2021","journal-title":"arXiv preprint arXiv:2110.06169"},{"key":"ref32","first-page":"1861","article-title":"Soft actor-critic: Off policy maximum entropy deep reinforcement 
learning with a stochastic actor","volume-title":"International conference on machine learning","author":"Haarnoja"}],"event":{"name":"2024 IEEE\/ASME International Conference on Advanced Intelligent Mechatronics (AIM)","start":{"date-parts":[[2024,7,15]]},"location":"Boston, MA, USA","end":{"date-parts":[[2024,7,19]]}},"container-title":["2024 IEEE International Conference on Advanced Intelligent Mechatronics (AIM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10636941\/10636942\/10637217.pdf?arnumber=10637217","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T06:12:41Z","timestamp":1725430361000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10637217\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,15]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/aim55361.2024.10637217","relation":{},"subject":[],"published":{"date-parts":[[2024,7,15]]}}}
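
The record above is a Crossref REST API "work" message. As a minimal, illustrative sketch (not part of the record itself), the same payload can be retrieved and a few of its fields read with Python's requests library; the DOI comes from the record, while variable names and printed fields are chosen here for demonstration only.

import requests

# DOI taken from the Crossref record above.
DOI = "10.1109/aim55361.2024.10637217"

# The Crossref REST API returns {"status": "ok", "message": {...}} for a work lookup.
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
msg = resp.json()["message"]  # the "message" object mirrors the record above

# Read fields that appear in the record: title is a list, authors carry
# "given"/"family" names, and "references-count" is the cited-reference total.
title = msg["title"][0]
authors = ", ".join(f'{a["given"]} {a["family"]}' for a in msg["author"])
print(title)
print(authors)
print("references:", msg["references-count"])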