{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:14:13Z","timestamp":1740100453345,"version":"3.37.3"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9,27]]},"DOI":"10.1109\/iros51168.2021.9636842","type":"proceedings-article","created":{"date-parts":[[2021,12,16]],"date-time":"2021-12-16T20:45:38Z","timestamp":1639687538000},"page":"3971-3978","source":"Crossref","is-referenced-by-count":0,"title":["Trajectory-based Split Hindsight Reverse Curriculum Learning"],"prefix":"10.1109","author":[{"given":"Jiaxi","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dianmin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shanlin","family":"Zhong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hong","family":"Qiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"Sim2real view invariant visual servoing by recurrent control","year":"2017","author":"sadeghi","key":"ref31"},{"key":"ref30","first-page":"334","article-title":"Transferring end-to-end visuomotor control from simulation to real world for a multi-stage task","author":"james","year":"2017","journal-title":"Conference on Robot Learning"},{"key":"ref10","first-page":"267","article-title":"Approximately optimal approximate reinforcement learning","author":"kakade","year":"2002","journal-title":"International Conference on Machine Learning"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1177\/0278364910369189"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917712421"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1177\/02783649922066385"},{"key":"ref14","first-page":"482","article-title":"Reverse curriculum generation for reinforcement learning","author":"florensa","year":"2017","journal-title":"Conference on Robot Learning"},{"article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","year":"2017","author":"vecerik","key":"ref15"},{"key":"ref16","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"2011","journal-title":"International Conference on Artificial Intelligence and Statistics"},{"key":"ref17","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref19","first-page":"113","article-title":"Energy-based hindsight experience prioritization","author":"zhao","year":"2018","journal-title":"Conference on Robot Learning"},{"journal-title":"Learning OpenCV Computer Vision With the OpenCV Library","year":"2008","author":"bradski","key":"ref28"},{"article-title":"Data-efficient deep reinforcement learning for dexterous manipulation","year":"2017","author":"popov","key":"ref4"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref3","first-page":"734","article-title":"Sim-to-real reinforcement learning for deformable object manipulation","author":"matas","year":"2018","journal-title":"Conference on Robot Learning"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2019.2917392"},{"key":"ref5","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref8","first-page":"5055","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in Neural Information Processing Systems CAI"},{"article-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793789"},{"article-title":"Qtopt: Scalable deep reinforcement learning for vision-based robotic manipulation","year":"2018","author":"kalashnikov","key":"ref1"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref20","first-page":"1515","article-title":"Automatic goal generation for reinforcement learning agents","author":"florensa","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref22","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"International Conference on Machine Learning"},{"key":"ref21","first-page":"1","article-title":"Learning symmetric and low-energy locomotion","volume":"37","author":"yu","year":"2018","journal-title":"ACM Transactions on Graphics"},{"article-title":"Openai gym","year":"2016","author":"brockman","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"article-title":"Openai baselines","year":"2017","author":"dhariwal","key":"ref25"}],"event":{"name":"2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2021,9,27]]},"location":"Prague, Czech Republic","end":{"date-parts":[[2021,10,1]]}},"container-title":["2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9635848\/9635849\/09636842.pdf?arnumber=9636842","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:54:45Z","timestamp":1652201685000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9636842\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,27]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/iros51168.2021.9636842","relation":{},"subject":[],"published":{"date-parts":[[2021,9,27]]}}}