{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:23:36Z","timestamp":1766067816983,"version":"3.37.3"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,7,8]],"date-time":"2023-07-08T00:00:00Z","timestamp":1688774400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,7,8]],"date-time":"2023-07-08T00:00:00Z","timestamp":1688774400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"NSFC","doi-asserted-by":"publisher","award":["61973294"],"award-info":[{"award-number":["61973294"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,7,8]]},"DOI":"10.1109\/icarm58088.2023.10218831","type":"proceedings-article","created":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T17:17:20Z","timestamp":1692983840000},"page":"185-190","source":"Crossref","is-referenced-by-count":3,"title":["SIRL: Self-Imitation Reinforcement Learning for Single-step Hitting Tasks"],"prefix":"10.1109","author":[{"given":"Yongle","family":"Luo","sequence":"first","affiliation":[{"name":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuxin","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kun","family":"Dong","sequence":"additional","affiliation":[{"name":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyong","family":"Sun","sequence":"additional","affiliation":[{"name":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Song","sequence":"additional","affiliation":[{"name":"Institute of Intelligent Machines, Hefei Institute of Physical Science, CAS,Hefei,China,230031"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref35","article-title":"robosuite: A modular simulation framework and benchmark for robot learning","author":"zhu","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560764"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.02.090"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICMA52036.2021.9512787"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref14","first-page":"1","article-title":"Optimal stroke learning with policy gradient approach for robotic table tennis","author":"gao","year":"2022","journal-title":"Applied Intelligence"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevFluids.6.053902"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1063\/5.0097241"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-89177-0_2"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.3390\/electronics9101742"},{"journal-title":"OpenAI Gym","year":"2016","author":"brockman","key":"ref32"},{"journal-title":"Self-imitation learning via trajectory-conditioned policy for hard-exploration tasks","year":"2019","author":"guo","key":"ref10"},{"key":"ref2","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3113709"},{"key":"ref16","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CASE48305.2020.9249227"},{"key":"ref18","article-title":"Efficient grasp detection network with gaussian-based grasp representation for robotic manipulation","author":"cao","year":"2022","journal-title":"IEEE\/ASME Transactions on Mechatronics"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN55064.2022.9892776"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341191"},{"key":"ref26","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"vecerik","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref25","first-page":"6818","article-title":"Imitation learning from imperfect demonstration","author":"wu","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abb9764"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3093340"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2022.3176207"},{"key":"ref28","article-title":"Self-imitation advantage learning","author":"ferret","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref27","article-title":"Imitating past successes can be very suboptimal","author":"eysenbach","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcp.2020.110080"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref7","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref9","first-page":"3878","article-title":"Self-imitation learning","author":"oh","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref3","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"ArXiv Preprint"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202244"},{"key":"ref5","first-page":"1","article-title":"Residual policy learning facilitates efficient modelfree autonomous racing","author":"zhang","year":"2022","journal-title":"IEEE l of Robotics and Automation"}],"event":{"name":"2023 International Conference on Advanced Robotics and Mechatronics (ICARM)","start":{"date-parts":[[2023,7,8]]},"location":"Sanya, China","end":{"date-parts":[[2023,7,10]]}},"container-title":["2023 International Conference on Advanced Robotics and Mechatronics (ICARM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10218726\/10218397\/10218831.pdf?arnumber=10218831","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,25]],"date-time":"2023-09-25T17:49:58Z","timestamp":1695664198000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10218831\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,8]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/icarm58088.2023.10218831","relation":{},"subject":[],"published":{"date-parts":[[2023,7,8]]}}}