{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,17]],"date-time":"2026-06-17T16:19:39Z","timestamp":1781713179076,"version":"3.54.5"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100020194","name":"Wellcome \/ EPSRC Centre for Interventional and Surgical Sciences","doi-asserted-by":"publisher","award":["203145\/Z\/16\/Z"],"award-info":[{"award-number":["203145\/Z\/16\/Z"]}],"id":[{"id":"10.13039\/501100020194","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000266","name":"Engineering and Physical Sciences Research Council","doi-asserted-by":"publisher","award":["EP\/S021566\/1"],"award-info":[{"award-number":["EP\/S021566\/1"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000266","name":"Engineering and Physical Sciences Research Council","doi-asserted-by":"publisher","award":["EP\/P012841\/1"],"award-info":[{"award-number":["EP\/P012841\/1"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]},{"name":"UKRI Future Leaders Fellowship","award":["MR\/V025333\/1"],"award-info":[{"award-number":["MR\/V025333\/1"]}]},{"name":"Royal Academy of Engineering Chair in Emerging Technologies Scheme"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1109\/lra.2023.3266720","type":"journal-article","created":{"date-parts":[[2023,4,12]],"date-time":"2023-04-12T17:34:47Z","timestamp":1681320887000},"page":"3326-3333","source":"Crossref","is-referenced-by-count":18,"title":["Learning Needle Pick-and-Place Without Expert Demonstrations"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2801-1743","authenticated-orcid":false,"given":"Rokas","family":"Bendikas","sequence":"first","affiliation":[{"name":"Surgical Robot Vision Group, WEISS, Department of Computer Science, University College London, London, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5177-428X","authenticated-orcid":false,"given":"Valerio","family":"Modugno","sequence":"additional","affiliation":[{"name":"Robot Perception and Learning Lab, Department of Computer Science, University College London, London, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3684-1472","authenticated-orcid":false,"given":"Dimitrios","family":"Kanoulas","sequence":"additional","affiliation":[{"name":"Robot Perception and Learning Lab, Department of Computer Science, University College London, London, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4609-1177","authenticated-orcid":false,"given":"Francisco","family":"Vasconcelos","sequence":"additional","affiliation":[{"name":"Surgical Robot Vision Group, WEISS, Department of Computer Science, University College London, London, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0980-3227","authenticated-orcid":false,"given":"Danail","family":"Stoyanov","sequence":"additional","affiliation":[{"name":"Surgical Robot Vision Group, WEISS, Department of Computer Science, University College London, London, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Efficient adaptation for end-to-end vision-based robotic manipulation","author":"Julian","year":"2020"},{"issue":"1","key":"ref2","first-page":"1334","article-title":"End-to-End training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2974707"},{"key":"ref4","article-title":"Dealing with sparse rewards in reinforcement learning","author":"Hare","year":"2019"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561673"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812393"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.3031236"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN47096.2020.9223543"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45622-8_16"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9635867"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160327"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461200"},{"key":"ref13","first-page":"1026","article-title":"Object-oriented curriculum generation for reinforcement learning","volume-title":"Proc. 17th Int. Conf. Auton. Agents MultiAgent Syst., ser. AAMAS 18. Richland, SC: Int. Found. Auton. Agents Multiagent Syst.","author":"Silva","year":"2018"},{"key":"ref14","first-page":"6434","article-title":"An open-source research kit for the da Vinci Surgical System","volume-title":"Proc. IEEE Int. Conf. Robot. Automat","author":"Kazanzides","year":"2014"},{"issue":"4","key":"ref15","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1109\/MRA.2021.3101646","article-title":"Accelerating surgical robotics research: A review of 10 years with the da Vinci research kit","volume":"28","author":"DEttorre","year":"2021","journal-title":"IEEE Robot. Automat. Mag."},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-062420-090543"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CASE49439.2021.9551569"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989278"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2913082"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2022.3171795"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ISMR.2019.8710194"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-019-01955-9"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1142\/9789813232266_0010"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ISMR48331.2020.9312930"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3453160"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01611-x"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207427"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2018.8489712"},{"key":"ref30","first-page":"91","article-title":"Learning to walk in minutes using massively parallel deep reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Rudin","year":"2022"},{"key":"ref31","article-title":"Hindsight experience replay","volume":"30","author":"Andrychowicz","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref32","article-title":"Policy distillation","author":"Rusu","year":"2015"},{"key":"ref33","article-title":"An empirical investigation of catastrophic forgetting in gradient-based neural networks","author":"Goodfellow","year":"2013"},{"key":"ref34","first-page":"1633","article-title":"Transfer learning for reinforcement learning domains: A survey","volume":"10","author":"Taylor","year":"2009","journal-title":"J. Mach. Learn. Res."},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74958-5_20"},{"key":"ref36","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lillicrap","year":"2016"},{"key":"ref37","first-page":"561","article-title":"Benchmarking reinforcement learning algorithms on real-world robots","volume-title":"Proc. 2nd Conf. Robot Learn.","author":"Mahmood","year":"2018"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561249"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2022.106004"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/10102643\/10100702.pdf?arnumber=10100702","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T18:37:58Z","timestamp":1723055878000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10100702\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":39,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/lra.2023.3266720","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6]]}}}