{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T16:10:15Z","timestamp":1774023015947,"version":"3.50.1"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2020,10,1]],"date-time":"2020-10-01T00:00:00Z","timestamp":1601510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,10,1]],"date-time":"2020-10-01T00:00:00Z","timestamp":1601510400000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,10,1]],"date-time":"2020-10-01T00:00:00Z","timestamp":1601510400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,10,1]],"date-time":"2020-10-01T00:00:00Z","timestamp":1601510400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"NSF NRI","award":["1637949"],"award-info":[{"award-number":["1637949"]}]},{"name":"NSF NRI","award":["1763705"],"award-info":[{"award-number":["1763705"]}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-17-1-2124"],"award-info":[{"award-number":["N00014-17-1-2124"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2020,10]]},"DOI":"10.1109\/lra.2020.3015448","type":"journal-article","created":{"date-parts":[[2020,8,11]],"date-time":"2020-08-11T21:33:59Z","timestamp":1597181639000},"page":"6724-6731","source":"Crossref","is-referenced-by-count":47,"title":["\u201cGood Robot!\u201d: Efficient Reinforcement Learning for Multi-Step Visual Tasks with Sim to Real Transfer"],"prefix":"10.1109","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2023-1810","authenticated-orcid":false,"given":"Andrew","family":"Hundt","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2511-7929","authenticated-orcid":false,"given":"Benjamin","family":"Killeen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nicholas","family":"Greene","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6442-8159","authenticated-orcid":false,"given":"Hongtao","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Heeyeon","family":"Kwon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1009-5982","authenticated-orcid":false,"given":"Chris","family":"Paxton","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6662-9763","authenticated-orcid":false,"given":"Gregory D.","family":"Hager","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","author":"ng","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref38","first-page":"515","article-title":"Learning deep policies for robot bin picking by simulating robust grasping sequences","author":"mahler","year":"2017","journal-title":"Conference on Robot Learning"},{"key":"ref33","article-title":"Ai safety gridworlds","author":"leike","year":"2017"},{"key":"ref32","article-title":"Concrete problems in ai safety","author":"amodei","year":"2016"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989250"},{"key":"ref30","first-page":"166","article-title":"Modular multitask reinforcement learning with policy sketches","author":"andreas","year":"0","journal-title":"Proc 34th Int Conf Mach Learn -Volume 70"},{"key":"ref37","first-page":"734","article-title":"Sim-to-real reinforcement learning for deformable object manipulation","author":"matas","year":"2018","journal-title":"Conference on Robot Learning"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.009"},{"key":"ref35","first-page":"3215","article-title":"RainBow: Combining improvements in deep reinforcement learning","author":"hessel","year":"0","journal-title":"Proc AAAI"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/768"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1177\/0278364914549607"},{"key":"ref40","article-title":"Prioritized experience replay","author":"schaul","year":"2016","journal-title":"ICLR 2016 Int Conf Learning Representations 2016"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.021"},{"key":"ref12","article-title":"6-DoF grasping for target-driven object manipulation in clutter","author":"murali","year":"2020","journal-title":"Int Conf Robot Autom"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759557"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461196"},{"key":"ref15","first-page":"430","article-title":"Learning physical intuition of block towers by example","author":"lerer","year":"2016","journal-title":"Int Conf Mach Learn"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_43"},{"key":"ref17","first-page":"64","article-title":"Unsupervised learning for physical interaction through video prediction","author":"finn","year":"0","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989023"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.058"},{"key":"ref28","first-page":"2935","article-title":"Playing hard exploration games by watching youtube","author":"aytar","year":"0","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917710318"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460689"},{"key":"ref3","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"Journal of Machine Learning Research"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139361"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967784"},{"key":"ref5","article-title":"QT-OPT: Scalable deep reinforcement learning for vision-based robotic manipulation","author":"kalashnikov","year":"2018","journal-title":"CoRL"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202237"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540108"},{"key":"ref2","article-title":"A review of robot learning for manipulation: Challenges, representations, and algorithms","author":"kroemer","year":"2019"},{"key":"ref9","first-page":"119","article-title":"End-to-end learning of semantic grasping","author":"jang","year":"0","journal-title":"CoRL"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593986"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aau4984"},{"key":"ref22","article-title":"Modular deep Q networks for sim-to-real transfer of visuo-motor policies","author":"zhang","year":"2017","journal-title":"ACRA"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00299"},{"key":"ref42","article-title":"Minimalistic gridworld environment for openai gym","author":"chevalier-boisvert","year":"2018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref41","article-title":"Scalable deep reinforcement learning for vision-based robotic manipulation","author":"kalashnikov","year":"0","journal-title":"Proc Conf Robot Learn"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.004"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594127"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.009"},{"key":"ref43","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"0","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460875"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/7083369\/9133350\/9165109-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/9133350\/09165109.pdf?arnumber=9165109","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T17:33:58Z","timestamp":1651080838000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9165109\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10]]},"references-count":44,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/lra.2020.3015448","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,10]]}}}