{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T08:24:24Z","timestamp":1765268664727,"version":"3.37.3"},"reference-count":29,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61873045"],"award-info":[{"award-number":["61873045"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20180190"],"award-info":[{"award-number":["BK20180190"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Dalian Sci & Tech Innovation Foundation Program","award":["2019J12GX043"],"award-info":[{"award-number":["2019J12GX043"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["DUT19JC56"],"award-info":[{"award-number":["DUT19JC56"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/access.2020.3001130","type":"journal-article","created":{"date-parts":[[2020,6,9]],"date-time":"2020-06-09T21:09:41Z","timestamp":1591736981000},"page":"108429-108437","source":"Crossref","is-referenced-by-count":26,"title":["A Reinforcement Learning-Based Framework for Robot Manipulation Skill Acquisition"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3988-7879","authenticated-orcid":false,"given":"Dong","family":"Liu","sequence":"first","affiliation":[]},{"given":"Zitu","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Binpeng","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Ming","family":"Cong","sequence":"additional","affiliation":[]},{"given":"Honghua","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Qiang","family":"Zou","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2932257"},{"key":"ref11","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2015","journal-title":"J Mach Learn Res"},{"key":"ref12","article-title":"Data-efficient deep reinforcement learning for dexterous manipulation","author":"popov","year":"2017","journal-title":"arXiv 1704 03073"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref15","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref16","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"article-title":"Meta learning shared hierarchies","year":"0","author":"frans","key":"ref17"},{"article-title":"Hierarchical imitation and reinforcement learning","year":"0","author":"le","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6094992"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref27","first-page":"1","article-title":"Qt-opt: Scalable deep reinforcement learning for vision-based robotic manipulation","author":"kalashnikov","year":"2018","journal-title":"Proc Conf Robot Learn"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2013.00833"},{"key":"ref6","first-page":"2834","article-title":"Reinforcement learning: Survey of recent work","volume":"27","author":"chen","year":"2010","journal-title":"Appl Res Comput"},{"key":"ref29","article-title":"Emergence of locomotion behaviours in rich environments","author":"heess","year":"2017","journal-title":"arXiv 1707 02286"},{"key":"ref5","first-page":"86","article-title":"Research on reinforcement learning technology: A review","volume":"30","author":"gao","year":"2004","journal-title":"Acta Autom Sinica"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2872693"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386153"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-05431-5_11"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509152"},{"key":"ref20","first-page":"5692","article-title":"A Meta-MDP approach to exploration for lifelong reinforcement learning","author":"garcia and","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.048"},{"article-title":"Reinforcement learning and the reward engineering principle","year":"0","author":"dewey","key":"ref21"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2015.XI.018"},{"key":"ref23","article-title":"Deep learning for reward design to improve Monte Carlo tree search in ATARI games","author":"guo","year":"2016","journal-title":"arXiv 1604 07095"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593702"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.023"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8948470\/09112186.pdf?arnumber=9112186","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T01:08:30Z","timestamp":1641949710000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9112186\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/access.2020.3001130","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2020]]}}}