{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T01:00:29Z","timestamp":1780707629843,"version":"3.54.1"},"reference-count":42,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,5]]},"DOI":"10.1109\/icra.2017.7989385","type":"proceedings-article","created":{"date-parts":[[2017,7,25]],"date-time":"2017-07-25T21:44:28Z","timestamp":1501019068000},"page":"3389-3396","source":"Crossref","is-referenced-by-count":951,"title":["Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates"],"prefix":"10.1109","author":[{"given":"Shixiang","family":"Gu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ethan","family":"Holly","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Timothy","family":"Lillicrap","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2009.5152577"},{"key":"ref38","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"ICLRE"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6631180"},{"key":"ref32","article-title":"Cloud-enabled humanoid robots","author":"kuffner","year":"2010","journal-title":"IEEE-RAS International Conference on Humanoid Robotics"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1177\/02783640022067878"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-011-5235-x"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICSMC.2004.1400771"},{"key":"ref35","article-title":"Col-lective robot reinforcement learning with distributed asynchronous guided policy search","author":"yahya","year":"2016","journal-title":"arXiv preprint arXiv 1610 01292"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2014.2376492"},{"key":"ref10","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"ICLRE"},{"key":"ref40","author":"tedrake","year":"0","journal-title":"Learning to walk in 20 minutes"},{"key":"ref11","article-title":"Continuous deep q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"ICML"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(92)90053-I"},{"key":"ref14","first-page":"317","article-title":"Neural fitted q in experiences with a data efficient neural reinforcement learning method","author":"riedmiller","year":"2005","journal-title":"European Conference on Machine Learning"},{"key":"ref15","article-title":"Neural reinforcement learning controllers for a real robot application","author":"hafner","year":"2007","journal-title":"ICRA"},{"key":"ref16","article-title":"Autonomous reinforcement learning on raw visual input data in a real world application","author":"riedmiller","year":"2012","journal-title":"International Joint Conference on Neural Networks"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/2463372.2463509"},{"key":"ref18","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"ICML"},{"key":"ref19","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"Journal of Machine Learning Resaerch"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref4","article-title":"Reinforcement learning of motor skills in high dimensions","author":"theodorou","year":"2010","journal-title":"ICRA"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2463372.2463509"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6095096"},{"key":"ref29","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"ICML"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v24i1.7727","article-title":"Relative entropy policy search","author":"peters","year":"2010","journal-title":"AAAI Conference on Artificial Intelligence"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref7","article-title":"An application of reinforcement learning to aerobatic helicopter flight","author":"abbeel","year":"2006","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0278364907084980"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2009.5152385"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2004.1307456"},{"key":"ref20","article-title":"PILCO: a model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"ICML"},{"key":"ref22","article-title":"Sample-based information-theoretic stochastic optimal control","author":"lioutikov","year":"2014","journal-title":"International Conference on Robotics and Automation"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139645"},{"key":"ref42","article-title":"Policy distillation","author":"rusu","year":"2016","journal-title":"ICLRE"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref41","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"ICML"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2300000021","article-title":"A survey on policy search for robotics","volume":"2","author":"deisenroth","year":"2013","journal-title":"Foundations and Trends in Robotics"},{"key":"ref26","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"1999","journal-title":"Advances in Neural Information Processing Systems (NIPS)"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"}],"event":{"name":"2017 IEEE International Conference on Robotics and Automation (ICRA)","location":"Singapore, Singapore","start":{"date-parts":[[2017,5,29]]},"end":{"date-parts":[[2017,6,3]]}},"container-title":["2017 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7960754\/7988677\/07989385.pdf?arnumber=7989385","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,24]],"date-time":"2025-06-24T17:58:15Z","timestamp":1750787895000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7989385\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/icra.2017.7989385","relation":{},"subject":[],"published":{"date-parts":[[2017,5]]}}}