{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T05:55:00Z","timestamp":1775109300308,"version":"3.50.1"},"reference-count":46,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,5]]},"DOI":"10.1109\/icra.2018.8461039","type":"proceedings-article","created":{"date-parts":[[2018,9,21]],"date-time":"2018-09-21T22:28:03Z","timestamp":1537568883000},"page":"6284-6291","source":"Crossref","is-referenced-by-count":157,"title":["Deep Reinforcement Learning for Vision-Based Robotic Grasping: A Simulated Comparative Evaluation of Off-Policy Methods"],"prefix":"10.1109","author":[{"given":"Deirdre","family":"Quillen","sequence":"first","affiliation":[]},{"given":"Eric","family":"Jang","sequence":"additional","affiliation":[]},{"given":"Ofir","family":"Nachum","sequence":"additional","affiliation":[]},{"given":"Chelsea","family":"Finn","sequence":"additional","affiliation":[]},{"given":"Julian","family":"Ibarz","sequence":"additional","affiliation":[]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref38","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref33","article-title":"Supersizing self-supervision: Learning to grasp from 50k tries and 700 robot hours","author":"pinto","year":"2016","journal-title":"International Conference on Robotics and Automation (ICRA)"},{"key":"ref32","article-title":"Grasp pose detection in point clouds","author":"pas","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref31","article-title":"PGQ: Combining policy gradient and Q-learning","author":"o'donoghue","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref30","article-title":"Trust-pcl: An off-policy trust region method for continuous control","author":"nachum","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref37","article-title":"(cad)$?2$rl: Real single-image flight without a single real image","author":"sadeghi","year":"2016","journal-title":"CoRR abs\/1611 04201"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1177\/0278364912442972"},{"key":"ref35","article-title":"Neural fitted q iteration-first experiences with a data efficient neural reinforcement learning method","author":"riedmiller","year":"2005","journal-title":"European Conference on Machine Learning (ECML)"},{"key":"ref34","article-title":"Data-efficient deep reinforcement learning for dexterous manipulation","author":"popov","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref10","article-title":"Deep spatial autoencoders for visuomotor learning","author":"finn","year":"2016","journal-title":"International Conference on Robotics and Automation (ICRA)"},{"key":"ref40","article-title":"Domain randomization for transferring deep neural networks from simulation to the real world","author":"tobin","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref11","article-title":"Deep predictive policy training using reinforcement learning","author":"ghadirzadeh","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref12","article-title":"The columbia grasp database","author":"goldfeder","year":"2009","journal-title":"International Conference on Robotics and Automation (ICRA)"},{"key":"ref13","article-title":"Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates","author":"gu","year":"2017","journal-title":"International Conference on Robotics and Automation (ICRA)"},{"key":"ref14","article-title":"Q-prop: Sample-efficient policy gradient with an off-policy critic","author":"gu","year":"2017","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref15","article-title":"Continuous deep Q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref16","article-title":"Reinforcement learning with deep energy-based policies","author":"haarnoja","year":"2017","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-013-9366-8"},{"key":"ref18","article-title":"Reproducibility of benchmarked deep reinforcement learning tasks for continuous control","author":"islam","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref19","article-title":"3d simulation for robot arm control with deep q-learning","author":"james","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref28","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"ArXiv Preprint"},{"key":"ref4","article-title":"Combining model-based and model-free updates for trajectory-centric reinforcement learning","author":"chebotar","year":"2017","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref27","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref3","article-title":"Openai gym","author":"brockman","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref6","author":"coumans","year":"2016","journal-title":"pybullet a python module for physics simulation games robotics and machine learning"},{"key":"ref29","article-title":"Bridging the gap between value and policy based reinforcement learning","author":"nachum","year":"2017","journal-title":"Neural Information Processing Systems (NIPS)"},{"key":"ref5","article-title":"Path integral guided policy search","author":"chebotar","year":"2017","journal-title":"International Conference on Robotics and Automation (ICRA)"},{"key":"ref8","article-title":"Unsupervised learning for physical interaction through video prediction","author":"finn","year":"2016","journal-title":"Neural Information Processing Systems (NIPS)"},{"key":"ref7","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.1109\/TRO.2013.2289018","article-title":"Data-driven grasp synthesisa survey","author":"bohg","year":"2014","journal-title":"Transactions on Robotics"},{"key":"ref9","article-title":"Deep visual foresight for planning robot motion","author":"finn","year":"2017","journal-title":"International Conference on Robotics and Automation (ICRA)"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref46","article-title":"Towards vision-based deep reinforcement learning for robotic motion control","author":"zhang","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref20","article-title":"Leveraging big data for grasp planning","author":"kappler","year":"2015","journal-title":"International Conference on Robotics and Automation (ICRA)"},{"key":"ref45","article-title":"Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation","author":"wu","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1177\/0278364914549607"},{"key":"ref21","article-title":"Autonomous reinforcement learning on raw visual input data in a real world application","author":"lange","year":"2012","journal-title":"International Joint Conference on Neural Networks (IJCNN)"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref24","article-title":"Learning hand-eye coordination for robotic grasping with deep learning and large-scale data collection","author":"levine","year":"2016","journal-title":"The International Journal of Robotics Research (IJRR)"},{"key":"ref41","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v30i1.10295","article-title":"Deep reinforcement learning with double q-learning","author":"van hasselt","year":"2016","journal-title":"AAAI"},{"key":"ref23","article-title":"End-to-end training of deep visuomotor policies","author":"levine","year":"2016","journal-title":"Journal of Machine Learning Research (JMLR)"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref26","article-title":"Dex-net 2.0: Deep learning to plan robust grasps with synthetic point clouds and analytic grasp metrics","author":"mahler","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2012.6224697"},{"key":"ref25","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"International Conference on Learning Representations (ICLR)"}],"event":{"name":"2018 IEEE International Conference on Robotics and Automation (ICRA)","location":"Brisbane, QLD","start":{"date-parts":[[2018,5,21]]},"end":{"date-parts":[[2018,5,25]]}},"container-title":["2018 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8449910\/8460178\/08461039.pdf?arnumber=8461039","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,5]],"date-time":"2023-09-05T09:08:13Z","timestamp":1693904893000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8461039\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,5]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/icra.2018.8461039","relation":{},"subject":[],"published":{"date-parts":[[2018,5]]}}}