{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:52:23Z","timestamp":1775065943984,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,5]]},"DOI":"10.1109\/icra.2018.8460756","type":"proceedings-article","created":{"date-parts":[[2018,9,21]],"date-time":"2018-09-21T22:28:03Z","timestamp":1537568883000},"page":"6244-6251","source":"Crossref","is-referenced-by-count":121,"title":["Composable Deep Reinforcement Learning for Robotic Manipulation"],"prefix":"10.1109","author":[{"given":"Tuomas","family":"Haarnoja","sequence":"first","affiliation":[]},{"given":"Vitchyr","family":"Pong","sequence":"additional","affiliation":[]},{"given":"Aurick","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Murtaza","family":"Dalal","sequence":"additional","affiliation":[]},{"given":"Pieter","family":"Abbeel","sequence":"additional","affiliation":[]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref33","first-page":"2370","article-title":"Stein variational gradient descent: A general purpose bayesian inference algorithm","author":"liu","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1088\/1742-5468\/2005\/11\/P11011"},{"key":"ref31","first-page":"1369","article-title":"Linearly-solvable Markov decision problems","author":"todorov","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"ref30","author":"nachum","year":"2017","journal-title":"Trust-PCL An off-policy trust region method for continuous control"},{"key":"ref37","author":"haarnoja","year":"2018","journal-title":"Soft actor-critic Off-policy maximum entropy deep reinforcement learning with a stochastic actor"},{"key":"ref36","first-page":"1856","article-title":"Compositionality of optimal control laws","author":"todorov","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref35","author":"lillicrap","year":"2015","journal-title":"Continuous control with deep reinforcement learning"},{"key":"ref34","author":"wang","year":"2016","journal-title":"Learning to draw samples With application to amortized mle for generative adversarial learning"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2006.282564"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2008.02.003"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.2174\/1573399812666160613113556"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206046"},{"key":"ref14","author":"zhang","year":"2015","journal-title":"Towards vision-based deep reinforcement learning for robotic motion control"},{"key":"ref15","author":"sadeghi","year":"2016","journal-title":"(CAD)2RL Real single-image flight without a single real image"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref17","first-page":"5055","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref18","volume":"abs 1707 2267","author":"james","year":"2017","journal-title":"Transferring end-to-end visuomotor control from simulation to real world for a multistage task"},{"key":"ref19","article-title":"Visual servoing from deep neural networks","volume":"abs 1705 8940","author":"bateux","year":"2017","journal-title":"CoRR"},{"key":"ref28","first-page":"2772","article-title":"Bridging the gap between value and policy based reinforcement learning","author":"nachum","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref4","author":"rusu","year":"2016","journal-title":"Progressive neural networks"},{"key":"ref27","article-title":"Taming the noise in reinforcement learning via soft updates","author":"fox","year":"2016","journal-title":"Conf on Uncertainty in Artificial Intelligence"},{"key":"ref3","author":"vecerik","year":"2017","journal-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards"},{"key":"ref6","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","author":"ziebart","year":"2008","journal-title":"AAAI Conference on Artificial Intelligence"},{"key":"ref29","author":"schulman","year":"2017","journal-title":"Equivalence between policy gradients and soft Q-learning"},{"key":"ref5","author":"james","year":"2017","journal-title":"Transferring end-to-end visuomotor control from simulation to real world for a multi-stage task"},{"key":"ref8","first-page":"1547","article-title":"Learning attractor landscapes for learning motor primitives","author":"ijspeert","year":"2003","journal-title":"Advances in neural information processing systems"},{"key":"ref7","author":"haarnoja","year":"2017","journal-title":"Reinforcement learning with deep energy-based policies"},{"key":"ref2","author":"finn","year":"2016","journal-title":"A connection between generative adversarial networks inverse reinforcement learning and energy-based models"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1162\/NECO_a_00393"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"1521","DOI":"10.1163\/156855307782148550","article-title":"Reinforcement learning for imitating constrained reaching movements","volume":"21","author":"guenter","year":"2007","journal-title":"Advanced Robotics"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509336"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2009.5152385"},{"key":"ref24","first-page":"4286","article-title":"General duality between optimal control and estimation","author":"todorov","year":"2008","journal-title":"IEEE Conf on Decision and Control"},{"key":"ref23","first-page":"2829","article-title":"Continuous deep Q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"Int Conf on Machine Learning"},{"key":"ref26","first-page":"1049","article-title":"Robot trajectory optimization using approximate inference","author":"toussaint","year":"2009","journal-title":"Int Conf on Machine Learning"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2012.VIII.045"}],"event":{"name":"2018 IEEE International Conference on Robotics and Automation (ICRA)","location":"Brisbane, QLD","start":{"date-parts":[[2018,5,21]]},"end":{"date-parts":[[2018,5,25]]}},"container-title":["2018 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8449910\/8460178\/08460756.pdf?arnumber=8460756","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,24]],"date-time":"2020-08-24T00:42:57Z","timestamp":1598229777000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8460756\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,5]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icra.2018.8460756","relation":{},"subject":[],"published":{"date-parts":[[2018,5]]}}}