{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T18:31:15Z","timestamp":1770057075163,"version":"3.49.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,10,23]]},"DOI":"10.1109\/iros47612.2022.9981565","type":"proceedings-article","created":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T14:38:15Z","timestamp":1672065495000},"page":"9363-9370","source":"Crossref","is-referenced-by-count":9,"title":["Cloud-Edge Training Architecture for Sim-to-Real Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Hongpeng","family":"Cao","sequence":"first","affiliation":[{"name":"Technical University of Munich (TUM), School of Engineering and Design,Munich,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mirco","family":"Theile","sequence":"additional","affiliation":[{"name":"Technical University of Munich (TUM), School of Engineering and Design,Munich,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Federico G.","family":"Wyrwal","sequence":"additional","affiliation":[{"name":"Technical University of Munich (TUM), School of Engineering and Design,Munich,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marco","family":"Caccamo","sequence":"additional","affiliation":[{"name":"Technical University of Munich (TUM), School of Engineering and Design,Munich,Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","first-page":"464","article-title":"Why off-the-shelf physics simulators fail in evaluating feedback controller performance-a case study for quadrupedal robots","author":"neunert","year":"2017","journal-title":"Advances in Cooperative Robotics"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794127"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.010"},{"key":"ref31","year":"0","journal-title":"Quanser courseware and resources"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRev.36.823"},{"key":"ref11","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"vecerik","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref33","article-title":"Transfer from simulation to real world through learning deep inverse dynamics model","volume":"abs 1610 3518","author":"christiano","year":"2016","journal-title":"CoRR"},{"key":"ref10","first-page":"1","article-title":"Data efficient reinforcement learning for legged robots","author":"yang","year":"0","journal-title":"Conference on Robot Learning"},{"key":"ref32","author":"carlson","year":"2013","journal-title":"Redis in Action"},{"key":"ref2","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.011"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.034"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2899918"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01291"},{"key":"ref24","article-title":"Prioritized experience replay","author":"schaul","year":"2016","journal-title":"4th International Conference on Learning Representations ICLR 2016"},{"key":"ref23","article-title":"Reinforcement learning for robots using neural networks","author":"lin","year":"1993","journal-title":"tech rep Carnegie-Mellon Univ Pittsburgh PA School of Computer Science"},{"key":"ref26","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref25","article-title":"A deeper look at experience replay","author":"zhang","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref20","article-title":"Continuous control with deep rein-forcement learning","author":"lillicrap","year":"2016","journal-title":"4th International Conference on Learning Representations ICLR 2016"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref21","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21236\/ADA373286"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-0676-1"},{"key":"ref29","author":"brockman","year":"2016","journal-title":"OpenAI Gym"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref7","first-page":"651","article-title":"Scalable deep reinforcement learning for vision-based robotic manipulation","author":"kalashnikov","year":"0","journal-title":"Conference on Robot Learning"},{"key":"ref9","article-title":"Learning to adapt in dynamic, real-world environments through meta-reinforcement learning","author":"nagabandi","year":"2019","journal-title":"7th International Conference on Learning Representations ICLR 2019"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794102"},{"key":"ref6","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1177\/0278364920987859"}],"event":{"name":"2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Kyoto, Japan","start":{"date-parts":[[2022,10,23]]},"end":{"date-parts":[[2022,10,27]]}},"container-title":["2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9981026\/9981028\/09981565.pdf?arnumber=9981565","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,29]],"date-time":"2023-11-29T18:00:10Z","timestamp":1701280810000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9981565\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,23]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/iros47612.2022.9981565","relation":{},"subject":[],"published":{"date-parts":[[2022,10,23]]}}}