{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T20:34:52Z","timestamp":1780605292462,"version":"3.54.1"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610297","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"7772-7779","source":"Crossref","is-referenced-by-count":2,"title":["Mastering Stacking of Diverse Shapes with Large-Scale Iterative Reinforcement Learning on Real Robots"],"prefix":"10.1109","author":[{"given":"Thomas","family":"Lampe","sequence":"first","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Abbas","family":"Abdolmaleki","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sarah","family":"Bechtle","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sandy H.","family":"Huang","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jost","family":"Tobias Springenberg","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Michael","family":"Bloesch","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Oliver","family":"Groth","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Roland","family":"Hafner","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tim","family":"Hertweck","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Michael","family":"Neunert","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Markus","family":"Wulfmeier","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jingwei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Francesco","family":"Nori","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nicolas","family":"Heess","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Martin","family":"Riedmiller","sequence":"additional","affiliation":[{"name":"Google DeepMind,London,N1C4AG"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.009"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197326"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197108"},{"key":"ref4","article-title":"Beyond pick-and-place: Tackling robotic stacking of diverse shapes","volume-title":"5th Annual Conference on Robot Learning","author":"Lee"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adi8022"},{"key":"ref6","article-title":"Imitate and repurpose: Learning reusable robot movement skills from human and animal behaviors","author":"Bohez","year":"2022"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.051"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.025"},{"key":"ref9","article-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","author":"Brohan","year":"2023"},{"key":"ref10","article-title":"Open-world object manipulation using pre-trained vision-language models","author":"Stone","year":"2023"},{"key":"ref11","article-title":"Robocat: A self-improving foundation agent for robotic manipulation","author":"Bousmalis","year":"2023"},{"key":"ref12","article-title":"Collect & infer \u2013 a fresh look at data-efficient reinforcement learnin","author":"Riedmiller","year":"2021"},{"key":"ref13","first-page":"4344","article-title":"Learning by playing solving sparse reward tasks from scratch","volume-title":"International conference on machine learning","author":"Riedmiller"},{"key":"ref14","article-title":"A distributional perspective on reinforcement learning","volume-title":"International conference on machine learning","author":"Bellemare"},{"key":"ref15","article-title":"Learning continuous control policies by stochastic value gradients","author":"Heess","year":"2015","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref16","article-title":"Maximum a Posteriori policy optimisation","volume-title":"International Conference on Learning Representations","author":"Abdolmaleki"},{"key":"ref17","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International Conference on Machine Learning","author":"Haarnoja"},{"key":"ref18","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"International conference on machine learning","author":"Fujimoto"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_2"},{"key":"ref20","article-title":"Offline reinforcement learning: Tutorial, review","volume":"5","author":"Levine","year":"2020","journal-title":"and Perspectives on Open Problems"},{"key":"ref21","first-page":"7768","article-title":"Critic regularized regression","volume":"33","author":"Wang","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref22","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref23","article-title":"Qt-opt: Scalable deep reinforcement learning for vision-based robotic manipulation","author":"Kalashnikov","year":"2018"},{"key":"ref24","article-title":"Actionable models: Unsupervised offline reinforcement learning of robotic skills","author":"Chebotar","year":"2021"},{"key":"ref25","article-title":"Scaling data-driven robotics with reward sketching and batch reinforcement learning","author":"Cabi","year":"2019"},{"key":"ref26","article-title":"Benchmarks and algorithms for offline preference-based reward learning","author":"Shin","year":"2023","journal-title":"Transactions on Machine Learning Research"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981126"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907421"},{"key":"ref29","article-title":"Compositional transfer in hierarchical reinforcement learning","author":"Wulfmeier","year":"2020","journal-title":"Robotics: Science and Systems XVI"},{"key":"ref30","article-title":"Distral: Robust multitask reinforcement learning","volume":"30","author":"Teh","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref31","article-title":"Policy distillation","author":"Rusu","year":"2015"},{"issue":"1","key":"ref32","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref33","article-title":"Actor-mimic: Deep multitask and transfer reinforcement learning","author":"Parisotto","year":"2015"},{"key":"ref34","article-title":"Divide-and-conquer reinforcement learning","author":"Ghosh","year":"2017"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-012-9290-3"},{"key":"ref36","article-title":"Learning parameterized skills","author":"Da Silva","year":"2012"},{"key":"ref37","article-title":"Scaling up multi-task robotic reinforcement learning","volume-title":"5th Annual Conference on Robot Learning","author":"Kalashnikov"},{"key":"ref38","article-title":"Sawyer robot","author":"Robotics","year":"2023"},{"key":"ref39","article-title":"Simple sensor intentions for exploration","volume-title":"CoRR","author":"Hertweck","year":"2020"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2016.90"},{"key":"ref41","article-title":"The challenges of exploration for offline reinforcement learning","author":"Lambert","year":"2022"},{"key":"ref42","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics","author":"Ross"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","location":"Yokohama, Japan","start":{"date-parts":[[2024,5,13]]},"end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610297.pdf?arnumber=10610297","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:47:04Z","timestamp":1723268824000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610297\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610297","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}