{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T21:01:00Z","timestamp":1781038860841,"version":"3.54.1"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,4,14]]},"DOI":"10.1109\/robosoft60065.2024.10522003","type":"proceedings-article","created":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T17:23:35Z","timestamp":1715621015000},"page":"933-939","source":"Crossref","is-referenced-by-count":6,"title":["Reinforcement Learning Controllers for Soft Robots Using Learned Environments"],"prefix":"10.1109","author":[{"given":"Uljad","family":"Berdica","sequence":"first","affiliation":[{"name":"University of Oxford"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Matthew","family":"Jackson","sequence":"additional","affiliation":[{"name":"University of Oxford"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Niccol\u00f2 Enrico","family":"Veronese","sequence":"additional","affiliation":[{"name":"Polytechnic University of Milan,Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jakob","family":"Foerster","sequence":"additional","affiliation":[{"name":"University of Oxford"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Perla","family":"Maiolino","sequence":"additional","affiliation":[{"name":"University of Oxford"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aah3690"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1089\/soro.2017.0007"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s00422-012-0471-0"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2023.3253421"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOSOFT.2018.8404894"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2018.2878318"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1088\/1748-3190\/aa839f"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/RoboSoft55895.2023.10121988"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3157369"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref11","volume-title":"Flax: A neural network library and ecosystem for JAX","author":"Heek","year":"2023"},{"key":"ref12","author":"DiPietro","year":"2017","journal-title":"Analyzing and exploiting narx recurrent neural networks for long-term dependencies"},{"key":"ref13","author":"Schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_7"},{"key":"ref16","author":"Hessel","year":"2021","journal-title":"Podracer architectures for scalable reinforcement learning"},{"key":"ref17","first-page":"16455","article-title":"Discovered policy optimisation","volume":"35","author":"Lu","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref18","author":"Bradbury","year":"2018","journal-title":"Jax: composable transformations of python+ numpy programs"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793653"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206123"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1177\/1729881416687132"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TAMD.2010.2062511"},{"key":"ref23","volume-title":"gymnax: A jax-based reinforcement learning environment library, 2022b","author":"Lange","year":"2022"},{"key":"ref24","volume-title":"Mean reverting random walks","volume":"2011","author":"Demonstrations Project","year":"2024"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abm6074"}],"event":{"name":"2024 IEEE 7th International Conference on Soft Robotics (RoboSoft)","location":"San Diego, CA, USA","start":{"date-parts":[[2024,4,14]]},"end":{"date-parts":[[2024,4,17]]}},"container-title":["2024 IEEE 7th International Conference on Soft Robotics (RoboSoft)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10521892\/10521893\/10522003.pdf?arnumber=10522003","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T05:32:33Z","timestamp":1715664753000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10522003\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,14]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/robosoft60065.2024.10522003","relation":{},"subject":[],"published":{"date-parts":[[2024,4,14]]}}}