{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T15:03:40Z","timestamp":1780671820500,"version":"3.54.1"},"reference-count":53,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160983","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T13:20:56Z","timestamp":1688476856000},"page":"871-878","source":"Crossref","is-referenced-by-count":14,"title":["Comparison of Model-Based and Model-Free Reinforcement Learning for Real-World Dexterous Robotic Manipulation Tasks"],"prefix":"10.1109","author":[{"given":"David","family":"Valencia","sequence":"first","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"John","family":"Jia","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Raymond","family":"Li","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alex","family":"Hayashi","sequence":"additional","affiliation":[{"name":"New Dexterity Research Group, The University of Auckland,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Megan","family":"Lecchi","sequence":"additional","affiliation":[{"name":"New Dexterity Research Group, The University of Auckland,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Reuel","family":"Terezakis","sequence":"additional","affiliation":[{"name":"New Dexterity Research Group, The University of Auckland,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Trevor","family":"Gee","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Minas","family":"Liarokapis","sequence":"additional","affiliation":[{"name":"New Dexterity Research Group, The University of Auckland,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bruce A.","family":"MacDonald","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Henry","family":"Williams","sequence":"additional","affiliation":[{"name":"The University of Auckland,Centre for Automation and Robotic Engineering Science,New Zealand"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2010.5652970"},{"key":"ref12","article-title":"Average-reward model-free reinforcement learning: a systematic review and literature mapping","author":"dewanto","year":"2020","journal-title":"ar Xiv preprint"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref14","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794102"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2011.VII.008"},{"key":"ref10","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume":"31","author":"chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561384"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759557"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.3390\/s21041278"},{"key":"ref18","article-title":"Soft actor-critic algorithms and applications","author":"haarnoja","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/SSRR.2018.8468643"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593986"},{"key":"ref46","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref45","article-title":"Model-free reinforcement learning for financial portfolios: a brief survey","author":"sato","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref48","doi-asserted-by":"crossref","first-page":"124","DOI":"10.1109\/TRO.2018.2878318","article-title":"Model-based reinforcement learning for closed-loop dynamic control of soft robotic manipulators","volume":"35","author":"thuruthel","year":"2018","journal-title":"IEEE Transactions on Robotics"},{"key":"ref47","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref42","article-title":"Epopt: Learning robust neural network policies using model ensembles","author":"rajeswaran","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636557"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-009-9120-4"},{"key":"ref43","first-page":"1","article-title":"Model-free rein-forcement learning from expert demonstrations: a survey","author":"ramirez","year":"2021","journal-title":"Artificial Intelligence Review"},{"key":"ref49","article-title":"Dexterous manipulation for multi-fingered robotic hands with reinforcement learning: A review","volume":"16","author":"yu","year":"2022","journal-title":"Fron-tiers in neurorobotics"},{"key":"ref8","author":"bishop","year":"1994","journal-title":"Mixture density networks"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1126\/science.aat8414","volume":"364","author":"billard","year":"2019","journal-title":"Science"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/SIU.2019.8806389"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907059"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.1997.606886"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2000.844081"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1177\/027836499501400402"},{"key":"ref40","article-title":"Data-efficient deep reinforcement learning for dexterous manipulation","author":"popov","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref35","first-page":"1101","article-title":"Deep dynamics models for learning dexterous manipulation","author":"nagabandi","year":"2020","journal-title":"Conference on Robot Learning"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref37","first-page":"1","article-title":"Model predictive-actor critic reinforcement learning for dex-terous manipulation","author":"omer","year":"2021","journal-title":"2020 International Conference on Computer Control Electrical and Electronics Engineering (ICCCEEE)"},{"key":"ref36","first-page":"363","article-title":"Autonomous inverted helicopter flight via reinforcement learning","author":"andrew","year":"2006","journal-title":"Experimental Robotics IX"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/614"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1098\/rspa.2021.0618"},{"key":"ref33","first-page":"137","article-title":"Contact-invariant optimization for hand manipulation","author":"mordatch","year":"2012","journal-title":"Proceedings of the ACM SIGGRAPH\/Eurographics Symposium on Computer Animation"},{"key":"ref32","article-title":"Model-based reinforcement learning: A survey","author":"thomas","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1177\/0278364919887447","article-title":"Learning dexterous in-hand manipulation","volume":"39","author":"ai","year":"2020","journal-title":"The International Journal of Robotics Research"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cag.2013.04.007"},{"key":"ref39","article-title":"High-accuracy model-based reinforcement learning, a survey","author":"plaat","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref38","article-title":"Model-based deep reinforcement learning for high-dimensional problems, a survey","author":"plaat","year":"2020","journal-title":"ar Xiv preprint"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487156"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s43154-020-00021-6"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2018.2885801"},{"key":"ref25","article-title":"Model-ensemble trust-region policy optimization","author":"kurutach","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref20","article-title":"When to trust your model: Model-based policy optimization","author":"janner","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref22","article-title":"Deep reinforcement learning for autonomous driving: A survey","author":"kiran","year":"2021","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"ref21","article-title":"Model-based reinforcement learning for atari","author":"kaiser","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref28","article-title":"Continuous control with deep reinforcement learning","author":"timothy","year":"2015","journal-title":"ar Xiv preprint"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759394"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2020.3012947"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160983.pdf?arnumber=10160983","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T13:37:23Z","timestamp":1690205843000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160983\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160983","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}