{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,4]],"date-time":"2025-09-04T14:06:30Z","timestamp":1756994790294,"version":"3.28.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icra46639.2022.9811993","type":"proceedings-article","created":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T19:36:40Z","timestamp":1657654600000},"page":"01-07","source":"Crossref","is-referenced-by-count":5,"title":["Value learning from trajectory optimization and Sobolev descent: A step toward reinforcement learning with superlinear convergence properties"],"prefix":"10.1109","author":[{"given":"Amit","family":"Parag","sequence":"first","affiliation":[{"name":"Artificial and Natural Intelligence Toulouse Institute,France"}]},{"given":"Sebastien","family":"Kleff","sequence":"additional","affiliation":[{"name":"Artificial and Natural Intelligence Toulouse Institute,France"}]},{"given":"Leo","family":"Saci","sequence":"additional","affiliation":[{"name":"Artificial and Natural Intelligence Toulouse Institute,France"}]},{"given":"Nicolas","family":"Mansard","sequence":"additional","affiliation":[{"name":"Artificial and Natural Intelligence Toulouse Institute,France"}]},{"given":"Olivier","family":"Stasse","sequence":"additional","affiliation":[{"name":"Artificial and Natural Intelligence Toulouse Institute,France"}]}],"member":"263","reference":[{"year":"2016","author":"brockman","journal-title":"OpenAI Gym","key":"ref33"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1109\/70.388788"},{"key":"ref31","volume":"35","author":"wright","year":"1999","journal-title":"Numerical Optimization"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1021\/ct049976i"},{"key":"ref37","first-page":"2016","author":"coumans","year":"0","journal-title":"Pybullet a python module for physics simulation for games robotics and machine learning"},{"doi-asserted-by":"publisher","key":"ref36","DOI":"10.1109\/SII.2019.8700380"},{"key":"ref35","doi-asserted-by":"crossref","DOI":"10.1126\/science.153.3731.34","article-title":"Dynamic programming","volume":"153","author":"bellman","year":"1966","journal-title":"Science"},{"key":"ref34","article-title":"Correct equations for the dynamics of the cart-pole system","author":"florian","year":"2007","journal-title":"Center for Cognitive and Neural Studies"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1109\/IROS.2012.6386109"},{"year":"2018","author":"hwangbo","journal-title":"Per-Contact Iteration Method for Solving Contact Dynamics","key":"ref11"},{"key":"ref12","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref13","article-title":"Guided policy search","author":"levine","year":"0","journal-title":"International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1109\/ICRA.2018.8463154"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.15607\/RSS.2014.X.052"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/ICRA.2016.7487140"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.15607\/RSS.2015.XI.012"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/ICRA.2017.7989043"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/ADPRL.2013.6614995"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1109\/IJCNN.1993.716972"},{"key":"ref4","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","author":"yu","year":"0","journal-title":"Conference on Robot Learning"},{"key":"ref27","article-title":"Explanation-based neural network learning for robot control","author":"mitchell","year":"1993","journal-title":"Advances in neural information processing systems"},{"key":"ref3","article-title":"Trust region policy optimization","author":"schulman","year":"0","journal-title":"International Conference on Machine Learning"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/ICRA40945.2020.9196619"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1162\/neco.1997.9.5.937"},{"key":"ref5","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref8","article-title":"Learning locomotion skills for cassie: Iterative design and sim-to-real","author":"xie","year":"0","journal-title":"Conference on Robot Learning"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.15607\/RSS.2019.XV.011"},{"key":"ref2","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"0","journal-title":"International Conference on Learning Representations"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1080\/01691864.2017.1378591"},{"key":"ref1","doi-asserted-by":"crossref","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref20","article-title":"Seagul: Sample efficient adversarially guided learning of value functions","author":"landry","year":"2021","journal-title":"Learning for Dynamics and Control"},{"key":"ref22","article-title":"A closer look at deep policy gradients","author":"ilyas","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref21","article-title":"Deep value model predictive control","author":"hoeller","year":"0","journal-title":"Conference on Robot Learning"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1109\/ACC.2005.1469949"},{"key":"ref23","article-title":"Differential dynamic programming-a unified approach to the optimization of dynamic systems","volume":"10","author":"mayne","year":"1973","journal-title":"Control and Dynamic Systems"},{"key":"ref26","article-title":"Sobolev training for neural networks","author":"czarnecki","year":"2017","journal-title":"Advances in neural information processing systems"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1109\/ICRA40945.2020.9196673"}],"event":{"name":"2022 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2022,5,23]]},"location":"Philadelphia, PA, USA","end":{"date-parts":[[2022,5,27]]}},"container-title":["2022 International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9811522\/9811357\/09811993.pdf?arnumber=9811993","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T23:06:48Z","timestamp":1667516808000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9811993\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/icra46639.2022.9811993","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}