{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:22:53Z","timestamp":1766067773854,"version":"3.37.3"},"reference-count":48,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000185","name":"DARPA","doi-asserted-by":"publisher","award":["FA8750-18-C-0092"],"award-info":[{"award-number":["FA8750-18-C-0092"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000181","name":"AFOSR","doi-asserted-by":"publisher","award":["FA9550-19-1-0041"],"award-info":[{"award-number":["FA9550-19-1-0041"]}],"id":[{"id":"10.13039\/100000181","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160928","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"2913-2920","source":"Crossref","is-referenced-by-count":4,"title":["Learning Stabilization Control from Observations by Learning Lyapunov-like Proxy Models"],"prefix":"10.1109","author":[{"given":"Milan","family":"Ganai","sequence":"first","affiliation":[{"name":"UC San Diego,Department of Computer Science and Engineering,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chiaki","family":"Hirayama","sequence":"additional","affiliation":[{"name":"UC San Diego,Department of Computer Science and Engineering,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ya-Chien","family":"Chang","sequence":"additional","affiliation":[{"name":"UC San Diego,Department of Computer Science and Engineering,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sicun","family":"Gao","sequence":"additional","affiliation":[{"name":"UC San Diego,Department of Computer Science and Engineering,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"Reinforcement learning and the reward engineering principle","volume-title":"In 2014 AAAI Spring Symposium Series","author":"Dewey","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202141"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2010.5649089"},{"key":"ref4","article-title":"Concrete problems in ai safety","author":"Amodei","year":"2016","journal-title":"arXiv preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/882"},{"key":"ref8","article-title":"Generative adversarial imitation from observation","volume":"abs\/1807.06158","author":"Torabi","year":"2018","journal-title":"CoRR"},{"key":"ref9","article-title":"Generative adversarial imitation learning","volume-title":"Advances in Neural Information Processing Systems","volume":"29","author":"Ho","year":"2016"},{"key":"ref10","article-title":"Generative adversarial nets","volume-title":"Advances in Neural Information Processing Systems","volume":"27","author":"Goodfellow","year":"2014"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1515\/9781400841042-005"},{"key":"ref12","article-title":"Learning robust rewards with adverserial inverse reinforcement learning","author":"Fu","year":"2018","journal-title":"In ICLR"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11775"},{"key":"ref14","article-title":"Discriminator-actor-critic: Addressing sample inefficiency and reward bias in adversarial imitation learning","author":"Kostrikov","year":"2019","journal-title":"In ICLR"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3061397"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1093\/oso\/9780198538677.003.0006"},{"key":"ref17","article-title":"Alvinn: An autonomous land vehicle in a neural network","author":"Pomerleau","year":"1988","journal-title":"In NIPS"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"article-title":"Algorithms for inverse reinforcement learning","volume-title":"ICML 00 Proceedings of the Seventeenth International Conference on Machine Learning","author":"Ng","key":"ref19"},{"key":"ref20","article-title":"Discriminator-actor-critic: Addressing sample inefficiency and reward bias in adversarial imitation learning","author":"Kostrikov","year":"2019","journal-title":"In ICLR"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/687"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487167"},{"key":"ref23","first-page":"661","article-title":"Efficient reductions for imitation learning","volume-title":"Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, volume 9 of Proceedings of Machine Learning Research","author":"Ross"},{"key":"ref24","article-title":"No-regret reductions for imitation learning and structured prediction","volume":"abs\/1011.0686","author":"Ross","year":"2010","journal-title":"CoRR"},{"key":"ref25","first-page":"12402","article-title":"Off-policy imitation learning from observations","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Zhu","year":"2020"},{"key":"ref26","article-title":"Imitation learning from observations by minimizing inverse dynamics disagreement","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Yang","year":"2019"},{"key":"ref27","first-page":"6036","article-title":"Provably efficient imitation learning from observation alone","volume-title":"Proceedings of the 36th International Conference on Machine Learning, volume 97 of Proceedings of Machine Learning Research","author":"Sun"},{"key":"ref28","first-page":"1755","article-title":"Imitating latent policies from observation","volume-title":"Proceedings of the 36th International Conference on Machine Learning, volume 97 of Proceedings of Machine Learning Research","author":"Edwards"},{"key":"ref29","first-page":"8092","article-title":"A lyapunov-based approach to safe reinforcement learning","volume-title":"Advances in Neural Information Processing Systems","volume":"31","author":"Chow","year":"2018"},{"key":"ref30","article-title":"Lyapunov-based safe policy optimization for continuous control","volume":"abs\/1901.10031","author":"Chow","year":"2019","journal-title":"CoRR"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3011351"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3045114"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560886"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/LCSYS.2021.3077861"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2011.2159412"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2014.03.001"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2018.2833497"},{"key":"ref38","article-title":"A physically-consistent bayesian non-parametric mixture model for dynamical system learning","author":"Figueroa","year":"2018","journal-title":"In CoRL"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1002\/SERIES1345"},{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref40"},{"key":"ref41","article-title":"Proximal policy optimization algorithms","volume":"abs\/1707.06347","author":"Schulman","year":"2017","journal-title":"CoRR"},{"journal-title":"Automatic steering methods for autonomous automobile path tracking","year":"2009","author":"Snider","key":"ref42"},{"issue":"46","key":"ref43","first-page":"1573","article-title":"Rlpy: A value-function-based reinforcement learning framework for education and research","volume":"16","author":"Geramifard","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-019-01085-z"},{"journal-title":"Openai gym","year":"2016","author":"Brockman","key":"ref45"},{"journal-title":"Pythonrobotics: a python code collection of robotics algorithms","year":"2018","author":"Sakai","key":"ref46"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref48","article-title":"Neural lyapunov control","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Chang","year":"2019"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2023,5,29]]},"location":"London, United Kingdom","end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160928.pdf?arnumber=10160928","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T01:04:32Z","timestamp":1705021472000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160928\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160928","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}