{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T07:08:09Z","timestamp":1769843289044,"version":"3.49.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,14]],"date-time":"2021-12-14T00:00:00Z","timestamp":1639440000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,14]],"date-time":"2021-12-14T00:00:00Z","timestamp":1639440000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,14]]},"DOI":"10.1109\/cdc45484.2021.9683641","type":"proceedings-article","created":{"date-parts":[[2022,2,1]],"date-time":"2022-02-01T20:50:18Z","timestamp":1643748618000},"page":"1808-1813","source":"Crossref","is-referenced-by-count":6,"title":["Online Policies for Real-Time Control Using MRAC-RL"],"prefix":"10.1109","author":[{"given":"Anubhav","family":"Guha","sequence":"first","affiliation":[{"name":"Massachusetts Institute of Technology,Department of Mechanical Engineering,Cambridge,MA,USA,02139"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anuradha M.","family":"Annaswamy","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology,Department of Mechanical Engineering,Cambridge,MA,USA,02139"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","article-title":"Mrac-rl: A framework for on-line policy adaptation under parametric model uncertainty","author":"guha","year":"2020","journal-title":"arXiv preprint arXiv 2011 13439"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1109\/37.126844","article-title":"Reinforcement learning is direct adaptive optimal control","volume":"12","author":"sutton","year":"1992","journal-title":"IEEE Control Systems Magazine"},{"key":"ref31","article-title":"Adaptive flight control in the presence of limits on magnitude and rate","author":"gaudio","year":"2019","journal-title":"arXiv preprint arXiv 1907 11634"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2004.1383848"},{"key":"ref36","first-page":"700","article-title":"Domain randomization for simulation-based policy optimization with transferability assessment","author":"muratore","year":"2018","journal-title":"Conference on Robot Learning"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2012.2200104"},{"key":"ref34","first-page":"1","article-title":"Deep reinforcement learning for drone navigation using sensor data","author":"hodge","year":"2020","journal-title":"Neural Computing and Applications"},{"key":"ref10","first-page":"3043","article-title":"Reinforcement learning under model mismatch","author":"roy","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref11","first-page":"6550","article-title":"Towards generalization and simplicity in continuous control","author":"rajeswaran","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref12","article-title":"Assessing generalization in deep reinforcement learning","author":"packer","year":"2018","journal-title":"arXiv preprint arXiv 1810 10053"},{"key":"ref13","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv preprint arXiv 1707 06347"},{"key":"ref14","article-title":"Benchmarking model-based reinforcement learning","author":"langlois","year":"2019","journal-title":"arXiv preprint arXiv 1907 09509"},{"key":"ref15","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref16","article-title":"Deep reinforcement learning that matters","author":"henderson","year":"2017","journal-title":"arXiv preprint arXiv 1709 02232"},{"key":"ref17","article-title":"DARLA: Improving zero-shot transfer in reinforcement learning","author":"higgins","year":"2017","journal-title":"arXiv preprint arXiv 1707 08385"},{"key":"ref18","article-title":"Learning to adapt in dynamic, real-world environments through meta-reinforcement learning","author":"nagabandi","year":"2018","journal-title":"arXiv preprint arXiv 1803 11347"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1017\/S0140525X16001837"},{"key":"ref28","author":"narendra","year":"1989","journal-title":"Stable Adaptive Systems"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.2514\/6.2013-4514"},{"key":"ref3","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"arXiv preprint arXiv 1509 02971"},{"key":"ref6","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015","journal-title":"arXiv preprint arXiv 1506 02349"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/9.333787"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-053018-023825"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.010"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/1830483.1830505"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/11552246_35"},{"key":"ref9","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.12107","article-title":"Safe reinforcement learning via formal methods","author":"fulton","year":"2018","journal-title":"AAAI Conference on Artificial Intelligence"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref20","first-page":"465","article-title":"PILCO: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2019.2942989"},{"key":"ref21","article-title":"Epopt: Learning robust neural network policies using model ensembles","author":"rajeswaran","year":"2016","journal-title":"arXiv preprint arXiv 1610 01283"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TCST.2012.2200104"},{"key":"ref23","first-page":"908","article-title":"Safe model-based reinforcement learning with stability guarantees","author":"berkenkamp","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.2514\/6.2009-5754"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"6555","DOI":"10.3182\/20110828-6-IT-1002.00716","article-title":"Robust and adaptive control of x-45a jucas: a design trade study","volume":"44","author":"wise","year":"2011","journal-title":"IFAC Proceedings Volumes"}],"event":{"name":"2021 60th IEEE Conference on Decision and Control (CDC)","location":"Austin, TX, USA","start":{"date-parts":[[2021,12,14]]},"end":{"date-parts":[[2021,12,17]]}},"container-title":["2021 60th IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9682670\/9682776\/09683641.pdf?arnumber=9683641","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,16]],"date-time":"2023-11-16T18:34:11Z","timestamp":1700159651000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9683641\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,14]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/cdc45484.2021.9683641","relation":{},"subject":[],"published":{"date-parts":[[2021,12,14]]}}}