{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T00:01:07Z","timestamp":1755993667598,"version":"3.44.0"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,8]]},"DOI":"10.23919\/acc63710.2025.11108065","type":"proceedings-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T18:17:51Z","timestamp":1755800271000},"page":"4536-4551","source":"Crossref","is-referenced-by-count":0,"title":["The Search for Feedback in Reinforcement Learning"],"prefix":"10.23919","author":[{"given":"Ran","family":"Wang","sequence":"first","affiliation":[{"name":"Texas A&amp;M University,Department of Aerospace Engineering,College Station,TX,USA,77843"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aayushman","family":"Sharma","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University,Department of Aerospace Engineering,College Station,TX,USA,77843"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Karthikeya S.","family":"Parunandi","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University,Department of Aerospace Engineering,College Station,TX,USA,77843"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Raman","family":"Goyal","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University,Department of Aerospace Engineering,College Station,TX,USA,77843"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohamed Naveed","family":"Gul Mohamed","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University,Department of Aerospace Engineering,College Station,TX,USA,77843"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Suman","family":"Chakravorty","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University,Department of Aerospace Engineering,College Station,TX,USA,77843"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"crossref","DOI":"10.1137\/1.9781611974263","volume-title":"Stochastic systems: Estimation, identification, and adaptive control","volume":"75","author":"Kumar","year":"2015"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/b978-0-443-14081-5.00174-4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"issue":"1","key":"ref5","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"article-title":"Scalable trust-region method for deep reinforcement learning using Kronecker-factored approximation","year":"2017","author":"Yuhuai","key":"ref6"},{"article-title":"Trust Region Policy Optimization","year":"2017","author":"Schulman","key":"ref7"},{"article-title":"Proximal Policy Optimization Algorithms","year":"2017","author":"Schulman","key":"ref8"},{"key":"ref9","first-page":"1587","article-title":"Addressing Function Approximation Error in Actor-Critic Methods","volume-title":"Proceedings of the 35th International Conference on Machine Learning","volume":"80","author":"Fujimoto"},{"key":"ref10","first-page":"1861","article-title":"Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor","volume-title":"Proceedings of the 35th International Conference on Machine Learning","volume":"80","author":"Haarnoja"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"article-title":"Q-prop: Sample-efficient policy gradient with an off-policy critic","year":"2016","author":"Gu","key":"ref12"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref13"},{"volume-title":"Differential Dynamic Programming","year":"1970","author":"Jacobsen","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2010.5530971"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2005.1469949"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1080\/00207170701364913"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/37.126844"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487156"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386025"},{"article-title":"Learning Complex Neural Network Policies with Trajectory Optimization","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Levine","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1017\/s0962492900002518"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.automatica.2014.10.128"},{"volume-title":"Model Predictive Control: Theory and Design","year":"2015","author":"Rawlings","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1002\/SERIES1345"},{"volume-title":"Dynamic Programming and Optimal Control, Two Volume Set","year":"1995","author":"Bertsekas","key":"ref26"},{"article-title":"PILCO: A Model-Based and Data-Efficient Approach to Policy Search","volume-title":"International Conference on Machine Learning (ICML)","author":"Deisenroth","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-05181-4_4"},{"article-title":"Continuous Control with Deep Reinforcement Learning","volume-title":"Proc. ICLR","author":"Lillicrap","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CDC45484.2021.9683350"},{"article-title":"On the Feedback Law in Stochastic Optimal Nonlinear Control","year":"2024","author":"Mohamed","key":"ref31"},{"key":"ref32","volume-title":"Methods of Mathematical Physics, vol. II","volume":"336","author":"Courant","year":"1953"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-8489-2_3"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"article-title":"Openai gym","year":"2016","author":"Brockman","key":"ref35"},{"article-title":"DeepMind Control Suite","year":"2018","author":"Tassa","key":"ref36"},{"volume-title":"keras-rl","year":"2016","author":"Plappert","key":"ref37"},{"issue":"268","key":"ref38","first-page":"1","article-title":"Stable-Baselines3: Reliable Reinforcement Learning Implementations","volume-title":"Journal of Machine Learning Research","volume":"22","author":"Raffin","year":"2021"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/0001-6160(79)90196-2"},{"article-title":"Reproducibility of Benchmarked Deep Reinforcement Learning Tasks for Continuous Control","volume-title":"Reproducibility in Machine Learning Workshop, ICML\u201917","author":"Islam","key":"ref40"},{"article-title":"On the Convergence of Reinforcement Learning","year":"2020","author":"Chakravorty","key":"ref41"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/tro.2022.3184837"}],"event":{"name":"2025 American Control Conference (ACC)","start":{"date-parts":[[2025,7,8]]},"location":"Denver, CO, USA","end":{"date-parts":[[2025,7,10]]}},"container-title":["2025 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11107441\/11107442\/11108065.pdf?arnumber=11108065","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T23:57:39Z","timestamp":1755907059000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11108065\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,8]]},"references-count":42,"URL":"https:\/\/doi.org\/10.23919\/acc63710.2025.11108065","relation":{},"subject":[],"published":{"date-parts":[[2025,7,8]]}}}