{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T21:16:16Z","timestamp":1765228576355,"version":"3.46.0"},"reference-count":37,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"ONR","award":["N00014-22-1-2475"],"award-info":[{"award-number":["N00014-22-1-2475"]}]},{"name":"NSF","award":["1802867"],"award-info":[{"award-number":["1802867"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1109\/tnnls.2025.3593259","type":"journal-article","created":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T18:16:43Z","timestamp":1755627403000},"page":"20386-20400","source":"Crossref","is-referenced-by-count":0,"title":["Information-State-Based Reinforcement Learning for the Control of Partially Observed Nonlinear Systems"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8128-3051","authenticated-orcid":false,"given":"Raman","family":"Goyal","sequence":"first","affiliation":[{"name":"Department of Aerospace Engineering, Texas A&#x0026;M University, College Station, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5718-2261","authenticated-orcid":false,"given":"Mohamed","family":"Naveed Gul Mohamed","sequence":"additional","affiliation":[{"name":"Department of Aerospace Engineering, Texas A&#x0026;M University, College Station, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1698-5978","authenticated-orcid":false,"given":"Ran","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Aerospace Engineering, Texas A&#x0026;M University, College Station, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6481-8968","authenticated-orcid":false,"given":"Aayushman","family":"Sharma","sequence":"additional","affiliation":[{"name":"Department of Aerospace Engineering, Texas A&#x0026;M University, College Station, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1598-5060","authenticated-orcid":false,"given":"Suman","family":"Chakravorty","sequence":"additional","affiliation":[{"name":"Department of Aerospace Engineering, Texas A&#x0026;M University, College Station, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"volume-title":"Dynamic Programming and Optimal Control","year":"2000","author":"Bertsekas","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2024.3382376"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2010.2043839"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MCS.2023.3253419"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1115\/1.4065801"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3053269"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1002\/9781118122631"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3007414"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/9.481517"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCE.2023.3300883"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-062922-090153"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3207346"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3245980"},{"key":"ref15","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv:1509.02971"},{"issue":"1","key":"ref16","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"Levine","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3010304"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CDC49753.2023.10383754"},{"issue":"12","key":"ref20","first-page":"1","article-title":"Approximate information state for approximate planning and reinforcement learning in partially observed systems","volume":"23","author":"Subramanian","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74690-4_71"},{"key":"ref22","article-title":"Memory-based control with recurrent neural networks","author":"Heess","year":"2015","journal-title":"arXiv:1512.04455"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636140"},{"key":"ref24","first-page":"5175","article-title":"When is partially observable reinforcement learning not scary?","volume-title":"Proc. 35th Conf. Learn. Theory","author":"Liu"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2021.3108552"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1115\/1.4068705"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CDC45484.2021.9683350"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561001"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2005.1469949"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1080\/00207170701364913"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.23919\/ACC55779.2023.10156137"},{"volume-title":"Mathematical Analysis","year":"1974","author":"Apostol","key":"ref32"},{"volume-title":"Stochastic Systems: Estimation, Identification, and Adaptive Control","year":"1986","author":"Kumar","key":"ref33"},{"key":"ref34","article-title":"On the feedback law in stochastic optimal nonlinear control","author":"Mohamed","year":"2024","journal-title":"arXiv:2004.01041"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386025"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CDC49753.2023.10384069"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/5962385\/11272992\/11130400.pdf?arnumber=11130400","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T18:41:34Z","timestamp":1765219294000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11130400\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":37,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2025.3593259","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"type":"print","value":"2162-237X"},{"type":"electronic","value":"2162-2388"}],"subject":[],"published":{"date-parts":[[2025,12]]}}}