{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,11]],"date-time":"2025-01-11T05:34:14Z","timestamp":1736573654343,"version":"3.32.0"},"reference-count":19,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100016311","name":"Arm","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100016311","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006754","name":"Army Research Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006754","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1109\/milcom61039.2024.10773784","type":"proceedings-article","created":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T18:47:40Z","timestamp":1733510860000},"page":"463-468","source":"Crossref","is-referenced-by-count":0,"title":["Designing Near-Optimal Partially Observable Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Ming","family":"Shi","sequence":"first","affiliation":[{"name":"The Ohio State University,Dept. of ECE,Columbus,OH"}]},{"given":"Yingbin","family":"Liang","sequence":"additional","affiliation":[{"name":"The Ohio State University,Dept. of ECE,Columbus,OH"}]},{"given":"Ness","family":"Shroff","sequence":"additional","affiliation":[{"name":"The Ohio State University,Dept. of ECE and CSE,Columbus,OH"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2007.070409"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2008.920248"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2015.2399923"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2011.5940562"},{"article-title":"Solving Rubik\u2019s Cube with a Robot Hand","year":"2019","author":"Akkaya","key":"ref5"},{"key":"ref6","article-title":"PAC Reinforcement Learning with Rich Observations","volume":"29","author":"Krishnamurthy","year":"2016","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1287\/moor.12.3.441"},{"key":"ref8","first-page":"31 877","article-title":"Hindsight Learning for MDPs with Exogenous Inputs","volume-title":"International Conference on Machine Learning","author":"Sinclair"},{"key":"ref9","first-page":"18 733","article-title":"Learning in POMDPs is Sample-Efficient with Hindsight Observability","volume-title":"International Conference on Machine Learning","author":"Lee"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1287\/moor.2022.1309"},{"key":"ref11","first-page":"10","article-title":"Reinforcement Learning: Theory and Algorithms","volume-title":"Tech. Rep","author":"Agarwal","year":"2019"},{"key":"ref12","first-page":"31 243","article-title":"A Near-Optimal Algorithm for Safe Reinforcement Learning Under Instantaneous Hard Constraints","volume-title":"International Conference on Machine Learning","author":"Shi"},{"key":"ref13","first-page":"463","article-title":"Model-Based RL with Value-Targeted Regression","volume-title":"International Conference on Machine Learning","author":"Ayoub"},{"key":"ref14","first-page":"5175","article-title":"When Is Partially Observable Reinforcement Learning Not Scary?","volume-title":"Conference on Learning Theory","author":"Liu"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015411"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2021.3053910"},{"article-title":"Theoretical Hardness and Tractability of POMDPs in RL with Partial Online State Information","year":"2023","author":"Shi","key":"ref17"},{"key":"ref18","first-page":"263","article-title":"Minimax Regret Bounds for Reinforcement Learning","volume-title":"International Conference on Machine Learning","author":"Azar"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3492866.3549720"}],"event":{"name":"MILCOM 2024 - 2024 IEEE Military Communications Conference (MILCOM)","start":{"date-parts":[[2024,10,28]]},"location":"Washington, DC, USA","end":{"date-parts":[[2024,11,1]]}},"container-title":["MILCOM 2024 - 2024 IEEE Military Communications Conference (MILCOM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10773620\/10773624\/10773784.pdf?arnumber=10773784","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,10]],"date-time":"2025-01-10T20:00:27Z","timestamp":1736539227000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10773784\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/milcom61039.2024.10773784","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]}}}