{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T02:25:08Z","timestamp":1730341508023,"version":"3.28.0"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,31]],"date-time":"2023-05-31T00:00:00Z","timestamp":1685491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,31]],"date-time":"2023-05-31T00:00:00Z","timestamp":1685491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,31]]},"DOI":"10.23919\/acc55779.2023.10155792","type":"proceedings-article","created":{"date-parts":[[2023,7,3]],"date-time":"2023-07-03T17:48:03Z","timestamp":1688406483000},"page":"2560-2567","source":"Crossref","is-referenced-by-count":0,"title":["Belief State Actor-Critic Algorithm from Separation Principle for POMDP"],"prefix":"10.23919","author":[{"given":"Yujie","family":"Yang","sequence":"first","affiliation":[{"name":"Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuxuan","family":"Jiang","sequence":"additional","affiliation":[{"name":"Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianyu","family":"Chen","sequence":"additional","affiliation":[{"name":"Tsinghua University,Institute of Interdisciplinary Information Sciences,Beijing,China,100084"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziqing","family":"Gu","sequence":"additional","affiliation":[{"name":"Tsinghua University,State Key Lab of Automotive Safety and Energy, School of Vehicle and Mobility,Beijing,China,100084"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuming","family":"Yin","sequence":"additional","affiliation":[{"name":"Zhejiang University of Technology,College of Mechanical Engineering,Zhejiang,China,310014"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qian","family":"Zhang","sequence":"additional","affiliation":[{"name":"Horizon Robotics,Beijing,China,100085"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kai","family":"Yu","sequence":"additional","affiliation":[{"name":"Horizon Robotics,Beijing,China,100085"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Deep variational reinforcement learning for pomdps","author":"igl","year":"2018","journal-title":"Proceedings of the 35th International Conference on Machine Learning"},{"key":"ref12","article-title":"Recurrent world models facilitate policy evolution","author":"ha","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref15","article-title":"Dream to control: Learning behaviors by latent imagination","author":"hafner","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref14","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","author":"hafner","year":"2019","journal-title":"International Conference on Machine Learning"},{"article-title":"Deep variational bayes filters: Unsupervised learning of state space models from raw data","year":"2016","author":"karl","key":"ref11"},{"article-title":"Deep kalman filters","year":"2015","author":"krishnan","key":"ref10"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.2078"},{"key":"ref17","article-title":"Auto-encoding variational bayes","author":"kingma","year":"2013","journal-title":"28th International Conference on Computational Linguistics"},{"key":"ref16","article-title":"Stochastic latent actor-critic: Deep reinforcement learning with a latent variable model","volume":"33","author":"lee","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1115\/1.3656559"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.1961.6371743"},{"key":"ref24","article-title":"Distributional soft actor-critic: Off-policy reinforcement learning for addressing value estimation errors","author":"duan","year":"2021","journal-title":"IEEE transactions on neural networks learning systems"},{"key":"ref23","volume":"5","author":"bertsekas","year":"1996","journal-title":"Stochastic Optimal Control The Discrete-Time Case"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-19-7784-8"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3175595"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1016\/0022-247X(65)90154-X","article-title":"Optimal control of markov processes with incomplete state information","volume":"10","author":"\u00e5str\u00f6m","year":"1965","journal-title":"Journal of Mathematical Analysis Applications"},{"key":"ref22","volume":"1","author":"bertsekas","year":"2012","journal-title":"Dynamic Programming and Optimal Control Volumes I and II"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/0016-0032(65)90528-4"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref28"},{"key":"ref27","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"Proceedings of the 35th International Conference on Machine Learning"},{"key":"ref8","first-page":"1772","article-title":"Despot: Online pomdp planning with regularization","author":"somani","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref7","first-page":"2164","article-title":"Monte-carlo planning in large pomdps","author":"silver","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref9","first-page":"611","author":"kurniawati","year":"2016","journal-title":"An Online POMDP Solver for Uncertainty Planning in Dynamic Environment In ISRR"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1287\/opre.21.5.1071"},{"journal-title":"The optimal control of partially observable Markov processes","year":"1971","author":"sondik","key":"ref3"},{"key":"ref6","first-page":"542","article-title":"Point-based pomdp algorithms: Improved analysis and implementation","author":"smith","year":"2005","journal-title":"Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence UAI 2005"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1659"}],"event":{"name":"2023 American Control Conference (ACC)","start":{"date-parts":[[2023,5,31]]},"location":"San Diego, CA, USA","end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10155646\/10155787\/10155792.pdf?arnumber=10155792","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T17:30:06Z","timestamp":1690219806000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10155792\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,31]]},"references-count":28,"URL":"https:\/\/doi.org\/10.23919\/acc55779.2023.10155792","relation":{},"subject":[],"published":{"date-parts":[[2023,5,31]]}}}