{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,9]],"date-time":"2025-07-09T22:46:08Z","timestamp":1752101168056,"version":"3.37.3"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,19]],"date-time":"2021-09-19T00:00:00Z","timestamp":1632009600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,19]],"date-time":"2021-09-19T00:00:00Z","timestamp":1632009600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,19]],"date-time":"2021-09-19T00:00:00Z","timestamp":1632009600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R&D Program of China","doi-asserted-by":"publisher","award":["2020YFB1600200"],"award-info":[{"award-number":["2020YFB1600200"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004147","name":"Tsinghua University-Didi Joint Research Center for Future Mobility","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004147","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9,19]]},"DOI":"10.1109\/itsc48978.2021.9564576","type":"proceedings-article","created":{"date-parts":[[2021,10,25]],"date-time":"2021-10-25T19:52:26Z","timestamp":1635191546000},"page":"586-592","source":"Crossref","is-referenced-by-count":4,"title":["Belief state separated reinforcement learning for autonomous vehicle decision making under uncertainty"],"prefix":"10.1109","author":[{"given":"Ziqing","family":"Gu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yujie","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingliang","family":"Duan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shengbo Eben","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianyu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenhan","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sifa","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC45102.2020.9294300"},{"key":"ref11","article-title":"Loss is its own reward: Self-supervision for reinforcement learning","author":"shelhamer","year":"2017","journal-title":"5th International Conference on Learning Representations ICLR 2017"},{"key":"ref12","article-title":"Embed to control: A locally linear latent dynamics model for control from raw images","author":"watter","year":"0","journal-title":"NIPS"},{"key":"ref13","first-page":"7444","article-title":"Solar: Deep structured representations for model-based reinforcement learning","author":"zhang","year":"2019","journal-title":"International Conference on Machine Learning"},{"key":"ref14","article-title":"Dream to control: Learning behaviors by latent imagination","author":"hafner","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340873"},{"key":"ref16","first-page":"2117","article-title":"Deep variational reinforcement learning for pomdps","author":"igl","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref17","article-title":"Learning and querying fast generative models for reinforcement learning","volume":"abs 1802 3006","author":"buesing","year":"2018","journal-title":"CoRR"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3046646"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2017.07.011"},{"journal-title":"Reinforcement learning and control","year":"2020","author":"li","key":"ref4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.23919\/ICCAS50221.2020.9268413"},{"key":"ref6","article-title":"Deep recurrent q-learning for partially observable mdps","author":"hausknecht","year":"0","journal-title":"AAAI Fall Symp"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74690-4_71"},{"key":"ref7","article-title":"Variational recurrent models for solving partially observable control tasks","author":"han","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2017.7995949"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/768"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2014.6957722"},{"key":"ref20","article-title":"Variational temporal abstraction","author":"kim","year":"0","journal-title":"NeurIPS"},{"key":"ref22","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"0","journal-title":"ICML"},{"key":"ref21","volume":"abs 1811 4551","author":"hafner","year":"2019","journal-title":"Learning latent dynamics for planning from pixels"},{"key":"ref24","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume":"80","author":"haarnoja","year":"2018","journal-title":"Proceedings of the 35th International Conference on Machine Learning"},{"key":"ref23","article-title":"Auto-encoding variational bayes","volume":"abs 1312 6114","author":"kingma","year":"2014","journal-title":"CoRR"},{"journal-title":"Numerically stable dynamic bicycle model for discrete-time control","year":"2020","author":"ge","key":"ref25"}],"event":{"name":"2021 IEEE International Intelligent Transportation Systems Conference (ITSC)","start":{"date-parts":[[2021,9,19]]},"location":"Indianapolis, IN, USA","end":{"date-parts":[[2021,9,22]]}},"container-title":["2021 IEEE International Intelligent Transportation Systems Conference (ITSC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9564393\/9564395\/09564576.pdf?arnumber=9564576","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:51:04Z","timestamp":1652201464000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9564576\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,19]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/itsc48978.2021.9564576","relation":{},"subject":[],"published":{"date-parts":[[2021,9,19]]}}}