{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,29]],"date-time":"2025-08-29T10:31:38Z","timestamp":1756463498629,"version":"3.28.0"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9534127","type":"proceedings-article","created":{"date-parts":[[2021,9,22]],"date-time":"2021-09-22T16:32:37Z","timestamp":1632328357000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["FoLaR: Foggy Latent Representations for Reinforcement Learning with Partial Observability"],"prefix":"10.1109","author":[{"given":"Hardik","family":"Meisheri","sequence":"first","affiliation":[]},{"given":"Harshad","family":"Khadilkar","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"year":"2016","author":"tasfi","journal-title":"Pygame learning environment","key":"ref39"},{"year":"2018","author":"chevalier-boisvert","journal-title":"Minimalistic grid-world environment for openai gym","key":"ref38"},{"key":"ref33","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"Preprint"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1287\/opre.21.5.1071"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1109\/CDC.1995.478953"},{"year":"2018","author":"sutton","journal-title":"Reinforcement Learning An Introduction","key":"ref30"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref36","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref35","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref34","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume":"abs 1801 1290","author":"haarnoja","year":"2018","journal-title":"CoRR"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref40","first-page":"151","article-title":"Understanding the impact of entropy on policy optimization","volume":"97","author":"ahmed","year":"2019","journal-title":"Proceedings of the 36th International Conference on Machine Learning"},{"key":"ref11","article-title":"Learning invariant representations for reinforcement learning without reconstruction","author":"zhang","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref12","article-title":"Darla: Improving zero-shot transfer in RL","author":"higgins","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref13","article-title":"Data-efficient reinforcement learning with self-predictive representations","author":"schwarzer","year":"0","journal-title":"International Conference on Learning Representations"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.29007\/25x3"},{"key":"ref15","article-title":"A geometric perspective on optimal representations for reinforcement learning","author":"bellemare","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref16","article-title":"The value function polytope in reinforcement learning","author":"dadashi","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref17","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","author":"hafner","year":"2019","journal-title":"ICML"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/IJCNN.2010.5596468"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/IJCNN.2012.6252823"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1145\/1102351.1102475"},{"key":"ref4","article-title":"Approximate information state for approximate planning and reinforcement learning in partially observed systems","author":"subramanian","year":"2020","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1145\/1015330.1015441"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1126\/science.aar6404"},{"key":"ref6","first-page":"2450","article-title":"Recurrent world models facilitate policy evolution","author":"ha","year":"2018","journal-title":"Advances in neural information processing systems"},{"doi-asserted-by":"publisher","key":"ref29","DOI":"10.1007\/978-3-540-74690-4_71"},{"key":"ref5","article-title":"Self-imitation learning","author":"oh","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref8","article-title":"Deepmdp: Learning continuous latent space models for representation learning","author":"gelada","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref7","article-title":"World models","volume":"abs 1803 10122","author":"ha","year":"2018","journal-title":"ArXiv"},{"year":"2014","author":"silver","journal-title":"Deterministic policy gradient algorithms","key":"ref2"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1016\/0022-247X(65)90154-X","article-title":"Optimal control of markov processes with incomplete state information","volume":"10","author":"astrom","year":"1965","journal-title":"Journal of Mathematical Analysis and Applications"},{"key":"ref1","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"0","journal-title":"International Conference on Machine Learning"},{"year":"2004","author":"ferns","journal-title":"Metrics for finite markov decision processes","key":"ref20"},{"key":"ref22","article-title":"Equivalence relations in fully and partially observable markov decision processes","author":"castro","year":"0","journal-title":"Twenty-First International Joint Conference on Artificial Intelligence"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1609\/aaai.v34i06.6564"},{"key":"ref24","first-page":"712","article-title":"Learning predictive state representations","author":"singh","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref23","first-page":"1555","article-title":"Predictive representations of state","author":"littman","year":"2002","journal-title":"Advances in Neural Information Proc Systems"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1109\/ICMLA.2004.1383528"},{"key":"ref25","first-page":"1520","article-title":"A planning algorithm for predictive state representations","author":"izadi","year":"2003","journal-title":"IJCAI"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09534127.pdf?arnumber=9534127","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T11:46:16Z","timestamp":1652183176000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9534127\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9534127","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}