{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T10:28:37Z","timestamp":1763202517912,"version":"3.28.0"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9534358","type":"proceedings-article","created":{"date-parts":[[2021,9,23]],"date-time":"2021-09-23T18:32:08Z","timestamp":1632421928000},"page":"1-8","source":"Crossref","is-referenced-by-count":8,"title":["Learning a Belief Representation for Delayed Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Pierre","family":"Liotet","sequence":"first","affiliation":[]},{"given":"Erick","family":"Venneri","sequence":"additional","affiliation":[]},{"given":"Marcello","family":"Restelli","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"key":"ref33","first-page":"881","article-title":"Made: Masked autoencoder for distribution estimation","author":"germain","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref32","first-page":"7184","article-title":"Neural autoregressive distribution estimation","volume":"17","author":"uria","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref31","first-page":"1530","article-title":"Variational inference with normalizing flows","volume":"37","author":"rezende","year":"2015","journal-title":"Proceedings of the 32nd International Conference on Machine Learning ser Proceedings of Machine Learning Research"},{"key":"ref30","first-page":"2338","article-title":"Masked autoregressive flow for density estimation","author":"papamakarios","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref37","first-page":"463","article-title":"Learning to drive a bicycle using reinforcement learning and shaping","volume":"98","author":"randl\u00f8v","year":"1998","journal-title":"ICML"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"journal-title":"OpenAI Gym","year":"2016","author":"brockman","key":"ref35"},{"key":"ref34","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2017.2720851"},{"journal-title":"Challenges of Real-World Reinforcement Learning","year":"2019","author":"dulac-arnold","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/149439.133106"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-012-5322-7"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2010.5650345"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-008-9056-7"},{"journal-title":"At human speed Deep reinforcement learning with action delay","year":"2018","author":"firoiu","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"journal-title":"Delay-aware model-based reinforcement learning for continuous control","year":"2020","author":"chen","key":"ref18"},{"journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming","year":"2014","author":"puterman","key":"ref19"},{"journal-title":"Neural predictive belief representations","year":"2018","author":"guo","key":"ref28"},{"key":"ref4","first-page":"1025","article-title":"Point-based value iteration: An anytime algorithm for pomdps","volume":"3","author":"pineau","year":"2003","journal-title":"IJCAI"},{"key":"ref27","first-page":"1061","article-title":"Learning belief representations for imitation learning in pomdps","author":"gangwani","year":"2020","journal-title":"Uncertainty in Artificial Intelligence"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1613\/jair.2567"},{"key":"ref6","first-page":"323","article-title":"Using eligibility traces to find the best memoryless policy in partially observable markov decision processes","author":"loch","year":"1998","journal-title":"ICML"},{"key":"ref29","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-012-9200-2"},{"journal-title":"Memory-based control with recurrent neural networks","year":"2015","author":"heess","key":"ref8"},{"key":"ref7","article-title":"Deep recurrent q-learning for partially observable mdps","author":"hausknecht","year":"2015","journal-title":"AAAI Fall Symposium on Sequential Decision Making for Intelligent Agents (AAAI-SDMIA15)"},{"journal-title":"Dota 2 with large scale deep reinforcement learning","year":"2019","author":"berner","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593894"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2003.809799"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref22"},{"journal-title":"Dynamic Programming Deterministic and Stochastic Models","year":"1987","author":"bertsekas","key":"ref21"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1038\/370256b0","article-title":"Motion extrapolation in catching","author":"nijhawan","year":"1994","journal-title":"Nature"},{"journal-title":"Learning from delayed rewards","year":"1989","author":"watkins","key":"ref23"},{"key":"ref26","first-page":"1172","article-title":"Predictive-state decoders: Encoding the future into recurrent networks","author":"venkatraman","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref25","first-page":"1954","article-title":"Recurrent predictive state policy networks","author":"hefny","year":"2018","journal-title":"ICMLC 2018"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09534358.pdf?arnumber=9534358","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T11:45:51Z","timestamp":1652183151000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9534358\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9534358","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}