{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:29:04Z","timestamp":1730266144129,"version":"3.28.0"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/ijcnn55064.2022.9892381","type":"proceedings-article","created":{"date-parts":[[2022,9,30]],"date-time":"2022-09-30T19:56:04Z","timestamp":1664567764000},"page":"1-9","source":"Crossref","is-referenced-by-count":0,"title":["MaxEnt Dreamer: Maximum Entropy Reinforcement Learning with World Model"],"prefix":"10.1109","author":[{"given":"Hongying","family":"Ma","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University,Shanghai,China,200240"}]},{"given":"Wuyang","family":"Xue","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Shanghai,China,200240"}]},{"given":"Rendong","family":"Ying","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Shanghai,China,200240"}]},{"given":"PeiLin","family":"Liu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University,Shanghai,China,200240"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460756"},{"key":"ref38","first-page":"1851","article-title":"Latent space policies for hierarchical reinforcement learning","author":"haarnoja","year":"2018","journal-title":"International con-ference on machine learning"},{"key":"ref33","article-title":"Learning continuous control policies by stochastic value gradients","author":"heess","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref32","article-title":"Model-augmented actor-critic: Back-propagating through paths","author":"clavera","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.29007\/shbv"},{"key":"ref30","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","author":"hafner","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref37","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","author":"haarnoja","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref36","article-title":"On stochastic opti-mal control and reinforcement learning by approximate inference","author":"rawlik","year":"0","journal-title":"Twenty-Third International Joint Conference on Artificial Intelligence"},{"key":"ref35","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume":"8","author":"ziebart","year":"2008","journal-title":"AAAI"},{"key":"ref34","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","author":"deisenroth","year":"0","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)"},{"key":"ref10","article-title":"Parameter space noise for 
exploration","author":"plappert","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref40","first-page":"7553","article-title":"Maximum entropy-regularized multi-goal reinforcement learning","author":"zhao","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref11","first-page":"449","article-title":"A distributional perspective on reinforcement learning","author":"bellemare","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref12","article-title":"Prioritized experience replay","author":"schaul","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref13","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref14","first-page":"1373","article-title":"Revisiting rainbow: Promoting more insightful and inclusive deep reinforcement learning research","author":"ceron","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref15","first-page":"1928","article-title":"Asynchronous methods for deep rein-forcement learning","author":"mnih","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref16","article-title":"Prox-imal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref17","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref18","article-title":"Maximum a posteriori policy optimisation","author":"abdolmaleki","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref19","article-title":"V-mpo: On-policy maximum a posteriori policy optimization for discrete and continuous control","author":"song","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref28","article-title":"Model-based value expansion for efficient model-free rein-forcement learning","author":"feinberg","year":"0","journal-title":"Proceedings of the 35th International Conference on Machine Learning (ICML 2018)"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref27","article-title":"Dream to control: Learning behaviors by latent imagination","author":"hafner","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1203"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","article-title":"Mastering atari, go, chess and shogi by planning with a learned model","volume":"588","author":"schrittwieser","year":"2020","journal-title":"Nature"},{"key":"ref5","first-page":"8583","article-title":"Planning to explore via self-supervised world models","author":"sekar","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2018.07.006"},{"key":"ref9","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement 
learning","author":"wang","year":"0","journal-title":"International Conference on Machine Learning"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref1"},{"key":"ref20","article-title":"Decision transformer: Reinforcement learning via sequence modeling","author":"chen","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref45","article-title":"Distributed distributional deterministic policy gradients","author":"barth-maron","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref22","article-title":"Reinforcement learning based recommender systems: A survey","author":"afsar","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref21","first-page":"1","article-title":"Lane change decision-making through deep reinforcement learning with rule-based con-straints","author":"wang","year":"0","journal-title":"2019 International Joint Conference on Neural Networks (IJCNN)"},{"key":"ref42","article-title":"Deepmind control suite","author":"tassa","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref24","article-title":"Algorithmic framework for model-based deep reinforcement learning with theoretical guarantees","author":"luo","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref41","article-title":"Dreamerpro: Reconstruction-free model-based reinforcement learning with prototypical representations","author":"deng","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-65289-4_17"},{"journal-title":"dreamer-pytorch","year":"2020","author":"urakami","key":"ref44"},{"key":"ref26","first-page":"9190","article-title":"Model-based reinforcement learning via latent-space col-location","author":"rybkin","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref25","article-title":"When to trust your model: Model-based policy optimization","author":"janner","year":"2019","journal-title":"ArXiv Preprint"}],"event":{"name":"2022 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2022,7,18]]},"location":"Padua, Italy","end":{"date-parts":[[2022,7,23]]}},"container-title":["2022 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9891857\/9889787\/09892381.pdf?arnumber=9892381","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T22:59:23Z","timestamp":1667516363000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9892381\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/ijcnn55064.2022.9892381","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}