{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T15:47:07Z","timestamp":1774453627444,"version":"3.50.1"},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T00:00:00Z","timestamp":1698796800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T00:00:00Z","timestamp":1698796800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T00:00:00Z","timestamp":1698796800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2018AAA0101400"],"award-info":[{"award-number":["2018AAA0101400"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61921004"],"award-info":[{"award-number":["61921004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province of China","doi-asserted-by":"publisher","award":["BK20202006"],"award-info":[{"award-number":["BK20202006"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1109\/tnnls.2022.3151412","type":"journal-article","created":{"date-parts":[[2022,3,7]],"date-time":"2022-03-07T20:45:04Z","timestamp":1646685904000},"page":"8493-8502","source":"Crossref","is-referenced-by-count":7,"title":["Learning a World Model With Multitimescale Memory Augmentation"],"prefix":"10.1109","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4610-3454","authenticated-orcid":false,"given":"Wenzhe","family":"Cai","sequence":"first","affiliation":[{"name":"School of Automation, Southeast University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1802-0435","authenticated-orcid":false,"given":"Teng","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Automation, Southeast University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5037-4658","authenticated-orcid":false,"given":"Jiawei","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Control Science and Engineering, Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9269-334X","authenticated-orcid":false,"given":"Changyin","family":"Sun","sequence":"additional","affiliation":[{"name":"School of Automation, Southeast University, Nanjing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref3","article-title":"DD-PPO: Learning near-perfect PointGoal navigators from 2.5 billion frames","author":"Wijmans","year":"2019","journal-title":"arXiv:1911.00357"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"ref6","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref7","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018","journal-title":"arXiv:1802.09477"},{"key":"ref8","article-title":"Sample efficient actor-critic with experience replay","author":"Wang","year":"2017","journal-title":"arXiv:1611.01224"},{"key":"ref9","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018","journal-title":"arXiv:1801.01290"},{"key":"ref10","article-title":"IMPALA: Scalable distributed deep-RL with importance weighted actor-learner architectures","author":"Espeholt","year":"2018","journal-title":"arXiv:1802.01561"},{"key":"ref11","article-title":"SEED RL: Scalable and efficient deep-RL with accelerated central inference","author":"Espeholt","year":"2019","journal-title":"arXiv:1910.06591"},{"key":"ref12","article-title":"Action-conditional video prediction using deep networks in atari games","author":"Oh","year":"2015","journal-title":"arXiv:1507.08750"},{"key":"ref13","article-title":"A deep learning approach for joint video frame and reward prediction in atari games","author":"Leibfried","year":"2016","journal-title":"arXiv:1611.07078"},{"key":"ref14","article-title":"Recurrent environment simulators","author":"Chiappa","year":"2017","journal-title":"arXiv:1704.02254"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"ref16","article-title":"Visual foresight: Model-based deep reinforcement learning for vision-based robotic control","author":"Ebert","year":"2018","journal-title":"arXiv:1812.00568"},{"key":"ref17","article-title":"Model-based reinforcement learning for atari","author":"Kaiser","year":"2019","journal-title":"arXiv:1903.00374"},{"key":"ref18","article-title":"Stochastic adversarial video prediction","author":"Lee","year":"2018","journal-title":"arXiv:1804.01523"},{"key":"ref19","article-title":"Learning latent dynamics for planning from pixels","author":"Hafner","year":"2018","journal-title":"arXiv:1811.04551"},{"key":"ref20","first-page":"81","article-title":"High fidelity video prediction with large stochastic recurrent neural networks","volume-title":"Proc. NIPS","author":"Villegas"},{"key":"ref21","article-title":"Dream to control: Learning behaviors by latent imagination","author":"Hafner","year":"2019","journal-title":"arXiv:1912.01603"},{"key":"ref22","article-title":"World models","author":"Ha","year":"2018","journal-title":"arXiv:1803.10122"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.316"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.179"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00470"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12276"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00936"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.292"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00151"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/286"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2690910"},{"key":"ref33","article-title":"Unifying count-based exploration and intrinsic motivation","author":"Bellemare","year":"2016","journal-title":"arXiv:1606.01868"},{"key":"ref34","article-title":"Count-based exploration with neural density models","author":"Ostrovski","year":"2017","journal-title":"arXiv:1703.01310"},{"key":"ref35","article-title":"Exploration: A study of count-based exploration for deep reinforcement learning","author":"Tang","year":"2017","journal-title":"arXiv:1611.04717"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5955"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref38","article-title":"Exploration by random network distillation","author":"Burda","year":"2018","journal-title":"arXiv:1810.12894"},{"key":"ref39","article-title":"Noisy networks for exploration","author":"Fortunato","year":"2018","journal-title":"arXiv:1706.10295"},{"key":"ref40","article-title":"Large-scale study of curiosity-driven learning","author":"Burda","year":"2018","journal-title":"arXiv:1808.04355"},{"key":"ref41","article-title":"Never give up: Learning directed exploration strategies","author":"Badia","year":"2020","journal-title":"arXiv:2002.06038"},{"key":"ref42","volume-title":"Episodic Curiosity Through Reachability","author":"Savinov","year":"2019"},{"key":"ref43","article-title":"Prioritized experience replay","author":"Schaul","year":"2016","journal-title":"arXiv:1511.05952"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10299535\/09729537.pdf?arnumber=9729537","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T02:01:25Z","timestamp":1705024885000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9729537\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11]]},"references-count":43,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2022.3151412","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11]]}}}