{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T22:49:38Z","timestamp":1778798978175,"version":"3.51.4"},"reference-count":69,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T00:00:00Z","timestamp":1714521600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62250062"],"award-info":[{"award-number":["62250062"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62106144"],"award-info":[{"award-number":["62106144"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U19B2035"],"award-info":[{"award-number":["U19B2035"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanghai Municipal Science and Technology","award":["2021SHZDZX0102"],"award-info":[{"award-number":["2021SHZDZX0102"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanghai Sailing Program","award":["21Z510202133"],"award-info":[{"award-number":["21Z510202133"]}]},{"name":"CCF-Tencent Rhino-Bird Open Research Fund"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1109\/tpami.2023.3335263","type":"journal-article","created":{"date-parts":[[2023,11,24]],"date-time":"2023-11-24T18:54:51Z","timestamp":1700852091000},"page":"2788-2803","source":"Crossref","is-referenced-by-count":4,"title":["Model-Based Reinforcement Learning With Isolated Imaginations"],"prefix":"10.1109","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4666-510X","authenticated-orcid":false,"given":"Minting","family":"Pan","sequence":"first","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7974-047X","authenticated-orcid":false,"given":"Xiangming","family":"Zhu","sequence":"additional","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3066-4910","authenticated-orcid":false,"given":"Yitao","family":"Zheng","sequence":"additional","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6224-2481","authenticated-orcid":false,"given":"Yunbo","family":"Wang","sequence":"additional","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4029-3322","authenticated-orcid":false,"given":"Xiaokang","family":"Yang","sequence":"additional","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"23178","article-title":"Iso-dream: Isolating and leveraging noncontrollable visual dynamics in world models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Pan"},{"key":"ref2","first-page":"2863","article-title":"Action-conditional video prediction using deep networks in atari games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Oh"},{"key":"ref3","first-page":"2455","article-title":"Recurrent world models facilitate policy evolution","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ha"},{"key":"ref4","article-title":"Visual foresight: Model-based deep reinforcement learning for vision-based robotic control","author":"Ebert","year":"2018"},{"key":"ref5","first-page":"6118","article-title":"Value prediction network","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Oh"},{"key":"ref6","article-title":"Dream to control: Learning behaviors by latent imagination","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hafner"},{"key":"ref7","first-page":"8583","article-title":"Planning to explore via self-supervised world models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sekar"},{"key":"ref8","article-title":"Mastering atari with discrete world models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hafner"},{"key":"ref9","article-title":"Model-based reinforcement learning for Atari","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kaiser"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"ref11","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hafner"},{"key":"ref12","first-page":"4114","article-title":"Challenging common assumptions in the unsupervised learning of disentangled representations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Locatello"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01614"},{"key":"ref15","first-page":"11525","article-title":"Object-centric learning with slot attention","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Locatello"},{"key":"ref16","article-title":"Recurrent independent mechanisms","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Goyal"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref18","first-page":"1","article-title":"CARLA: An open urban driving simulator","volume-title":"Proc. Conf. Robot Learn.","author":"Dosovitskiy"},{"key":"ref19","article-title":"Deepmind control suite","author":"Tassa","year":"2018"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561103"},{"key":"ref21","article-title":"Mastering diverse domains through world models","author":"Hafner","year":"2023"},{"key":"ref22","first-page":"4956","article-title":"DreamerPro: Reconstruction-free model-based reinforcement learning with prototypical representations","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Deng"},{"key":"ref23","first-page":"5639","article-title":"CURL: Contrastive unsupervised representations for reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Laskin"},{"key":"ref24","first-page":"3680","article-title":"Stabilizing deep Q-learning with convnets and vision transformers under data augmentation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hansen"},{"key":"ref25","article-title":"Soft actor-critic algorithms and applications","author":"Haarnoja","year":"2018"},{"key":"ref26","article-title":"Learning invariant representations for reinforcement learning without reconstruction","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhang"},{"key":"ref27","first-page":"22591","article-title":"Denoised MDPs: Learning world models better than the world itself","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"ref29","article-title":"Image augmentation is all you need: Regularizing deep reinforcement learning from pixels","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yarats"},{"key":"ref30","first-page":"19884","article-title":"Reinforcement learning with augmented data","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Laskin"},{"key":"ref31","article-title":"Information prioritization through empowerment in visual model-based RL","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Bharadhwaj"},{"key":"ref32","first-page":"26820","article-title":"Learning general world models in a handful of reward-free deployments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Xu"},{"key":"ref33","first-page":"23150","article-title":"When to update your model: Constrained model-based reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ji"},{"key":"ref34","article-title":"Planning in stochastic environments with a learned model","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Antonoglou"},{"key":"ref35","first-page":"64","article-title":"Unsupervised learning for physical interaction through video prediction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Finn"},{"key":"ref36","article-title":"Recurrent environment simulators","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chiappa"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3165153"},{"key":"ref38","article-title":"FitVid: Overfitting in pixel-level video prediction","author":"Babaeizadeh","year":"2021"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968597"},{"key":"ref40","first-page":"843","article-title":"Unsupervised learning of video representations using LSTMs","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Srivastava"},{"key":"ref41","first-page":"802","article-title":"Convolutional LSTM network: A machine learning approach for precipitation nowcasting","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Shi"},{"key":"ref42","first-page":"613","article-title":"Generating videos with scene dynamics","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Vondrick"},{"key":"ref43","first-page":"4271","article-title":"Temporal coherency based criteria for predicting video frames using deep multi-stage generative adversarial networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Bhattacharjee"},{"key":"ref44","first-page":"879","article-title":"PredRNN: Recurrent neural networks for predictive learning using spatiotemporal LSTMs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref45","first-page":"3560","article-title":"Learning to generate long-term future via hierarchical prediction","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Villegas"},{"key":"ref46","first-page":"6038","article-title":"Hierarchical long-term video prediction without supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wichers"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_44"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_44"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01258-8_11"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00158"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00461"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00171"},{"key":"ref53","article-title":"Stochastic variational video prediction","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Babaeizadeh"},{"key":"ref54","first-page":"1174","article-title":"Stochastic video generation with a learned prior","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Denton"},{"key":"ref55","first-page":"81","article-title":"High fidelity video prediction with large stochastic recurrent neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Villegas"},{"key":"ref56","first-page":"11570","article-title":"Variational temporal abstraction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kim"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00770"},{"key":"ref58","first-page":"3233","article-title":"Stochastic latent residual video prediction","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Franceschi"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00235"},{"key":"ref60","article-title":"Relational neural expectation maximization: Unsupervised discovery of objects and their interactions","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chang"},{"key":"ref61","first-page":"517","article-title":"Learning to decompose and disentangle representations for video prediction","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hsieh"},{"key":"ref62","first-page":"2424","article-title":"Multi-object representation learning with iterative variational inference","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Greff"},{"key":"ref63","article-title":"Unsupervised video decomposition using spatio-temporal iterative inference","author":"Zablotskaia","year":"2020"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00096"},{"key":"ref65","first-page":"6694","article-title":"Neural expectation maximization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Greff"},{"key":"ref66","first-page":"8615","article-title":"Sequential attend, infer, repeat: Generative modelling of moving objects","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kosiorek"},{"key":"ref67","article-title":"Decomposing motion and content for natural video sequence prediction","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Villegas"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01149"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01197"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10490207\/10328687.pdf?arnumber=10328687","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,2]],"date-time":"2025-04-02T17:50:00Z","timestamp":1743616200000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10328687\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5]]},"references-count":69,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2023.3335263","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5]]}}}