{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T14:03:14Z","timestamp":1780408994295,"version":"3.54.1"},"reference-count":88,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62022050"],"award-info":[{"award-number":["62022050"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62106144"],"award-info":[{"award-number":["62106144"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62021002"],"award-info":[{"award-number":["62021002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005090","name":"Beijing Nova Program","doi-asserted-by":"publisher","award":["Z201100006820041"],"award-info":[{"award-number":["Z201100006820041"]}],"id":[{"id":"10.13039\/501100005090","id-type":"DOI","asserted-by":"publisher"}]},{"name":"BNRist Innovation Fund","award":["BNR2021RC01002"],"award-info":[{"award-number":["BNR2021RC01002"]}]},{"name":"Shanghai Municipal Science and Technology Major Project","award":["2021SHZDZX0102"],"award-info":[{"award-number":["2021SHZDZX0102"]}]},{"name":"Shanghai Sailing Program","award":["21Z510202133"],"award-info":[{"award-number":["21Z510202133"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,2,1]]},"DOI":"10.1109\/tpami.2022.3165153","type":"journal-article","created":{"date-parts":[[2022,4,5]],"date-time":"2022-04-05T19:32:03Z","timestamp":1649187123000},"page":"2208-2225","source":"Crossref","is-referenced-by-count":510,"title":["PredRNN: A Recurrent Neural Network for Spatiotemporal Predictive Learning"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6224-2481","authenticated-orcid":false,"given":"Yunbo","family":"Wang","sequence":"first","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Haixu","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Software, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jianjin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Wangjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhifeng","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Software, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6841-7943","authenticated-orcid":false,"given":"Jianmin","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Software, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3491-5968","authenticated-orcid":false,"given":"Philip S.","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Software, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9421-463X","authenticated-orcid":false,"given":"Mingsheng","family":"Long","sequence":"additional","affiliation":[{"name":"School of Software, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","first-page":"802","article-title":"Convolutional LSTM network: A machine learning approach for precipitation nowcasting","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Shi"},{"key":"ref2","first-page":"879","article-title":"PredRNN: Recurrent neural networks for predictive learning using spatiotemporal lstms","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/408"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00937"},{"key":"ref5","first-page":"153","article-title":"Learning to see physics via visual de-animation","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Wu"},{"key":"ref6","article-title":"Relational neural expectation maximization: Unsupervised discovery of objects and their interactions","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Van Steenkiste"},{"key":"ref7","first-page":"2688","article-title":"Neural relational inference for interacting systems","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kipf"},{"key":"ref8","article-title":"Unsupervised discovery of parts, structure, and dynamics","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Xu"},{"key":"ref9","article-title":"Eidetic 3D LSTM: A model for video prediction and beyond","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang"},{"key":"ref10","first-page":"2450","article-title":"Recurrent world models facilitate policy evolution","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Ha"},{"key":"ref11","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hafner"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"ref13","first-page":"344","article-title":"Self-supervised visual planning with temporal skip connections","author":"Ebert","year":"2016","journal-title":"Proc. Conf. Robot Learn."},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/1888.003.0013"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/5.58337"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref17","first-page":"1017","article-title":"Generating text with recurrent neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sutskever"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"ref19","article-title":"Towards end-to-end speech recognition with recurrent neural networks","volume-title":"Proc. 31st Int. Conf. Mach. Learn.","author":"Graves"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref22","article-title":"On the number of response regions of deep feed forward networks with piece-wise linear activations","author":"Pascanu","year":"2013"},{"key":"ref23","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Sutskever"},{"key":"ref24","first-page":"1171","article-title":"Scheduled sampling for sequence prediction with recurrent neural networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Bengio"},{"key":"ref25","first-page":"5617","article-title":"Deep learning for precipitation nowcasting: A benchmark and a new model","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Shi"},{"key":"ref26","first-page":"64","article-title":"Unsupervised learning for physical interaction through video prediction","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Finn"},{"key":"ref27","first-page":"5123","article-title":"PredRNN++: Towards a resolution of the deep-in-time dilemma in spatiotemporal predictive learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"key":"ref28","article-title":"Efficient and information-preserving future frame prediction and beyond","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yu"},{"key":"ref29","first-page":"13714","article-title":"Convolutional tensor-train LSTM for spatio-temporal learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Su"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3045007"},{"key":"ref31","first-page":"2863","article-title":"Action-conditional video prediction using deep networks in atari games","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Oh"},{"key":"ref32","article-title":"Deep multi-scale video prediction beyond mean square error","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mathieu"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00165"},{"key":"ref34","first-page":"843","article-title":"Unsupervised learning of video representations using LSTMs","volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","author":"Srivastava"},{"key":"ref35","first-page":"9731","article-title":"Stochastic variational video prediction","volume-title":"Proc. IEEE\/CVF Int. Conf. Learn. Representations","author":"Babaeizadeh"},{"key":"ref36","article-title":"Scaling autoregressive video models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Weissenborn"},{"key":"ref37","article-title":"Videoflow: A flow-based generative model for video","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kumar"},{"key":"ref38","first-page":"91","article-title":"Visual dynamics: Probabilistic future frame synthesis via cross convolutional networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Xue"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10735"},{"key":"ref40","first-page":"2672","article-title":"Generative adversarial nets","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Goodfellow"},{"key":"ref41","first-page":"1486","article-title":"Deep generative image models using a Laplacian pyramid of adversarial networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Denton"},{"key":"ref42","first-page":"4271","article-title":"Temporal coherency based criteria for predicting video frames using deep multi-stage generative adversarial networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Bhattacharjee"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.194"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00558"},{"key":"ref45","first-page":"16761","article-title":"Hierarchical patch VAE-GAN: Generating diverse videos from a single sample","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Gur"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00076"},{"key":"ref47","article-title":"Video (language) modeling: A baseline for generative models of natural videos","author":"Ranzato","year":"2014"},{"key":"ref48","first-page":"3560","article-title":"Learning to generate long-term future via hierarchical prediction","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Villegas"},{"key":"ref49","first-page":"6038","article-title":"Hierarchical long-term video prediction without supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wichers"},{"key":"ref50","first-page":"11570","article-title":"Variational temporal abstraction","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Kim"},{"key":"ref51","first-page":"1174","article-title":"Stochastic video generation with a learned prior","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Denton"},{"key":"ref52","article-title":"Stochastic adversarial video prediction","author":"Lee","year":"2018"},{"key":"ref53","first-page":"81","article-title":"High fidelity video prediction with large stochastic recurrent neural networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Villegas"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00770"},{"key":"ref55","first-page":"3233","article-title":"Stochastic latent residual video prediction","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Franceschi"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00235"},{"key":"ref57","article-title":"Decomposing motion and content for natural video sequence prediction","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Villegas"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00096"},{"key":"ref59","first-page":"4417","article-title":"Unsupervised learning of disentangled representations from video","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Denton"},{"key":"ref60","first-page":"517","article-title":"Learning to decompose and disentangle representations for video prediction","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Hsieh"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01197"},{"key":"ref62","article-title":"Unsupervised video decomposition using spatio-temporal iterative inference","author":"Zablotskaia","year":"2020"},{"key":"ref63","first-page":"2424","article-title":"Multi-object representation learning with iterative variational inference","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Greff"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01149"},{"key":"ref65","article-title":"Spatio-temporal video autoencoder with differentiable memory","volume-title":"Proc. Int. Conf. Learn. Representations Workshop","author":"Patraucean"},{"key":"ref66","article-title":"Deep predictive coding networks for video prediction and unsupervised learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lotter"},{"key":"ref67","first-page":"1771","article-title":"Video pixel networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kalchbrenner"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_46"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_44"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00158"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01518"},{"key":"ref72","article-title":"Neural turing machines","author":"Graves","year":"2014"},{"key":"ref73","first-page":"2440","article-title":"End-to-end memory networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Sukhbaatar"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1038\/nature20101"},{"key":"ref75","article-title":"Distributed representations","author":"Hinton","year":"1986","journal-title":"Parallel Distributed Processing: Explorations in the Microstructure of Cognition. Volume 1: Foundations"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/72.846725"},{"issue":"Nov","key":"ref77","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"ref78","first-page":"231","article-title":"Neural network ensembles, cross validation, and active learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Krogh"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-73003-5_293"},{"key":"ref80","article-title":"Recurrent environment simulators","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chiappa"},{"key":"ref81","article-title":"Dream to control: Learning behaviors by latent imagination","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hafner"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"ref83","article-title":"Traffic4cast 2019: Traffic map movie forecasting.","year":"2019"},{"key":"ref84","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"ref87","first-page":"667","article-title":"Dynamic filter networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"De Brabandere"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10008914\/09749915.pdf?arnumber=9749915","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T00:33:00Z","timestamp":1705537980000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9749915\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,1]]},"references-count":88,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2022.3165153","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,2,1]]}}}