{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T10:14:43Z","timestamp":1766139283547,"version":"3.44.0"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,5,10]],"date-time":"2025-05-10T00:00:00Z","timestamp":1746835200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,10]],"date-time":"2025-05-10T00:00:00Z","timestamp":1746835200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906087"],"award-info":[{"award-number":["61906087"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Research and Development Program of Heilongjiang Province of China","award":["2022ZX01A15"],"award-info":[{"award-number":["2022ZX01A15"]}]},{"name":"Cultivation Plan Project of Qingdao Science and Technology Planning Park of China","award":["23-1-5-yqpy-11-qy"],"award-info":[{"award-number":["23-1-5-yqpy-11-qy"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s00530-025-01800-x","type":"journal-article","created":{"date-parts":[[2025,5,10]],"date-time":"2025-05-10T06:32:25Z","timestamp":1746858745000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["ST-GRU: spatiotemporal gated recurrent unit for video prediction"],"prefix":"10.1007","volume":"31","author":[{"given":"Jing","family":"Dong","sequence":"first","affiliation":[]},{"given":"Junzhuo","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Ben","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Chang","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Cheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,10]]},"reference":[{"key":"1800_CR1","unstructured":"Shi, X., Chen, Z., Wang, H., Yeung, D.-Y., Wong, W.-K., Woo, W.-c.: Convolutional lstm network: a machine learning approach for precipitation nowcasting. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"1800_CR2","unstructured":"Shi, X., Gao, Z., Lausen, L., Wang, H., Yeung, D.-Y., Wong, W.-k., Woo, W.-c.: Deep learning for precipitation nowcasting: a benchmark and a new model. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"1800_CR3","unstructured":"Wang, Y., Long, M., Wang, J., Gao, Z., Yu, P.S.: Predrnn: recurrent neural networks for predictive learning using spatiotemporal lstms. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"1800_CR4","doi-asserted-by":"crossref","unstructured":"Zhang, J., Zheng, Y., Qi, D.: Deep spatio-temporal residual networks for citywide crowd flows prediction. In: Thirty-first AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.10735"},{"key":"1800_CR5","doi-asserted-by":"crossref","unstructured":"Wang, Y., Zhang, J., Zhu, H., Long, M., Wang, J., Yu, P.S.: Memory in memory: a predictive neural network for learning higher-order non-stationarity from spatiotemporal dynamics. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9154\u20139162 (2019)","DOI":"10.1109\/CVPR.2019.00937"},{"key":"1800_CR6","unstructured":"Oh, J., Guo, X., Lee, H., Lewis, R.L., Singh, S.: Action-conditional video prediction using deep networks in atari games. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"1800_CR7","unstructured":"Finn, C., Goodfellow, I., Levine, S.: Unsupervised learning for physical interaction through video prediction. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"issue":"6088","key":"1800_CR8","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning representations by back-propagating errors. Nature 323(6088), 533\u2013536 (1986)","journal-title":"Nature"},{"issue":"8","key":"1800_CR9","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"1800_CR10","doi-asserted-by":"crossref","unstructured":"Cho, K., Van\u00a0Merri\u00ebnboer, B., Gulcehre, C., Bahdanau, D., Bougares, F., Schwenk, H., Bengio, Y.: Learning phrase representations using rnn encoder-decoder for statistical machine translation (2014). arXiv:1406.1078","DOI":"10.3115\/v1\/D14-1179"},{"key":"1800_CR11","unstructured":"Ranzato, M., Szlam, A., Bruna, J., Mathieu, M., Collobert, R., Chopra, S.: Video (language) modeling: a baseline for generative models of natural videos (2014). arXiv:1412.6604"},{"key":"1800_CR12","unstructured":"Srivastava, N., Mansimov, E., Salakhudinov, R.: Unsupervised learning of video representations using lstms. In: International Conference on Machine Learning, pp. 843\u2013852 (2015). PMLR"},{"key":"1800_CR13","unstructured":"Ballas, N., Yao, L., Pal, C., Courville, A.C.: Delving deeper into convolutional networks for learning video representations. In: International Conference on Learning Representations (ICLR) (2016)"},{"key":"1800_CR14","unstructured":"Mathieu, M., Couprie, C., LeCun, Y.: Deep multi-scale video prediction beyond mean square error (2015). arXiv:1511.05440"},{"key":"1800_CR15","unstructured":"Babaeizadeh, M., Finn, C., Erhan, D., Campbell, R.H., Levine, S.: Stochastic variational video prediction (2017). arXiv:1710.11252"},{"key":"1800_CR16","unstructured":"Oprea, S., Martinez-Gonzalez, P., Garcia-Garcia, A., Castro-Vargas, J.A., Orts-Escolano, S., Garcia-Rodriguez, J., Argyros, A.: A review on deep learning techniques for video prediction. IEEE Trans. Pattern Anal. Mach. Intell. (2020)"},{"issue":"7553","key":"1800_CR17","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"key":"1800_CR18","doi-asserted-by":"crossref","unstructured":"Xu, Z., Wang, Y., Long, M., Wang, J., KLiss, M.: Predcnn: predictive learning with cascade convolutions. In: IJCAI, pp. 2940\u20132947 (2018)","DOI":"10.24963\/ijcai.2018\/408"},{"key":"1800_CR19","unstructured":"Wang, Y., Jiang, L., Yang, M.-H., Li, L.-J., Long, M., Fei-Fei, L.: Eidetic 3D LSTM: a model for video prediction and beyond. In: International Conference on Learning Representations (2018)"},{"key":"1800_CR20","unstructured":"Kalchbrenner, N., Oord, A., Simonyan, K., Danihelka, I., Vinyals, O., Graves, A., Kavukcuoglu, K.: Video pixel networks. In: International Conference on Machine Learning, pp. 1771\u20131779 (2017). PMLR"},{"issue":"11","key":"1800_CR21","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"1800_CR22","unstructured":"Lee, A.X., Zhang, R., Ebert, F., Abbeel, P., Finn, C., Levine, S.: Stochastic adversarial video prediction. arXiv preprint arXiv:1804.01523 (2018)"},{"key":"1800_CR23","doi-asserted-by":"crossref","unstructured":"Kwon, Y.-H., Park, M.-G.: Predicting future frames using retrospective cycle gan. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00191"},{"key":"1800_CR24","doi-asserted-by":"crossref","unstructured":"Xu, Z., Du, J., Wang, J., Jiang, C., Ren, Y.: Satellite image prediction relying on gan and lstm neural networks. In: ICC 2019-2019 IEEE International Conference on Communications (ICC), pp. 1\u20136 (2019). IEEE","DOI":"10.1109\/ICC.2019.8761462"},{"key":"1800_CR25","unstructured":"Arjovsky, M., Bottou, L.: Towards principled methods for training generative adversarial networks (2017). arXiv:1701.04862"},{"key":"1800_CR26","doi-asserted-by":"crossref","unstructured":"Mei, K., Patel, V.: VIDM: video implicit diffusion models. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 37, pp. 9117\u20139125 (2023)","DOI":"10.1609\/aaai.v37i8.26094"},{"key":"1800_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Hu, J., Cheng, W., Paudel, D., Yang, J.: ExtDM: distribution extrapolation diffusion model for video prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19310\u201319320 (2024)","DOI":"10.1109\/CVPR52733.2024.01827"},{"key":"1800_CR28","doi-asserted-by":"crossref","unstructured":"Shrivastava, G., Shrivastava, A.: Video prediction by modeling videos as continuous multi-dimensional processes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7236\u20137245 (2024)","DOI":"10.1109\/CVPR52733.2024.00691"},{"key":"1800_CR29","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: Advances in Neural Information Processing Systems, vol. 27 (2014)"},{"key":"1800_CR30","doi-asserted-by":"crossref","unstructured":"Schuldt, C., Laptev, I., Caputo, B.: Recognizing human actions: a local svm approach. In: Proceedings of the 17th International Conference on Pattern Recognition, 2004. ICPR 2004, vol. 3, pp. 32\u201336 (2004). IEEE","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"1800_CR31","unstructured":"Ebert, F., Finn, C., Lee, A.X., Levine, S.: Self-supervised visual planning with temporal skip connections. In: CoRL, pp. 344\u2013356 (2017)"},{"key":"1800_CR32","unstructured":"Villegas, R., Yang, J., Hong, S., Lin, X., Lee, H.: Decomposing motion and content for natural video sequence prediction (2017). arXiv:1706.08033"},{"key":"1800_CR33","doi-asserted-by":"crossref","unstructured":"Lin, Z., Li, M., Zheng, Z., Cheng, Y., Yuan, C.: Self-attention ConvLSTM for spatiotemporal prediction. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 11531\u201311538 (2020)","DOI":"10.1609\/aaai.v34i07.6819"},{"issue":"4","key":"1800_CR34","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"1800_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"1800_CR36","unstructured":"Jia, X., De\u00a0Brabandere, B., Tuytelaars, T., Gool, L.V.: Dynamic filter networks. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"1800_CR37","doi-asserted-by":"crossref","unstructured":"Oliu, M., Selva, J., Escalera, S.: Folded recurrent neural networks for future video prediction. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 716\u2013731 (2018)","DOI":"10.1007\/978-3-030-01264-9_44"},{"key":"1800_CR38","doi-asserted-by":"crossref","unstructured":"Zhang, J., Wang, Y., Long, M., Jianmin, W., Philip, S.Y.: Z-order recurrent neural networks for video prediction. In: 2019 IEEE International Conference on Multimedia and Expo (ICME), pp. 230\u2013235 (2019). IEEE","DOI":"10.1109\/ICME.2019.00048"},{"key":"1800_CR39","doi-asserted-by":"crossref","unstructured":"Ni, H., Shi, C., Li, K., Huang, S.X., Min, M.R.: Conditional image-to-video generation with latent flow diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18444\u201318455 (2023)","DOI":"10.1109\/CVPR52729.2023.01769"},{"key":"1800_CR40","first-page":"3","volume":"30","author":"AL Maas","year":"2013","unstructured":"Maas, A.L., Hannun, A.Y., Ng, A.Y., et al.: Rectifier nonlinearities improve neural network acoustic models. Proc. Icml 30, 3 (2013). (Citeseer)","journal-title":"Proc. Icml"},{"key":"1800_CR41","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization (2014). arXiv:1412.6980"},{"key":"1800_CR42","unstructured":"Bengio, S., Vinyals, O., Jaitly, N., Shazeer, N.: Scheduled sampling for sequence prediction with recurrent neural networks. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"1800_CR43","first-page":"13714","volume":"33","author":"J Su","year":"2020","unstructured":"Su, J., Byeon, W., Kossaifi, J., Huang, F., Kautz, J., Anandkumar, A.: Convolutional tensor-train lstm for spatio-temporal learning. Adv. Neural. Inf. Process. Syst. 33, 13714\u201313726 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1800_CR44","first-page":"23371","volume":"35","author":"V Voleti","year":"2022","unstructured":"Voleti, V., Jolicoeur-Martineau, A., Pal, C.: MCVD\u2014masked conditional video diffusion for prediction, generation, and interpolation. Adv. Neural. Inf. Process. Syst. 35, 23371\u201323385 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01800-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01800-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01800-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T09:00:43Z","timestamp":1757926843000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01800-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,10]]},"references-count":44,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["1800"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01800-x","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2025,5,10]]},"assertion":[{"value":"21 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"241"}}