{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T04:04:05Z","timestamp":1749873845383,"version":"3.41.0"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T00:00:00Z","timestamp":1749772800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T00:00:00Z","timestamp":1749772800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07486-2","type":"journal-article","created":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T06:45:19Z","timestamp":1749797119000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["UST-SU: a U-shaped video prediction network based on partial autoregression"],"prefix":"10.1007","volume":"81","author":[{"given":"Zhaojun","family":"Cui","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fan","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shengqin","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,13]]},"reference":[{"issue":"5","key":"7486_CR1","doi-asserted-by":"publisher","first-page":"999","DOI":"10.1109\/TMM.2016.2639379","volume":"19","author":"H Kalbkhani","year":"2017","unstructured":"Kalbkhani H, Shayesteh MG, Haghighat N (2017) Adaptive LSTAR model for long-range variable bit rate video traffic prediction. IEEE Trans Multimed 19(5):999\u20131014. https:\/\/doi.org\/10.1109\/TMM.2016.2639379","journal-title":"IEEE Trans Multimed"},{"issue":"16","key":"7486_CR2","doi-asserted-by":"publisher","first-page":"18293","DOI":"10.1007\/s11227-023-05383-0","volume":"79","author":"Z Su","year":"2023","unstructured":"Su Z, Liu T, Hao X, Hu X (2023) Spatial-temporal graph convolutional networks for traffic flow prediction considering multiple traffic parameters. J Supercomput 79(16):18293\u201318312","journal-title":"J Supercomput"},{"key":"7486_CR3","unstructured":"Shi X, Chen Z, Wang H, Yeung D.-Y, Wong W.-K, Woo W.-c (2015) Convolutional LSTM network: a machine learning approach for precipitation nowcasting. In Advances in Neural Information Processing Systems 28"},{"key":"7486_CR4","doi-asserted-by":"crossref","unstructured":"Requena-Mesa C, Benson V, Reichstein M, Runge J, Denzler J (2021). Earthnet2021: a large-scale dataset and challenge for earth surface forecasting as a guided video prediction task. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 1132\u20131142","DOI":"10.1109\/CVPRW53098.2021.00124"},{"issue":"2","key":"7486_CR5","doi-asserted-by":"publisher","first-page":"416","DOI":"10.1109\/TMM.2018.2862341","volume":"21","author":"D Li","year":"2019","unstructured":"Li D, Yao T, Duan L-Y, Mei T, Rui Y (2019) Unified spatio-temporal attention networks for action recognition in videos. IEEE Trans Multimed 21(2):416\u2013428. https:\/\/doi.org\/10.1109\/TMM.2018.2862341","journal-title":"IEEE Trans Multimed"},{"key":"7486_CR6","doi-asserted-by":"crossref","unstructured":"Kocabas M, Athanasiou N, Black MJ (2020) Vibe: video inference for human body pose and shape estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5253\u20135263","DOI":"10.1109\/CVPR42600.2020.00530"},{"issue":"8","key":"7486_CR7","doi-asserted-by":"publisher","first-page":"8322","DOI":"10.1007\/s11227-022-04973-8","volume":"79","author":"X Liao","year":"2023","unstructured":"Liao X, Yuan J, Cai Z, Lai Jh (2023) An attention-based bidirectional GRU network for temporal action proposals generation. J Supercomput 79(8):8322\u20138339","journal-title":"J Supercomput"},{"key":"7486_CR8","doi-asserted-by":"crossref","unstructured":"Sun P, Kretzschmar H, Dotiwalla X, Chouard A, Patnaik V, Tsui P, Guo J, Zhou Y, Chai Y, Caine B, et al (2020) Scalability in perception for autonomous driving: Waymo open dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2446\u20132454","DOI":"10.1109\/CVPR42600.2020.00252"},{"issue":"2","key":"7486_CR9","doi-asserted-by":"publisher","first-page":"2830","DOI":"10.1109\/LRA.2021.3062324","volume":"6","author":"VR Kumar","year":"2021","unstructured":"Kumar VR, Yogamani S, Rashed H, Sitsu G, Witt C, Leang I, Milz S, M\u00e4der P (2021) Omnidet: surround view cameras based multi-task visual perception network for autonomous driving. IEEE Robot Autom Lett 6(2):2830\u20132837","journal-title":"IEEE Robot Autom Lett"},{"key":"7486_CR10","doi-asserted-by":"crossref","unstructured":"Wu H, Yao Z, Wang J, Long M (2021) Motionrnn: a flexible model for video prediction with spacetime-varying motions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 15435\u201315444","DOI":"10.1109\/CVPR46437.2021.01518"},{"key":"7486_CR11","unstructured":"Wang Y, Gao Z, Long M, Wang J, Philip SY (2018) Predrnn++: towards a resolution of the deep-in-time dilemma in spatiotemporal predictive learning. In: International Conference on Machine Learning, pp 5123\u20135132"},{"issue":"2","key":"7486_CR12","doi-asserted-by":"publisher","first-page":"2208","DOI":"10.1109\/TPAMI.2022.3165153","volume":"45","author":"Y Wang","year":"2022","unstructured":"Wang Y, Wu H, Zhang J, Gao Z, Wang J, Philip SY, Long M (2022) Predrnn: a recurrent neural network for spatiotemporal predictive learning. IEEE Trans Pattern Anal Mach Intell 45(2):2208\u20132225","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"7486_CR13","first-page":"11531","volume":"34","author":"Z Lin","year":"2020","unstructured":"Lin Z, Li M, Zheng Z, Cheng Y, Yuan C (2020) Self-attention ConvLSTM for spatiotemporal prediction. Proc AAAI Conf Artif Intell 34:11531\u201311538","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"7486_CR14","unstructured":"Wang Y, Jiang L, Yang M-H, Li L-J, Long M, Fei-Fei L (2019) Eidetic 3D LSTM: a model for video prediction and beyond. In: International Conference on Learning Representations"},{"key":"7486_CR15","first-page":"26950","volume":"34","author":"Z Chang","year":"2021","unstructured":"Chang Z, Zhang X, Wang S, Ma S, Ye Y, Xinguang X, Gao W (2021) Mau: a motion-aware unit for video prediction and beyond. Adv Neural Inf Process Syst 34:26950\u201326962","journal-title":"Adv Neural Inf Process Syst"},{"key":"7486_CR16","doi-asserted-by":"publisher","first-page":"2354","DOI":"10.1109\/TMM.2022.3146721","volume":"25","author":"Z Chang","year":"2022","unstructured":"Chang Z, Zhang X, Wang S, Ma S, Gao W (2022) STAM: a spatiotemporal attention based memory for video prediction. IEEE Trans Multimed 25:2354","journal-title":"IEEE Trans Multimed"},{"key":"7486_CR17","doi-asserted-by":"crossref","unstructured":"Lee S, Kim HG, Choi DH, Kim H-I, Ro YM (2021) Video prediction recalling long-term motion context via memory alignment learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 3054\u20133063","DOI":"10.1109\/CVPR46437.2021.00307"},{"key":"7486_CR18","unstructured":"Wang Y, Long M, Wang J, Gao Z, Yu PS (2017) PredRNN: recurrent neural networks for predictive learning using spatiotemporal LSTMS. In: Advances in Neural Information Processing Systems 30"},{"key":"7486_CR19","doi-asserted-by":"crossref","unstructured":"Ye X, Bilodeau G-A (2022) VPTR: efficient transformers for video prediction. In: 2022 26th International Conference on Pattern Recognition (ICPR), pp 3492\u20133499. IEEE","DOI":"10.1109\/ICPR56361.2022.9956707"},{"key":"7486_CR20","doi-asserted-by":"crossref","unstructured":"Gao Z, Tan C, Wu L, Li SZ (2022) SimVP: simpler yet better video prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 3170\u20133180","DOI":"10.1109\/CVPR52688.2022.00317"},{"key":"7486_CR21","doi-asserted-by":"crossref","unstructured":"Wang Y, Zhang J, Zhu H, Long M, Wang J, Yu PS (2019) Memory in memory: a predictive neural network for learning higher-order non-stationarity from spatiotemporal dynamics. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 9154\u20139162","DOI":"10.1109\/CVPR.2019.00937"},{"key":"7486_CR22","doi-asserted-by":"crossref","unstructured":"Gao H, Xu H, Cai Q.-Z, Wang R, Yu F, Darrell T (2019) Disentangling propagation and generation for video prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 9006\u20139015","DOI":"10.1109\/ICCV.2019.00910"},{"key":"7486_CR23","unstructured":"Mathai M (2024) Deep learning-based video prediction. Doctorate dissertation, Santa Clara University. https:\/\/scholarcommons.scu.edu\/eng_phd_theses\/53"},{"key":"7486_CR24","doi-asserted-by":"crossref","unstructured":"Chang Z, Zhang X, Wang S, Ma S, Gao W (2022) STRPM: a spatiotemporal residual predictive model for high-resolution video prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 13946\u201313955","DOI":"10.1109\/CVPR52688.2022.01356"},{"key":"7486_CR25","doi-asserted-by":"crossref","unstructured":"Sun M, Wang W, Zhu X, Liu J (2023) Moso: decomposing motion, scene and object for video prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 18727\u201318737","DOI":"10.1109\/CVPR52729.2023.01796"},{"key":"7486_CR26","unstructured":"Yu W, Lu,Y, Easterbrook S, Fidler S (2019) Crevnet: conditionally reversible video prediction. arXiv preprint arXiv:1910.11577"},{"key":"7486_CR27","unstructured":"Gupta A, Tian S, Zhang Y, Wu J, Mart\u00edn-Mart\u00edn R, Fei-Fei L (2022) Maskvit: masked visual pre-training for video prediction. arXiv preprint arXiv:2206.11894"},{"key":"7486_CR28","doi-asserted-by":"crossref","unstructured":"Akan AK, Erdem E, Erdem A, G\u00fcney F (2021) SLAMP: stochastic latent appearance and motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 14728\u201314737","DOI":"10.1109\/ICCV48922.2021.01446"},{"key":"7486_CR29","unstructured":"Denton E, Fergus R (2018) Stochastic video generation with a learned prior. In: International Conference on Machine Learning, pp 1174\u20131183. PMLR"},{"key":"7486_CR30","unstructured":"Babaeizadeh M, Finn C, Erhan D, Campbell R.H, Levine S (2017) Stochastic variational video prediction. arXiv preprint arXiv:1710.11252"},{"key":"7486_CR31","doi-asserted-by":"crossref","unstructured":"Zhang Z, Hu J, Cheng W, Paudel D, Yang J (2024) Extdm: distribution extrapolation diffusion model for video prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 19310\u201319320","DOI":"10.1109\/CVPR52733.2024.01827"},{"key":"7486_CR32","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5\u20139, 2015, Proceedings, Part III 18, pp 234\u2013241. Springer","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"7486_CR33","doi-asserted-by":"crossref","unstructured":"Zhou Z, Rahman\u00a0Siddiquee M.M, Tajbakhsh N, Liang J (2018) Unet++: a nested u-net architecture for medical image segmentation. In: Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support: 4th International Workshop, DLMIA 2018, and 8th International Workshop, ML-CDS 2018, Held in Conjunction with MICCAI 2018, Granada, Spain, September 20, 2018, Proceedings 4, pp 3\u201311. Springer","DOI":"10.1007\/978-3-030-00889-5_1"},{"key":"7486_CR34","doi-asserted-by":"crossref","unstructured":"Huang H, Lin L, Tong R, Hu H, Zhang Q, Iwamoto Y, Han X, Chen Y-W, Wu J (2020). Unet 3+: A full-scale connected Unet for medical image segmentation. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 1055\u20131059. IEEE","DOI":"10.1109\/ICASSP40776.2020.9053405"},{"issue":"1","key":"7486_CR35","doi-asserted-by":"publisher","first-page":"014006","DOI":"10.1117\/1.JMI.6.1.014006","volume":"6","author":"MZ Alom","year":"2019","unstructured":"Alom MZ, Yakopcic C, Hasan M, Taha TM, Asari VK (2019) Recurrent residual u-net for medical image segmentation. J Med Imaging 6(1):014006\u2013014006","journal-title":"J Med Imaging"},{"key":"7486_CR36","unstructured":"Oktay O, Schlemper J, Folgoc L.L, Lee M, Heinrich M, Misawa K, Mori K, McDonagh S, Hammerla NY, Kainz B et al (2018) Attention u-net: learning where to look for the pancreas. arXiv preprint arXiv:1804.03999"},{"key":"7486_CR37","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"7486_CR38","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980"},{"key":"7486_CR39","unstructured":"Bengio S, Vinyals O, Jaitly N, Shazeer N (2015) Scheduled sampling for sequence prediction with recurrent neural networks. In: Advances in Neural Information Processing Systems 28"},{"key":"7486_CR40","doi-asserted-by":"crossref","unstructured":"Zhang J, Zheng Y, Qi D (2017) Deep spatio-temporal residual networks for citywide crowd flows prediction. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 31","DOI":"10.1609\/aaai.v31i1.10735"},{"issue":"4","key":"7486_CR41","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik AC, Sheikh HR, Simoncelli EP (2004) Image quality assessment: from error visibility to structural similarity. IEEE Trans Image Process 13(4):600\u2013612","journal-title":"IEEE Trans Image Process"},{"key":"7486_CR42","unstructured":"Unterthiner T, Van\u00a0Steenkiste S, Kurach K, Marinier R, Michalski M, Gelly S (2018). Towards accurate generative models of video: a new metric & challenges. arXiv preprint arXiv:1812.01717"},{"key":"7486_CR43","unstructured":"Srivastava N, Mansimov E, Salakhudinov R (2015) Unsupervised learning of video representations using LSTMS. In: International Conference on Machine Learning, pp 843\u2013852"},{"key":"7486_CR44","doi-asserted-by":"crossref","unstructured":"Zhang J, Zheng Y, Qi D (2017) Deep spatio-temporal residual networks for citywide crowd flows prediction. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 31","DOI":"10.1609\/aaai.v31i1.10735"},{"key":"7486_CR45","doi-asserted-by":"crossref","unstructured":"Schuldt C, Laptev I, Caputo B (2004) Recognizing human actions: a local SVM approach. In: Proceedings of the 17th International Conference on Pattern Recognition, 2004. ICPR 2004, vol 3, pp 32\u201336. IEEE","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"7486_CR46","doi-asserted-by":"crossref","unstructured":"Ionescu C, Papava D, Olaru V, Sminchisescu C (2013) Human3. 6m: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans Pattern Anal Mach Intell 36(7):1325\u20131339","DOI":"10.1109\/TPAMI.2013.248"},{"issue":"11","key":"7486_CR47","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger A, Lenz P, Stiller C, Urtasun R (2013) Vision meets robotics: the kitti dataset. Int J Robot Res 32(11):1231\u20131237","journal-title":"Int J Robot Res"},{"key":"7486_CR48","unstructured":"Villegas R, Yang J, Hong S, Lin X, Lee H (2017) Decomposing motion and content for natural video sequence prediction. arXiv preprint arXiv:1706.08033"},{"key":"7486_CR49","doi-asserted-by":"crossref","unstructured":"Tang S, Li C, Zhang P, Tang R (2023) Swinlstm: improving spatiotemporal prediction accuracy using swin transformer and LSTM. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 13470\u201313479","DOI":"10.1109\/ICCV51070.2023.01239"},{"key":"7486_CR50","doi-asserted-by":"crossref","unstructured":"Tan C, Gao Z, Wu L, Xu Y, Xia J, Li S, Li S.Z (2023) Temporal attention unit: towards efficient spatiotemporal predictive learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 18770\u201318782","DOI":"10.1109\/CVPR52729.2023.01800"},{"issue":"11","key":"7486_CR51","first-page":"13281","volume":"45","author":"Z Yao","year":"2023","unstructured":"Yao Z, Wang Y, Wu H, Wang J, Long M (2023) Modernn: harnessing spatiotemporal mode collapse in unsupervised predictive learning. IEEE Trans Pattern Anal Mach Intell 45(11):13281\u201313296","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"7486_CR52","doi-asserted-by":"crossref","unstructured":"Tang Y, Dong P, Tang Z, Chu X, Liang J (2024) VMRNN: integrating vision mamba and LSTM for efficient and accurate spatiotemporal forecasting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5663\u20135673","DOI":"10.1109\/CVPRW63382.2024.00575"},{"key":"7486_CR53","unstructured":"Rusch TK, Chamberlain BP, Mahoney MW, Bronstein MM, Mishra S (2022) Gradient gating for deep multi-rate learning on graphs. arXiv preprint arXiv:2210.00513"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07486-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07486-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07486-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T06:45:39Z","timestamp":1749797139000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07486-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,13]]},"references-count":53,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2025,6]]}},"alternative-id":["7486"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07486-2","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,13]]},"assertion":[{"value":"20 May 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 June 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"1021"}}