{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T10:57:40Z","timestamp":1765018660874,"version":"3.46.0"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031736353"},{"type":"electronic","value":"9783031736360"}],"license":[{"start":{"date-parts":[[2024,11,5]],"date-time":"2024-11-05T00:00:00Z","timestamp":1730764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,5]],"date-time":"2024-11-05T00:00:00Z","timestamp":1730764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73636-0_17","type":"book-chapter","created":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T15:03:58Z","timestamp":1730732638000},"page":"286-304","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["PredBench: Benchmarking Spatio-Temporal Prediction Across Diverse Disciplines"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-8462-6819","authenticated-orcid":false,"given":"ZiDong","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0494-911X","authenticated-orcid":false,"given":"Zeyu","family":"Lu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8712-8747","authenticated-orcid":false,"given":"Di","family":"Huang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2772-9320","authenticated-orcid":false,"given":"Tong","family":"He","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1831-9952","authenticated-orcid":false,"given":"Xihui","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9163-2761","authenticated-orcid":false,"given":"Wanli","family":"Ouyang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3378-7201","authenticated-orcid":false,"given":"Lei","family":"Bai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,5]]},"reference":[{"key":"17_CR1","unstructured":"Babaeizadeh, M., Finn, C., Erhan, D., Campbell, R.H., Levine, S.: Stochastic variational video prediction. In: International Conference on Learning Representations (2018)"},{"key":"17_CR2","unstructured":"Babaeizadeh, M., Saffar, M.T., Nair, S., Levine, S., Finn, C., Erhan, D.: FitVid: overfitting in pixel-level video prediction. arXiv preprint arXiv:2106.13195 (2021)"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Bi, K., Xie, L., Zhang, H., Chen, X., Gu, X., Tian, Q.: Accurate medium-range global weather forecasting with 3D neural networks. Nature (2023)","DOI":"10.1038\/s41586-023-06185-3"},{"key":"17_CR4","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuScenes: a multimodal dataset for autonomous driving. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"17_CR5","unstructured":"Chang, Z., et al.: MAU: a motion-aware unit for video prediction and beyond. In: Advances in Neural Information Processing Systems (2021)"},{"key":"17_CR6","unstructured":"Chen, K., et al.: FengWu: pushing the skillful global medium-range weather forecast beyond 10 days lead. arXiv preprint arXiv:2304.02948 (2023)"},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: IEEE Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"17_CR8","unstructured":"Dasari, S., et al.: RoboNet: large-scale multi-robot learning. In: CoRL (2019)"},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Doll\u00e1r, P., Wojek, C., Schiele, B., Perona, P.: Pedestrian detection: a benchmark. In: IEEE Conference on Computer Vision and Pattern Recognition (2009)","DOI":"10.1109\/CVPRW.2009.5206631"},{"key":"17_CR10","unstructured":"Du, Y., et al.: Learning universal policies via text-guided video generation. In: Advances in Neural Information Processing Systems (2023)"},{"key":"17_CR11","unstructured":"Ebert, F., Finn, C., Lee, A.X., Levine, S.: Self-supervised visual planning with temporal skip connections. CoRL (2017)"},{"key":"17_CR12","unstructured":"Eichenberger, C., et al.: Traffic4cast at NeurIPS 2021 - temporal and spatial few-shot transfer learning in gridded geo-spatial processes. In: Kiela, D., Ciccone, M., Caputo, B. (eds.) Proceedings of the NeurIPS 2021 Competitions and Demonstrations Track. Proceedings of Machine Learning Research, vol.\u00a0176, pp. 97\u2013112. PMLR (2022)"},{"key":"17_CR13","unstructured":"Finn, C., Goodfellow, I.J., Levine, S.: Unsupervised learning for physical interaction through video prediction. In: Advances in Neural Information Processing Systems (2016)"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Gao, Z., Tan, C., Wu, L., Li, S.Z.: SimVP: simpler yet better video prediction. In: IEEE Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.00317"},{"key":"17_CR15","unstructured":"Gao, Z., et al.: EarthFormer: exploring space-time transformers for earth system forecasting. In: Advances in Neural Information Processing Systems (2022)"},{"key":"17_CR16","unstructured":"Garg, S., Rasp, S., Thuerey, N.: WeatherBench probability: a benchmark dataset for probabilistic medium-range weather forecasting along with deep learning baseline models. arXiv preprint arXiv:2205.00865 (2022)"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Stiller, C., Urtasun, R.: Vision meets robotics: the KITTI dataset. IJRR (2013)","DOI":"10.1177\/0278364913491297"},{"key":"17_CR18","unstructured":"Guen, V.L., Thome, N.: Disentangling physical dynamics from unknown factors for unsupervised video prediction. In: IEEE Conference on Computer Vision and Pattern Recognition (2020)"},{"key":"17_CR19","unstructured":"Gupta, A., Tian, S., Zhang, Y., Wu, J., Mart\u00edn-Mart\u00edn, R., Fei-Fei, L.: MaskViT: masked visual pre-training for video prediction. In: International Conference on Learning Representations (2023)"},{"key":"17_CR20","doi-asserted-by":"crossref","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. (1997)","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"17_CR21","unstructured":"Hu, A., et al.: GAIA-1: a generative world model for autonomous driving. arXiv preprint arXiv:2309.17080 (2023)"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3.6m: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. (2013)","DOI":"10.1109\/TPAMI.2013.248"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Lam, R., et\u00a0al.: GraphCast: learning skillful medium-range global weather forecasting. Science (2023)","DOI":"10.1126\/science.adi2336"},{"key":"17_CR24","doi-asserted-by":"crossref","unstructured":"LeCun, Y., et al.: Backpropagation applied to handwritten zip code recognition. Neural Comput. (1989)","DOI":"10.1162\/neco.1989.1.4.541"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Lin, H., Gao, Z., Xu, Y., Wu, L., Li, L., Li, S.Z.: Conditional local convolution for spatio-temporal meteorological forecasting. In: AAAI (2022)","DOI":"10.1609\/aaai.v36i7.20711"},{"key":"17_CR26","doi-asserted-by":"crossref","unstructured":"Rasp, S., et al.: WeatherBench 2: a benchmark for the next generation of data-driven global weather models. arXiv preprint arXiv:2011.13456 (2023)","DOI":"10.1029\/2023MS004019"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Ravuri, S.V., et al.: Skilful precipitation nowcasting using deep generative models of radar. Nature (2021)","DOI":"10.1038\/s41586-021-03854-z"},{"key":"17_CR28","doi-asserted-by":"crossref","unstructured":"Rumelhart, D.E., Hinton, G.E., Williams, R.J.: Learning representations by back-propagating errors. Nature (1986)","DOI":"10.21236\/ADA164453"},{"key":"17_CR29","doi-asserted-by":"crossref","unstructured":"Schuldt, C., Laptev, I., Caputo, B.: Recognizing human actions: a local SVM approach. In: International Conference on Learning Representations (2004)","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"17_CR30","unstructured":"Shi, X., Chen, Z., Wang, H., Yeung, D., Wong, W., Woo, W.: Convolutional LSTM network: a machine learning approach for precipitation nowcasting. In: Advances in Neural Information Processing Systems (2015)"},{"key":"17_CR31","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"17_CR32","unstructured":"Srivastava, N., Mansimov, E., Salakhudinov, R.: Unsupervised learning of video representations using LSTMs. In: International Conference on Learning Representations (2015)"},{"key":"17_CR33","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: IEEE Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"17_CR34","unstructured":"Tan, C., Gao, Z., Li, S.Z.: SimVP: towards simple yet powerful spatiotemporal predictive learning. arXiv preprint arXiv:2211.12509 (2022)"},{"key":"17_CR35","doi-asserted-by":"crossref","unstructured":"Tan, C., et al.: Temporal attention unit: towards efficient spatiotemporal predictive learning. In: IEEE Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.01800"},{"key":"17_CR36","unstructured":"Tan, C., et al.: OpenSTL: a comprehensive benchmark of spatio-temporal predictive learning. arXiv preprint arXiv:2306.11249 (2023)"},{"key":"17_CR37","unstructured":"Unterthiner, T., van Steenkiste, S., Kurach, K., Marinier, R., Michalski, M., Gelly, S.: Towards accurate generative models of video: a new metric & challenges. arXiv preprint arXiv:1812.01717 (2018)"},{"key":"17_CR38","unstructured":"Historical climate observation and stimulation dataset. https:\/\/tianchi.aliyun.com\/dataset\/98942. Accessed 17 Nov 2023"},{"key":"17_CR39","unstructured":"Veillette, M., Samsi, S., Mattioli, C.: SEVIR: a storm event imagery dataset for deep learning applications in radar and satellite meteorology. In: Advances in Neural Information Processing Systems (2020)"},{"key":"17_CR40","unstructured":"Voleti, V., Jolicoeur-Martineau, A., Pal, C.: MCVD - masked conditional video diffusion for prediction, generation, and interpolation. In: Advances in Neural Information Processing Systems (2022)"},{"key":"17_CR41","unstructured":"Walke, H., et al.: BridgeData V2: a dataset for robot learning at scale. arXiv preprint arXiv:2308.12952 (2023)"},{"key":"17_CR42","unstructured":"Wang, Y., Gao, Z., Long, M., Wang, J., Yu, P.S.: PredRNN++: towards a resolution of the deep-in-time dilemma in spatiotemporal predictive learning. In: International Conference on Machine Learning (2018)"},{"key":"17_CR43","unstructured":"Wang, Y., Jiang, L., Yang, M., Li, L., Long, M., Fei-Fei, L.: Eidetic 3D LSTM: a model for video prediction and beyond. In: International Conference on Learning Representations (2019)"},{"key":"17_CR44","unstructured":"Wang, Y., Long, M., Wang, J., Gao, Z., Yu, P.S.: PredRNN: recurrent neural networks for predictive learning using spatiotemporal LSTMs. In: Advances in Neural Information Processing Systems (2017)"},{"key":"17_CR45","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: PredRNN: a recurrent neural network for spatiotemporal predictive learning. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/TPAMI.2022.3165153"},{"key":"17_CR46","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. (2004)"},{"key":"17_CR47","doi-asserted-by":"crossref","unstructured":"Wu, B., Nair, S., Mart\u00edn-Mart\u00edn, R., Fei-Fei, L., Finn, C.: Greedy hierarchical variational autoencoders for large-scale video prediction. In: IEEE Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.00235"},{"key":"17_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, J., Zheng, Y., Qi, D., Li, R., Yi, X., Li, T.: Predicting citywide crowd flows using deep spatio-temporal residual networks. Artif. Intell. (2018)","DOI":"10.1016\/j.artint.2018.03.002"},{"key":"17_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: IEEE Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00068"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73636-0_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T10:52:52Z","timestamp":1765018372000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73636-0_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,5]]},"ISBN":["9783031736353","9783031736360"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73636-0_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,5]]},"assertion":[{"value":"5 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}