{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:31:33Z","timestamp":1773318693950,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":89,"publisher":"ACM","funder":[{"name":"U.S. DOE Office of Science-Advanced Scientific Computing Research Program","award":["DE-AC02-06CH11357"],"award-info":[{"award-number":["DE-AC02-06CH11357"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3772094","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:04:47Z","timestamp":1762963487000},"page":"72-85","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["AERIS: Argonne Earth Systems Model for Reliable and Skillful Predictions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-4137-3609","authenticated-orcid":false,"given":"V\u00e4in\u00f6","family":"Hatanp\u00e4\u00e4","sequence":"first","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1082-4850","authenticated-orcid":false,"given":"Eugene","family":"Ku","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2354-2373","authenticated-orcid":false,"given":"Jason","family":"Stock","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Fort Collins, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6279-0007","authenticated-orcid":false,"given":"Murali","family":"Emani","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9981-0876","authenticated-orcid":false,"given":"Sam","family":"Foreman","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1899-7860","authenticated-orcid":false,"given":"Chunyong","family":"Jung","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory (ANL), Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0437-8655","authenticated-orcid":false,"given":"Sandeep","family":"Madireddy","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5685-039X","authenticated-orcid":false,"given":"Tung","family":"Nguyen","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, Los Angeles, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0366-4186","authenticated-orcid":false,"given":"Varuni","family":"Sastry","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7255-7647","authenticated-orcid":false,"given":"Ray A. O.","family":"Sinurat","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9008-9552","authenticated-orcid":false,"given":"Huihuo","family":"Zheng","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3271-4145","authenticated-orcid":false,"given":"Sam","family":"Wheeler","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory (ANL), Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9359-6020","authenticated-orcid":false,"given":"Troy","family":"Arcomano","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory (ANL), Seattle, USA and Allen Institute for AI, Seattle, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7248-6116","authenticated-orcid":false,"given":"Venkatram","family":"Vishwanath","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2612-7590","authenticated-orcid":false,"given":"Rao","family":"Kotamarthi","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory (ANL), Lemont, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"[n. d.]. IPEX code repository. https:\/\/github.com\/intel\/intel-extension-for-pytorch"},{"key":"e_1_3_3_2_3_2","unstructured":"[n. d.]. oneCCL code repository. https:\/\/github.com\/uxlfoundation\/oneCCL"},{"key":"e_1_3_3_2_4_2","unstructured":"[n. d.]. TOP500 List. https:\/\/www.top500.org\/lists\/top500\/"},{"key":"e_1_3_3_2_5_2","unstructured":"[n. d.]. WeatherBench2 repository. https:\/\/weatherbench2.readthedocs.io\/en\/latest\/"},{"key":"e_1_3_3_2_6_2","unstructured":"Michael\u00a0S Albergo Nicholas\u00a0M Boffi and Eric Vanden-Eijnden. 2023. Stochastic interpolants: A unifying framework for flows and diffusions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08797 (2023)."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Troy Arcomano Istvan Szunyogh Jaideep Pathak Alexander Wikner Brian\u00a0R. Hunt and Edward Ott. 2020. A Machine Learning-Based Global Atmospheric Forecast Model. Geophysical Research Letters 47 (2020) e2020GL087776.","DOI":"10.1029\/2020GL087776"},{"key":"e_1_3_3_2_8_2","unstructured":"Jinze Bai Shuai Bai Yunfei Chu Zeyu Cui Kai Dang Xiaodong Deng Yang Fan Wenbin Ge Yu Han Fei Huang et\u00a0al. 2023. Qwen technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.16609 (2023)."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Peter Bauer Alan Thorpe and Gilbert Brunet. 2015. The quiet revolution of numerical weather prediction. Nature 525 7567 (2015) 47\u201355.","DOI":"10.1038\/nature14956"},{"key":"e_1_3_3_2_10_2","unstructured":"Kaifeng Bi Lingxi Xie Hengheng Zhang Xin Chen Xiaotao Gu and Qi Tian. 2022. Pangu-Weather: A 3D High-Resolution Model for Fast and Accurate Global Weather Forecast. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.02556 (2022)."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"Kaifeng Bi Lingxi Xie Hengheng Zhang Xin Chen Xiaotao Gu and Qi Tian. 2023. Accurate medium-range global weather forecasting with 3D neural networks. Nature 619 7970 (2023) 533\u2013538.","DOI":"10.1038\/s41586-023-06185-3"},{"key":"e_1_3_3_2_12_2","unstructured":"Xiao Bi Deli Chen Guanting Chen Shanhuang Chen Damai Dai Chengqi Deng Honghui Ding Kai Dong Qiushi Du Zhe Fu et\u00a0al. 2024. Deepseek llm: Scaling open-source language models with longtermism. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.02954 (2024)."},{"key":"e_1_3_3_2_13_2","unstructured":"Ashesh Chattopadhyay Y.\u00a0Qiang Sun and Pedram Hassanzadeh. 2024. Challenges of learning multi-scale dynamics with AI weather models: Implications for stability and one solution. arxiv:https:\/\/arXiv.org\/abs\/2304.07029\u00a0[physics.flu-dyn] https:\/\/arxiv.org\/abs\/2304.07029"},{"key":"e_1_3_3_2_14_2","unstructured":"Kang Chen Tao Han Junchao Gong Lei Bai Fenghua Ling Jing-Jia Luo Xi Chen Leiming Ma Tianning Zhang Rui Su Yuanzheng Ci Bin Li Xiaokang Yang and Wanli Ouyang. 2023. FengWu: Pushing the Skillful Global Medium-range Weather Forecast beyond 10 Days Lead. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.02948 (2023)."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Lei Chen Xiaohui Zhong Feng Zhang Yuan Cheng Yinghui Xu Yuan Qi and Hao Li. 2023. FuXi: A cascade machine learning forecasting system for 15-day global weather forecast. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.12873 (2023).","DOI":"10.1038\/s41612-023-00512-1"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Mariana\u00a0CA Clare Omar Jamil and Cyril\u00a0J Morcrette. 2021. Combining distribution-based neural networks to predict weather forecast probabilities. Quarterly Journal of the Royal Meteorological Society 147 741 (2021) 4337\u20134357.","DOI":"10.1002\/qj.4180"},{"key":"e_1_3_3_2_17_2","unstructured":"Guillaume Couairon Renu Singh Anastase Charantonis Christian Lessig and Claire Monteleoni. 2024. Archesweather & archesweathergen: a deterministic and generative model for efficient ml weather forecasting. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.12971 (2024)."},{"key":"e_1_3_3_2_18_2","first-page":"7480","volume-title":"International Conference on Machine Learning","author":"Dehghani Mostafa","year":"2023","unstructured":"Mostafa Dehghani, Josip Djolonga, Basil Mustafa, Piotr Padlewski, Jonathan Heek, Justin Gilmer, Andreas\u00a0Peter Steiner, Mathilde Caron, Robert Geirhos, Ibrahim Alabdulmohsin, et\u00a0al. 2023. Scaling vision transformers to 22 billion parameters. In International Conference on Machine Learning. PMLR, 7480\u20137512."},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00013"},{"key":"e_1_3_3_2_20_2","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly Jakob Uszkoreit and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. arxiv:https:\/\/arXiv.org\/abs\/2010.11929\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2010.11929"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","unstructured":"P.\u00a0D. Dueben and P. Bauer. 2018. Challenges and design choices for global weather and climate models based on machine learning. Geoscientific Model Development 11 10 (2018) 3999\u20134009. 10.5194\/gmd-11-3999-2018","DOI":"10.5194\/gmd-11-3999-2018"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Imme Ebert-Uphoff Lander Ver\u00a0Hoef John\u00a0S Schreck Jason Stock Maria\u00a0J Molina Amy McGovern Michael Yu Bill Petzke Kyle Hilburn David\u00a0M Hall et\u00a0al. 2025. Measuring Sharpness of AI-Generated Meteorological Imagery. Artificial Intelligence for the Earth Systems (2025).","DOI":"10.1175\/AIES-D-24-0083.1"},{"key":"e_1_3_3_2_23_2","unstructured":"Zhihan Gao Xingjian Shi Hao Wang Yi Zhu Yuyang Wang Mu Li and Dit-Yan Yeung. 2023. Earthformer: Exploring Space-Time Transformers for Earth System Forecasting. arxiv:https:\/\/arXiv.org\/abs\/2207.05833\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2207.05833"},{"key":"e_1_3_3_2_24_2","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan et\u00a0al. 2024. The llama 3 herd of models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.21783 (2024)."},{"key":"e_1_3_3_2_25_2","unstructured":"Tao Han Song Guo Fenghua Ling Kang Chen Junchao Gong Jingjia Luo Junxia Gu Kan Dai Wanli Ouyang and Lei Bai. 2024. Fengwu-ghr: Learning the kilometer-scale medium-range global weather forecasting. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.00059 (2024)."},{"key":"e_1_3_3_2_26_2","first-page":"289","volume-title":"European Conference on Computer Vision","author":"Heo Byeongho","year":"2024","unstructured":"Byeongho Heo, Song Park, Dongyoon Han, and Sangdoo Yun. 2024. Rotary position embedding for vision transformer. In European Conference on Computer Vision. Springer, 289\u2013305."},{"key":"e_1_3_3_2_27_2","unstructured":"H Hersbach B Bell P Berrisford G Biavati A Hor\u00e1nyi J Mu\u00f1oz\u00a0Sabater J Nicolas C Peubey R Radu I Rozum et\u00a0al. 2018. ERA5 hourly data on single levels from 1979 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS) 10 (2018)."},{"key":"e_1_3_3_2_28_2","unstructured":"Hans Hersbach Bill Bell Paul Berrisford Shoji Hirahara Andr\u00e1s Hor\u00e1nyi Joaqu\u00edn Mu\u00f1oz-Sabater Julien Nicolas Carole Peubey Raluca Radu Dinand Schepers et\u00a0al. 2020. The ERA5 global reanalysis. Quarterly Journal of the Royal Meteorological Society 146 730 (2020) 1999\u20132049."},{"key":"e_1_3_3_2_29_2","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems 33 (2020) 6840\u20136851."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Ernest Hovm\u00f6ller. 1949. The trough-and-ridge diagram. Tellus 1 2 (1949) 62\u201366.","DOI":"10.3402\/tellusa.v1i2.8498"},{"key":"e_1_3_3_2_31_2","unstructured":"Yanping Huang Youlong Cheng Ankur Bapna Orhan Firat Mia\u00a0Xu Chen Dehao Chen HyoukJoong Lee Jiquan Ngiam Quoc\u00a0V. Le Yonghui Wu and Zhifeng Chen. 2019. GPipe: Efficient Training of Giant Neural Networks using Pipeline Parallelism. arxiv:https:\/\/arXiv.org\/abs\/1811.06965\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/1811.06965"},{"key":"e_1_3_3_2_32_2","unstructured":"Sam\u00a0Ade Jacobs Masahiro Tanaka Chengming Zhang Minjia Zhang Shuaiwen\u00a0Leon Song Samyam Rajbhandari and Yuxiong He. 2023. DeepSpeed Ulysses: System Optimizations for Enabling Training of Extreme Long Sequence Transformer Models. arxiv:https:\/\/arXiv.org\/abs\/2309.14509\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2309.14509"},{"key":"e_1_3_3_2_33_2","unstructured":"Jared Kaplan Sam McCandlish Tom Henighan Tom\u00a0B Brown Benjamin Chess Rewon Child Scott Gray Alec Radford Jeffrey Wu and Dario Amodei. 2020. Scaling laws for neural language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2001.08361 (2020)."},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"crossref","unstructured":"Tero Karras Miika Aittala Timo Aila and Samuli Laine. 2022. Elucidating the design space of diffusion-based generative models. Advances in neural information processing systems 35 (2022) 26565\u201326577.","DOI":"10.52202\/068431-1926"},{"key":"e_1_3_3_2_35_2","unstructured":"Ryan Keisler. 2022. Forecasting global weather with graph neural networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2202.07575 (2022)."},{"key":"e_1_3_3_2_36_2","unstructured":"Vijay Korthikanti Jared Casper Sangkug Lym Lawrence McAfee Michael Andersch Mohammad Shoeybi and Bryan Catanzaro. 2022. Reducing Activation Recomputation in Large Transformer Models. arxiv:https:\/\/arXiv.org\/abs\/2205.05198\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2205.05198"},{"key":"e_1_3_3_2_37_2","unstructured":"Vijay Korthikanti Jared Casper Sangkug Lym Lawrence McAfee Michael Andersch Mohammad Shoeybi and Bryan Catanzaro. 2022. Reducing Activation Recomputation in Large Transformer Models. arxiv:https:\/\/arXiv.org\/abs\/2205.05198\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2205.05198"},{"key":"e_1_3_3_2_38_2","unstructured":"Remi Lam Alvaro Sanchez-Gonzalez Matthew Willson Peter Wirnsberger Meire Fortunato Ferran Alet Suman Ravuri Timo Ewalds Zach Eaton-Rosen Weihua Hu Alexander Merose Stephan Hoyer George Holland Oriol Vinyals Jacklynn Stott Alexander Pritzel Shakir Mohamed and Peter Battaglia. 2023. GraphCast: Learning skillful medium-range global weather forecasting. arxiv:https:\/\/arXiv.org\/abs\/2212.12794\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2212.12794"},{"key":"e_1_3_3_2_39_2","unstructured":"Remi Lam Alvaro Sanchez-Gonzalez Matthew Willson Peter Wirnsberger Meire Fortunato Alexander Pritzel Suman Ravuri Timo Ewalds Ferran Alet Zach Eaton-Rosen et\u00a0al. 2022. GraphCast: Learning skillful medium-range global weather forecasting. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.12794 (2022)."},{"key":"e_1_3_3_2_40_2","unstructured":"Simon Lang Mark Rodwell and Dinand Schepers. 2023. IFS upgrade brings many improvements and unifies medium-range resolutions. ECMWF Newsletter 176 (2023) 21\u201328."},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00898"},{"key":"e_1_3_3_2_42_2","unstructured":"Zhengyang Liang Hao He Ceyuan Yang and Bo Dai. 2024. Scaling laws for diffusion transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.08184 (2024)."},{"key":"e_1_3_3_2_43_2","unstructured":"Yaron Lipman Ricky\u00a0TQ Chen Heli Ben-Hamu Maximilian Nickel and Matt Le. 2022. Flow matching for generative modeling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.02747 (2022)."},{"key":"e_1_3_3_2_44_2","unstructured":"Hao Liu Matei Zaharia and Pieter Abbeel. 2023. Ring Attention with Blockwise Transformers for Near-Infinite Context. arxiv:https:\/\/arXiv.org\/abs\/2310.01889\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2310.01889"},{"key":"e_1_3_3_2_45_2","unstructured":"Xingchao Liu Chengyue Gong and Qiang Liu. 2022. Flow straight and fast: Learning to generate and transfer data with rectified flow. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2209.03003 (2022)."},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_3_2_48_2","unstructured":"Ze Liu Yutong Lin Yue Cao Han Hu Yixuan Wei Zheng Zhang Stephen Lin and Baining Guo. 2021. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. arxiv:https:\/\/arXiv.org\/abs\/2103.14030\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2103.14030"},{"key":"e_1_3_3_2_49_2","unstructured":"Cheng Lu and Yang Song. 2024. Simplifying stabilizing and scaling continuous-time consistency models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.11081 (2024)."},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"crossref","unstructured":"Cheng Lu Yuhao Zhou Fan Bao Jianfei Chen Chongxuan Li and Jun Zhu. 2025. Dpm-solver++: Fast solver for guided sampling of diffusion probabilistic models. Machine Intelligence Research (2025) 1\u201322.","DOI":"10.1007\/s11633-025-1562-4"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"crossref","unstructured":"Peter Lynch. 2008. The origins of computer weather prediction and climate modeling. Journal of computational physics 227 7 (2008) 3431\u20133444.","DOI":"10.1016\/j.jcp.2007.02.034"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"crossref","unstructured":"Linus Magnusson and Erland K\u00e4ll\u00e9n. 2013. Factors influencing skill improvements in the ECMWF forecasting system. Monthly Weather Review 141 9 (2013) 3142\u20133153.","DOI":"10.1175\/MWR-D-12-00318.1"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"crossref","unstructured":"Morteza Mardani Noah Brenowitz Yair Cohen Jaideep Pathak Chieh-Yu Chen Cheng-Chin Liu Arash Vahdat Mohammad\u00a0Amin Nabian Tao Ge Akshay Subramaniam et\u00a0al. 2025. Residual corrective diffusion modeling for km-scale atmospheric downscaling. Communications Earth & Environment 6 1 (2025) 124.","DOI":"10.1038\/s43247-025-02042-5"},{"key":"e_1_3_3_2_54_2","unstructured":"Tung Nguyen Johannes Brandstetter Ashish Kapoor Jayesh\u00a0K Gupta and Aditya Grover. 2023. ClimaX: A foundation model for weather and climate. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2301.10343 (2023)."},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"crossref","unstructured":"Tung Nguyen Rohan Shah Hritik Bansal Troy Arcomano Romit Maulik Rao Kotamarthi Ian Foster Sandeep Madireddy and Aditya Grover. 2024. Scaling transformer neural networks for skillful and reliable medium-range weather forecasting. Advances in Neural Information Processing Systems 37 (2024) 68740\u201368771.","DOI":"10.52202\/079017-2196"},{"key":"e_1_3_3_2_56_2","unstructured":"Tung Nguyen Rohan Shah Hritik Bansal Troy Arcomano Romit Maulik Veerabhadra Kotamarthi Ian Foster Sandeep Madireddy and Aditya Grover. 2024. Scaling transformer neural networks for skillful and reliable medium-range weather forecasting. arxiv:https:\/\/arXiv.org\/abs\/2312.03876\u00a0[physics.ao-ph] https:\/\/arxiv.org\/abs\/2312.03876"},{"key":"e_1_3_3_2_57_2","unstructured":"R Pasch R Berg D Roberts and P Papin. 2021. Hurricane Laura (AL 132020). National Hurricane center tropical cyclone report (2021)."},{"key":"e_1_3_3_2_58_2","unstructured":"Jaideep Pathak Shashank Subramanian Peter Harrington Sanjeev Raja Ashesh Chattopadhyay Morteza Mardani Thorsten Kurth David Hall Zongyi Li Kamyar Azizzadenesheli et\u00a0al. 2022. Fourcastnet: A global data-driven high-resolution weather model using adaptive fourier neural operators. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2202.11214 (2022)."},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"e_1_3_3_2_61_2","unstructured":"Ilan Price Alvaro Sanchez-Gonzalez Ferran Alet Tom\u00a0R. Andersson Andrew El-Kadi Dominic Masters Timo Ewalds Jacklynn Stott Shakir Mohamed Peter Battaglia Remi Lam and Matthew Willson. 2024. GenCast: Diffusion-based ensemble forecasting for medium-range weather. arxiv:https:\/\/arXiv.org\/abs\/2312.15796\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2312.15796"},{"key":"e_1_3_3_2_62_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Qi Penghui","year":"2024","unstructured":"Penghui Qi, Xinyi Wan, Guangxing Huang, and Min Lin. 2024. Zero bubble (almost) pipeline parallelism. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_2_63_2","unstructured":"Samyam Rajbhandari Jeff Rasley Olatunji Ruwase and Yuxiong He. 2020. ZeRO: Memory Optimizations Toward Training Trillion Parameter Models. arxiv:https:\/\/arXiv.org\/abs\/1910.02054\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1910.02054"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"crossref","unstructured":"Stephan Rasp Peter\u00a0D Dueben Sebastian Scher Jonathan\u00a0A Weyn Soukayna Mouatadid and Nils Thuerey. 2020. WeatherBench: a benchmark data set for data-driven weather forecasting. Journal of Advances in Modeling Earth Systems 12 11 (2020) e2020MS002203.","DOI":"10.1029\/2020MS002203"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"crossref","unstructured":"Stephan Rasp Stephan Hoyer Alexander Merose Ian Langmore Peter Battaglia Tyler Russel Alvaro Sanchez-Gonzalez Vivian Yang Rob Carver Shreya Agrawal Matthew Chantry Zied\u00a0Ben Bouallegue Peter Dueben Carla Bromberg Jared Sisk Luke Barrington Aaron Bell and Fei Sha. 2023. WeatherBench\u00a02: A benchmark for the next generation of data-driven global weather models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.15560 (2023).","DOI":"10.1029\/2023MS004019"},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"crossref","unstructured":"Stephan Rasp Stephan Hoyer Alexander Merose Ian Langmore Peter Battaglia Tyler Russel Alvaro Sanchez-Gonzalez Vivian Yang Rob Carver Shreya Agrawal Matthew Chantry Zied\u00a0Ben Bouallegue Peter Dueben Carla Bromberg Jared Sisk Luke Barrington Aaron Bell and Fei Sha. 2024. WeatherBench 2: A benchmark for the next generation of data-driven global weather models. arxiv:https:\/\/arXiv.org\/abs\/2308.15560\u00a0[physics.ao-ph] https:\/\/arxiv.org\/abs\/2308.15560","DOI":"10.1029\/2023MS004019"},{"key":"e_1_3_3_2_68_2","doi-asserted-by":"crossref","unstructured":"Stephan Rasp and Nils Thuerey. 2021. Data-driven medium-range weather prediction with a resnet pretrained on climate simulations: A new model for weatherbench. Journal of Advances in Modeling Earth Systems 13 2 (2021) e2020MS002405.","DOI":"10.1029\/2020MS002405"},{"key":"e_1_3_3_2_69_2","unstructured":"James Reed Pavel Belevich Ke Wen Howard Huang and Will Constable. 2022. PiPPy: Pipeline Parallelism for PyTorch. https:\/\/github.com\/pytorch\/PiPPy."},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"crossref","unstructured":"Sebastian Scher. 2018. Toward data-driven weather and climate forecasting: Approximating a simple general circulation model with deep learning. Geophysical Research Letters 45 22 (2018) 12\u2013616.","DOI":"10.1029\/2018GL080704"},{"key":"e_1_3_3_2_71_2","doi-asserted-by":"publisher","unstructured":"T. Selz and G.\u00a0C. Craig. 2023. Can Artificial Intelligence-Based Weather Prediction Models Simulate the Butterfly Effect? Geophysical Research Letters 50 20 (2023) e2023GL105747. 10.1029\/2023GL105747 arXiv:https:\/\/agupubs.onlinelibrary.wiley.com\/doi\/pdf\/10.1029\/2023GL105747e2023GL105747 2023GL105747.","DOI":"10.1029\/2023GL105747"},{"key":"e_1_3_3_2_72_2","unstructured":"Noam Shazeer. 2020. Glu variants improve transformer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2002.05202 (2020)."},{"key":"e_1_3_3_2_73_2","unstructured":"Mohammad Shoeybi Mostofa Patwary Raul Puri Patrick LeGresley Jared Casper and Bryan Catanzaro. 2019. Megatron-lm: Training multi-billion parameter language models using model parallelism. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.08053 (2019)."},{"key":"e_1_3_3_2_74_2","unstructured":"Shoaib\u00a0Ahmed Siddiqui Jean Kossaifi Boris Bonev Christopher Choy Jan Kautz David Krueger and Kamyar Azizzadenesheli. 2024. Exploring the design space of deep-learning-based weather forecasting systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.07472 (2024)."},{"key":"e_1_3_3_2_75_2","first-page":"2256","volume-title":"International conference on machine learning","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International conference on machine learning. pmlr, 2256\u20132265."},{"key":"e_1_3_3_2_76_2","unstructured":"Yang Song Jascha Sohl-Dickstein Diederik\u00a0P Kingma Abhishek Kumar Stefano Ermon and Ben Poole. 2020. Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2011.13456 (2020)."},{"key":"e_1_3_3_2_77_2","volume-title":"Parameterization Schemes: Keys to Understanding Numerical Weather Prediction Models","author":"Stensrud David\u00a0J","year":"2009","unstructured":"David\u00a0J Stensrud. 2009. Parameterization Schemes: Keys to Understanding Numerical Weather Prediction Models. Cambridge University Press."},{"key":"e_1_3_3_2_78_2","volume-title":"NeurIPS 2025 Workshop on Tackling Climate Change with Machine Learning","author":"Stock Jason","year":"2025","unstructured":"Jason Stock, Troy Arcomano, and Rao Kotamarthi. 2025. Swift: An Autoregressive Consistency Model for Efficient Weather Forecasting. In NeurIPS 2025 Workshop on Tackling Climate Change with Machine Learning."},{"key":"e_1_3_3_2_79_2","unstructured":"Jason Stock Jaideep Pathak Yair Cohen Mike Pritchard Piyush Garg Dale Durran Morteza Mardani and Noah Brenowitz. 2024. DiffObs: Generative diffusion for global forecasting of satellite observations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.06517 (2024)."},{"key":"e_1_3_3_2_80_2","unstructured":"Alexander Tong Kilian Fatras Nikolay Malkin Guillaume Huguet Yanlei Zhang Jarrid Rector-Brooks Guy Wolf and Yoshua Bengio. 2023. Improving and generalizing flow-based generative models with minibatch optimal transport. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.00482 (2023)."},{"key":"e_1_3_3_2_81_2","unstructured":"Xiao Wang Siyan Liu Aristeidis Tsaris Jong-Youl Choi Ashwin Aji Ming Fan Wei Zhang Junqi Yin Moetasim Ashfaq Dan Lu and Prasanna Balaprakash. 2024. ORBIT: Oak Ridge Base Foundation Model for Earth System Predictability. arxiv:https:\/\/arXiv.org\/abs\/2404.14712\u00a0[physics.ao-ph] https:\/\/arxiv.org\/abs\/2404.14712"},{"key":"e_1_3_3_2_82_2","doi-asserted-by":"crossref","unstructured":"Zelun Wang and Jyh-Charn Liu. 2021. Translating math formula images to LaTeX sequences using deep neural networks with sequence-level training. International Journal on Document Analysis and Recognition (IJDAR) 24 1 (2021) 63\u201375.","DOI":"10.1007\/s10032-020-00360-2"},{"key":"e_1_3_3_2_83_2","volume-title":"The modelling infrastructure of the Integrated Forecasting System: Recent advances and future challenges","author":"Wedi NP","year":"2015","unstructured":"NP Wedi, P Bauer, W Denoninck, M Diamantakis, M Hamrud, C Kuhnlein, S Malardel, K Mogensen, G Mozdzynski, and PK Smolarkiewicz. 2015. The modelling infrastructure of the Integrated Forecasting System: Recent advances and future challenges. European Centre for Medium-Range Weather Forecasts."},{"key":"e_1_3_3_2_84_2","doi-asserted-by":"crossref","unstructured":"Jonathan\u00a0A Weyn Dale\u00a0R Durran and Rich Caruana. 2019. Can machines learn to predict weather? Using deep learning to predict gridded 500-hPa geopotential height from historical weather data. Journal of Advances in Modeling Earth Systems 11 8 (2019) 2680\u20132693.","DOI":"10.1029\/2019MS001705"},{"key":"e_1_3_3_2_85_2","doi-asserted-by":"crossref","unstructured":"Jonathan\u00a0A Weyn Dale\u00a0R Durran and Rich Caruana. 2020. Improving data-driven global weather prediction using deep convolutional neural networks on a cubed sphere. Journal of Advances in Modeling Earth Systems 12 9 (2020) e2020MS002109.","DOI":"10.1029\/2020MS002109"},{"key":"e_1_3_3_2_86_2","doi-asserted-by":"crossref","unstructured":"Jonathan\u00a0A Weyn Dale\u00a0R Durran Rich Caruana and Nathaniel Cresswell-Clay. 2021. Sub-seasonal forecasting with a large ensemble of deep-learning weather prediction models. Journal of Advances in Modeling Earth Systems 13 7 (2021) e2021MS002502.","DOI":"10.1029\/2021MS002502"},{"key":"e_1_3_3_2_87_2","unstructured":"Jared\u00a0D. Willard Peter Harrington Shashank Subramanian Ankur Mahesh Travis\u00a0A. O\u2019Brien and William\u00a0D. Collins. 2024. Analyzing and Exploring Training Recipes for Large-Scale Transformer-Based Weather Prediction. arxiv:https:\/\/arXiv.org\/abs\/2404.19630\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2404.19630"},{"key":"e_1_3_3_2_88_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"e_1_3_3_2_89_2","unstructured":"Biao Zhang and Rico Sennrich. 2019. Root mean square layer normalization. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_3_2_90_2","unstructured":"Yanli Zhao Andrew Gu Rohan Varma Liang Luo Chien-Chin Huang Min Xu Less Wright Hamid Shojanazeri Myle Ott Sam Shleifer Alban Desmaison Can Balioglu Pritam Damania Bernard Nguyen Geeta Chauhan Yuchen Hao Ajit Mathews and Shen Li. 2023. PyTorch FSDP: Experiences on Scaling Fully Sharded Data Parallel. arxiv:https:\/\/arXiv.org\/abs\/2304.11277\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2304.11277"}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3772094","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:48:56Z","timestamp":1773254936000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3772094"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":89,"alternative-id":["10.1145\/3712285.3772094","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3772094","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}