{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T03:47:31Z","timestamp":1780372051000,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","funder":[{"name":"DOE Office of Science, Advanced Scientific Computing Research","award":["DE-AC05-00OR22725"],"award-info":[{"award-number":["DE-AC05-00OR22725"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3750720.3757282","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T11:42:38Z","timestamp":1766230958000},"page":"18-24","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Data Readiness for Scientific AI at Scale"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3639-3956","authenticated-orcid":false,"given":"Wesley","family":"Brewer","sequence":"first","affiliation":[{"name":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5882-0816","authenticated-orcid":false,"given":"Patrick","family":"Widener","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9356-1311","authenticated-orcid":false,"given":"Valentine","family":"Anantharaj","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0099-1559","authenticated-orcid":false,"given":"Feiyi","family":"Wang","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8973-7145","authenticated-orcid":false,"given":"Tom","family":"Beck","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5289-7460","authenticated-orcid":false,"given":"Arjun","family":"Shankar","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8745-7078","authenticated-orcid":false,"given":"Sarp","family":"Oral","sequence":"additional","affiliation":[{"name":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_1_2_2","volume-title":"Report of the Workshop on Advanced Fusion with Machine Learning, April 30-May 2, 2019","year":"2019","unstructured":"2019. Report of the Workshop on Advanced Fusion with Machine Learning, April 30-May 2, 2019. Technical Report. USDOE Office of Science (SC)(United States)."},{"key":"e_1_3_3_1_3_2","volume-title":"2023 Operational Assessment Oak Ridge Leadership Computing Facility","author":"Abraham Subil","year":"2024","unstructured":"Subil Abraham, Paul Abston, Ryan Adamson, Valentine Anantharaj, Ashley Barker, Aaron Barlow, Tom Beck, Katie Bethea, Josh Cunningham, Rafael Ferreira Da\u00a0Silva, et\u00a0al. 2024. 2023 Operational Assessment Oak Ridge Leadership Computing Facility. Technical Report. Oak Ridge National Laboratory (ORNL), Oak Ridge, TN (United States)."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","unstructured":"\u017diga Avsec Vikram Agarwal Daniel Visentin Joseph\u00a0R Ledsam Agnieszka Grabska-Barwi\u0144ska Kyle\u00a0R Taylor Yannis Assael John Jumper Pushmeet Kohli and David\u00a0R Kelley. 2021. Effective gene expression prediction from sequence by integrating long-range interactions. Nature Methods 18 10 (2021) 1196\u20131203. 10.1038\/s41592-021-01252-x","DOI":"10.1038\/s41592-021-01252-x"},{"key":"e_1_3_3_1_5_2","unstructured":"Luis Barroso-Luque Muhammed Shuaibi Xiang Fu Brandon\u00a0M. Wood Misko Dzamba Meng Gao Ammar Rizvi C.\u00a0Lawrence Zitnick and Zachary\u00a0W. Ulissi. 2024. Open Materials 2024 (OMat24) Inorganic Materials Dataset and Models. arXiv:https:\/\/arXiv.org\/abs\/24010.12771 (2024). arxiv:https:\/\/arXiv.org\/abs\/2410.12771\u00a0[cond-mat.mtrl-sci]"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","unstructured":"John\u00a0J. Bates and Jeffrey\u00a0L. Privette. 2012. A maturity model for assessing the completeness of climate data records. Eos Transactions American Geophysical Union 93 44 (2012) 441\u2013441. 10.1029\/2012EO440006 arXiv:https:\/\/agupubs.onlinelibrary.wiley.com\/doi\/pdf\/10.1029\/2012EO440006","DOI":"10.1029\/2012EO440006"},{"key":"e_1_3_3_1_7_2","unstructured":"Cristian Bodnar Wessel\u00a0P Bruinsma Ana Lucic Megan Stanley Anna Allen Johannes Brandstetter Patrick Garvan Maik Riechert Jonathan\u00a0A Weyn Haiyu Dong et\u00a0al. 2025. A foundation model for the Earth system. Nature (2025) 1\u20138."},{"key":"e_1_3_3_1_8_2","unstructured":"Wesley Brewer Aditya Kashi Sajal Dash Aristeidis Tsaris Junqi Yin Mallikarjun Shankar and Feiyi Wang. 2024. Scalable Artificial Intelligence for Science: Perspectives Methods and Exemplars. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.17812 (2024)."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","first-page":"3408","DOI":"10.1109\/BigData55660.2022.10020884","volume-title":"2022 IEEE International Conference on Big Data (Big Data)","author":"Cianciosa Mark","year":"2022","unstructured":"Mark Cianciosa, Richard Archibald, Wael Elwasif, Ana Gainaru, Jin\u00a0Myung Park, and Ross Whitfield. 2022. Adaptive Generation of Training Data for ML Reduced Model Creation. In 2022 IEEE International Conference on Big Data (Big Data). IEEE, 3408\u20133416."},{"key":"e_1_3_3_1_10_2","volume-title":"Integrated plasma and engineering design and assessment for Tokamak reactor components","author":"Collins Cami","year":"2023","unstructured":"Cami Collins, Jin\u00a0Myung Park, Rhea Barnett, Katarzyna Borowiec, Ehab Hassan, Paul Humrickhouse, Jeremy Lore, Kyungjin Kim, Vittorio Badalassi, and Phil Snyder. 2023. Integrated plasma and engineering design and assessment for Tokamak reactor components. Technical Report. Oak Ridge National Laboratory (ORNL), Oak Ridge, TN (United States)."},{"key":"e_1_3_3_1_11_2","unstructured":"Miles Cranmer Sam Greydanus Stephan Hoyer Peter Battaglia David Spergel and Shirley Ho. 2020. Lagrangian neural networks. arXiv:https:\/\/arXiv.org\/abs\/2003.04630 (2020)."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Stefano Curtarolo Wahyu Setyawan Gus\u00a0LW Hart Michal Jahnatek Roman\u00a0V Chepulskii Richard\u00a0H Taylor Shidong Wang Junkai Xue Kesong Yang Ohad Levy et\u00a0al. 2012. AFLOW: An automatic framework for high-throughput materials discovery. Computational Materials Science 58 (2012) 218\u2013226.","DOI":"10.1016\/j.commatsci.2012.02.005"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Jonas Degrave Federico Felici Jonas Buchli Michael Neunert Brendan Tracey Francesco Carpanese Timo Ewalds Roland Hafner Abbas Abdolmaleki Diego de Las\u00a0Casas et\u00a0al. 2022. Magnetic control of tokamak plasmas through deep reinforcement learning. Nature 602 7897 (2022) 414\u2013419.","DOI":"10.1038\/s41586-021-04301-9"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","unstructured":"DIII-D Team. 2024. DIII-D research to provide solutions for ITER and fusion energy. Nuclear Fusion 64 11 (2024) 112003. 10.1088\/1741-4326\/ad2fe9","DOI":"10.1088\/1741-4326\/ad2fe9"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16990-84"},{"key":"e_1_3_3_1_16_2","volume-title":"Strengthening and Democratizing the U.S. Artificial Intelligence Innovation Ecosystem: An Implementation Plan for a National Artificial Intelligence Research Resource","author":"Force National Artificial Intelligence Research Resource\u00a0Task","year":"2023","unstructured":"National Artificial Intelligence Research Resource\u00a0Task Force. 2023. Strengthening and Democratizing the U.S. Artificial Intelligence Innovation Ecosystem: An Implementation Plan for a National Artificial Intelligence Research Resource. Technical Report. National Artificial Intelligence Research Resource Task Force."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"T Fredian J Stillerman G Manduchi A Rigoni K Erickson and T Schr\u00f6der. 2018. MDSplus yesterday today and tomorrow. Fusion Engineering and Design 127 (2018) 106\u2013110.","DOI":"10.1016\/j.fusengdes.2017.12.010"},{"key":"e_1_3_3_1_18_2","unstructured":"Yashika Ghai Donald Spong Jacobo Varela Luis Garcia Juan Ortiz and Wisdom Dayok. 2024. Surrogate model of energetic particle transport in reactor-relevant fusion devices. Bulletin of the American Physical Society (2024)."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Kaveen Hiniduma Suren Byna and Jean\u00a0Luca Bez. 2025. Data readiness for AI: A 360-degree survey. Comput. Surveys 57 9 (2025) 1\u201339.","DOI":"10.1145\/3722214"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Darren\u00a0J Hsu Hao Lu Aditya Kashi Michael Matheson John Gounley Feiyi Wang Wayne Joubert and Jens Glaser. 2023. TwoFold: Highly accurate structure and affinity prediction for protein-ligand complexes from sequences. The International Journal of High Performance Computing Applications 37 6 (2023) 666\u2013682.","DOI":"10.1177\/10943420231201151"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","unstructured":"John Jumper Richard Evans Alexander Pritzel Tim Green Michael Figurnov Olaf Ronneberger Kathryn Tunyasuvunakool Russ Bates Augustin \u017d\u00eddek Anna Potapenko et\u00a0al. 2021. Highly accurate protein structure prediction with AlphaFold. Nature 596 7873 (2021) 583\u2013589. 10.1038\/s41586-021-03819-2","DOI":"10.1038\/s41586-021-03819-2"},{"key":"e_1_3_3_1_22_2","unstructured":"Patrick Kage Jay\u00a0C Rothenberger Pavlos Andreadis and Dimitrios\u00a0I Diochnos. 2024. A review of pseudo-labeling for computer vision. arXiv:https:\/\/arXiv.org\/abs\/2408.07221 (2024)."},{"key":"e_1_3_3_1_23_2","unstructured":"Aditya Kashi Hao Lu Wesley Brewer David Rogers Michael Matheson Mallikarjun Shankar and Feiyi Wang. 2024. Mixed-precision numerics in scientific applications: survey and perspectives. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.19322 (2024)."},{"key":"e_1_3_3_1_24_2","volume-title":"VA EDH Advanced Software Pipeline Framework Report: Enhancing Automation and Scalability","author":"Klasky Hilda","year":"2024","unstructured":"Hilda Klasky, Josh Grant, Midgie MacFarland, Heidi Hanson, Jodie Trafton, and Anuj Kapadia. 2024. VA EDH Advanced Software Pipeline Framework Report: Enhancing Automation and Scalability. Technical Report ORNL\/TM-2024\/3669. Oak Ridge National Laboratory (ORNL), Oak Ridge, TN (United States)."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Kurihana Takuya and Skluzacek Tyler and da Silva Rafael Ferreira da Silva and Anantharaj Valentine. 2024. Scalable Multi-Facility Workflows for Artificial Intelligence Applications in Climate Research. https:\/\/sc24.supercomputing.org\/proceedings\/workshops\/workshop_pages\/ws_xloop106.html","DOI":"10.1109\/SCW63240.2024.00266"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/1383529.1383533"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Chuizheng Meng Sam Griesemer Defu Cao Sungyong Seo and Yan Liu. 2025. When physics meets machine learning: A survey of physics-informed machine learning. Machine Learning for Computational Science and Engineering 1 1 (2025) 1\u201323.","DOI":"10.1007\/s44379-025-00016-0"},{"key":"e_1_3_3_1_28_2","unstructured":"National Artificial Intelligence Research Resource Task Force. 2023. Strengthening and Democratizing the U.S. Artificial Intelligence Innovation Ecosystem: An Implementation Plan for a National Artificial Intelligence Research Resource. U.S. Office of Science and Technology Policy & National Science Foundation."},{"key":"e_1_3_3_1_29_2","unstructured":"Tung Nguyen Johannes Brandstetter Ashish Kapoor Jayesh\u00a0K Gupta and Aditya Grover. 2023. ClimaX: A foundation model for weather and climate. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2301.10343 (2023)."},{"key":"e_1_3_3_1_30_2","unstructured":"Massimiliano\u00a0Lupo Pasini Jong\u00a0Youl Choi Kshitij Mehta Pei Zhang David Rogers Jonghyun Bae Khaled\u00a0Z Ibrahim Ashwin\u00a0M Aji Karl\u00a0W Schulz Jorda Polo et\u00a0al. 2024. Scalable Training of Graph Foundation Models for Atomistic Materials Modeling: A Case Study with HydraGNN. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.12909 (2024)."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Edward\u00a0O Pyzer-Knapp Matteo Manica Peter Staar Lucas Morin Patrick Ruch Teodoro Laino John\u00a0R Smith and Alessandro Curioni. 2025. Foundation models for materials discovery\u2013current state and future directions. npj Computational Materials 11 1 (2025) 61.","DOI":"10.1038\/s41524-025-01538-0"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Maziar Raissi Paris Perdikaris and George\u00a0E Karniadakis. 2019. Physics-informed neural networks: A deep learning framework for solving forward and inverse problems involving nonlinear partial differential equations. Journal of Computational physics 378 (2019) 686\u2013707.","DOI":"10.1016\/j.jcp.2018.10.045"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"crossref","unstructured":"M Romanelli R Coelho D Coster J Ferreira L Fleury S Henderson J Hollocombe F Imbeaux Tomas Jonsson L Kogan et\u00a0al. 2020. Code integration data verification and models validation using the ITER integrated modeling and analysis system (IMAS) in EUROfusion. Fusion Science and Technology 76 8 (2020) 894\u2013900.","DOI":"10.1080\/15361055.2020.1819751"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","unstructured":"Daniel Schwabe Katinka Becker Martin Seyferth Andreas Kla\u00df and Tobias Schaeffter. 2024. The METRIC-framework for assessing data quality for trustworthy AI in medicine: a systematic review. npj Digital Medicine 7 1 (03 Aug 2024) 203. 10.1038\/s41746-024-01196-4","DOI":"10.1038\/s41746-024-01196-4"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/e-Science58273.2023.10254822"},{"key":"e_1_3_3_1_36_2","unstructured":"TensorFlow Team. 2024. TFRecord and tf.train.Example. https:\/\/www.tensorflow.org\/tutorials\/load_data\/tfrecord. Accessed: 2024-06-09."},{"key":"e_1_3_3_1_37_2","volume-title":"Artificial Intelligence for Earth System Predictability: Workshop Report Executive Summary","author":"Energy U.S. Department of","year":"2022","unstructured":"U.S. Department of Energy. 2022. Artificial Intelligence for Earth System Predictability: Workshop Report Executive Summary. Technical Report. U.S. Department of Energy, Office of Biological and Environmental Research (BER). https:\/\/www.ai4esp.org\/files\/EVS_AI4ESP_Executive_Summary.pdf"},{"key":"e_1_3_3_1_38_2","volume-title":"Artificial Intelligence in Health Care: Benefits and Challenges of Machine Learning Technologies for Medical Diagnostics","author":"Office U.S. Government Accountability","year":"2022","unstructured":"U.S. Government Accountability Office. 2022. Artificial Intelligence in Health Care: Benefits and Challenges of Machine Learning Technologies for Medical Diagnostics. Technical Report GAO-22-104629. U.S. Government Accountability Office (GAO). https:\/\/www.gao.gov\/assets\/gao-22-104629.pdf"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"crossref","unstructured":"Xiao Wang Aristeidis Tsaris Siyan Liu Jong-Youl Choi Ming Fan Wei Zhang Junqi Yin Moetasim Ashfaq Dan Lu and Prasanna Balaprakash. 2024. ORBIT: Oak Ridge Base Foundation Model for Earth System Predictability. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.14712 (2024).","DOI":"10.1109\/SC41406.2024.00007"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"crossref","unstructured":"Huimin Zhao Nathan Hillson Kerstin Kleese\u00a0van Dam and Deepti Tanjore. 2022. Artificial Intelligence and Machine Learning for Bioenergy Research: Opportunities and Challenges. (2022).","DOI":"10.2172\/1968870"}],"event":{"name":"ICPP Workshops '25: The 54th International Conference on Parallel Processing Workshops","location":"San Diego CA USA","acronym":"ICPP Workshops '25"},"container-title":["Workshop Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3750720.3757282","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T11:42:45Z","timestamp":1766230965000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3750720.3757282"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":39,"alternative-id":["10.1145\/3750720.3757282","10.1145\/3750720"],"URL":"https:\/\/doi.org\/10.1145\/3750720.3757282","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}