{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T18:35:50Z","timestamp":1758652550903,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,12]],"date-time":"2024-08-12T00:00:00Z","timestamp":1723420800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-AC02-06CH11357,DE-AC02-05CH11231"],"award-info":[{"award-number":["DE-AC02-06CH11357,DE-AC02-05CH11231"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CSSI-2104013,Core-2313154"],"award-info":[{"award-number":["CSSI-2104013,Core-2313154"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,12]]},"DOI":"10.1145\/3673038.3673070","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T18:29:01Z","timestamp":1723141741000},"page":"812-821","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Viper: A High-Performance I\/O Framework for Transparently Updating, Storing, and Transferring Deep Neural Network Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-3985-7896","authenticated-orcid":false,"given":"Jie","family":"Ye","sequence":"first","affiliation":[{"name":"Illinois Institute of Technology, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1694-6541","authenticated-orcid":false,"given":"Jaime","family":"Cernuda","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9719-9567","authenticated-orcid":false,"given":"Neeraj","family":"Rajesh","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4023-3485","authenticated-orcid":false,"given":"Keith","family":"Bateman","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5910-9221","authenticated-orcid":false,"given":"Orcun","family":"Yildiz","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0525-3205","authenticated-orcid":false,"given":"Tom","family":"Peterka","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4823-5311","authenticated-orcid":false,"given":"Arnur","family":"Nigmetov","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4330-6670","authenticated-orcid":false,"given":"Dmitriy","family":"Morozov","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1093-0792","authenticated-orcid":false,"given":"Xian-He","family":"Sun","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3943-663X","authenticated-orcid":false,"given":"Anthony","family":"Kougkas","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0661-7509","authenticated-orcid":false,"given":"Bogdan","family":"Nicolae","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,8,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"AI-assisted automated workflow for real-time x-ray ptychography data analysis via federated resources. arXiv preprint arXiv:2304.04297","author":"Babu V","year":"2023","unstructured":"Anakha\u00a0V Babu, Tekin Bicer, Saugat Kandel, Tao Zhou, Daniel\u00a0J Ching, Steven Henke, Sini\u0161a Veseli, Ryan Chard, Antonino Miceli, and Mathew\u00a0Joseph Cherukara. 2023. AI-assisted automated workflow for real-time x-ray ptychography data analysis via federated resources. arXiv preprint arXiv:2304.04297 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Workshop report on basic research needs for scientific machine learning: Core technologies for artificial intelligence. Technical Report. USDOE Office of Science (SC), Washington, DC (United States).","author":"Baker Nathan","year":"2019","unstructured":"Nathan Baker, Frank Alexander, Timo Bremer, Aric Hagberg, Yannis Kevrekidis, Habib Najm, Manish Parashar, Abani Patra, James Sethian, Stefan Wild, 2019. Workshop report on basic research needs for scientific machine learning: Core technologies for artificial intelligence. Technical Report. USDOE Office of Science (SC), Washington, DC (United States)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0217751X19300199"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337902"},{"key":"e_1_3_2_1_5_1","volume-title":"Clipper: A Low-Latency Online Prediction Serving System.. In NSDI, Vol.\u00a017. 613\u2013627.","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Giulio Zhou, Michael\u00a0J Franklin, Joseph\u00a0E Gonzalez, and Ion Stoica. 2017. Clipper: A Low-Latency Online Prediction Serving System.. In NSDI, Vol.\u00a017. 613\u2013627."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid54584.2022.00020"},{"key":"e_1_3_2_1_7_1","unstructured":"Tobias Domhan Jost\u00a0Tobias Springenberg and Frank Hutter. 2015. Speeding up automatic hyperparameter optimization of deep neural networks by extrapolation of learning curves. In Twenty-fourth international joint conference on artificial intelligence."},{"key":"e_1_3_2_1_8_1","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Eisenman Assaf","year":"2022","unstructured":"Assaf Eisenman, Kiran\u00a0Kumar Matam, Steven Ingram, Dheevatsa Mudigere, Raghuraman Krishnamoorthi, Krishnakumar Nair, Misha Smelyanskiy, and Murali Annavaram. 2022. Check-N-Run: a checkpointing system for training deep learning recommendation models. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22). 929\u2013943."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2020.09.004"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230574"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1107\/S2052252521011258"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593730"},{"key":"e_1_3_2_1_13_1","volume-title":"DataStates-LLM: Lazy Asynchronous Checkpointing for Large Language Models. In HPDC\u201924: The 33nd International Symposium on High-Performance Parallel and Distributed Computing","author":"Maurya Avinash","year":"2024","unstructured":"Avinash Maurya, Robert Underwood, Mustafa Rafique, Franck Cappello, and Bogdan Nicolae. 2024. DataStates-LLM: Lazy Asynchronous Checkpointing for Large Language Models. In HPDC\u201924: The 33nd International Symposium on High-Performance Parallel and Distributed Computing. Pisa, Italy."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2013.00504"},{"key":"e_1_3_2_1_15_1","unstructured":"microsoft. 2023. ONNX Runtime: A cross-platform inference and training machine-learning accelerator. https:\/\/github.com\/microsoft\/onnxruntime"},{"key":"e_1_3_2_1_16_1","unstructured":"Jayashree Mohan Amar Phanishayee and Vijay Chidambaram. 2021. CheckFreq: Frequent Fine-Grained DNN Checkpointing.. In FAST Vol.\u00a021. 203\u2013216."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid49817.2020.00-76"},{"key":"e_1_3_2_1_18_1","volume-title":"DeepClone: Lightweight State Replication of Deep Learning Models for Data Parallel Training. In CLUSTER\u201920: The 2020 IEEE International Conference on Cluster Computing","author":"Nicolae Bogdan","year":"2020","unstructured":"Bogdan Nicolae, Justin\u00a0M Wozniak, Matthieu Dorier, and Franck Cappello. 2020. DeepClone: Lightweight State Replication of Deep Learning Models for Data Parallel Training. In CLUSTER\u201920: The 2020 IEEE International Conference on Cluster Computing. Kobe, Japan."},{"key":"e_1_3_2_1_19_1","unstructured":"NVIDIA. 2023. NVIDIA Triton Inference Server. https:\/\/docs.nvidia.com\/deeplearning\/triton-inference-server\/user-guide\/docs\/user_guide\/architecture.html"},{"key":"e_1_3_2_1_20_1","volume-title":"Tensorflow-serving: Flexible, high-performance ml serving. arXiv preprint arXiv:1712.06139","author":"Olston Christopher","year":"2017","unstructured":"Christopher Olston, Noah Fiedel, Kiril Gorovoy, Jeremiah Harmsen, Li Lao, Fangwei Li, Vinu Rajashekhar, Sukriti Ramesh, and Jordan Soyke. 2017. Tensorflow-serving: Flexible, high-performance ml serving. arXiv preprint arXiv:1712.06139 (2017)."},{"key":"e_1_3_2_1_21_1","unstructured":"pytorch. 2023. TorchServe: a flexible and easy to use tool for serving and scaling PyTorch models in production. https:\/\/github.com\/pytorch\/serve"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1029\/2020MS002203"},{"key":"e_1_3_2_1_23_1","volume-title":"5th USENIX Workshop on Hot Topics in Storage and File Systems (HotStorage 13)","author":"Shin Dong\u00a0In","year":"2013","unstructured":"Dong\u00a0In Shin, Young\u00a0Jin Yu, Hyeong\u00a0S Kim, Jae\u00a0Woo Choi, Heon\u00a0Y Yeom, 2013. Dynamic Interval Polling and Pipelined Post { I\/O} Processing for { Low-Latency} Storage Class Memory. In 5th USENIX Workshop on Hot Topics in Storage and File Systems (HotStorage 13)."},{"key":"e_1_3_2_1_24_1","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Sima Chijun","year":"2022","unstructured":"Chijun Sima, Yao Fu, Man-Kit Sit, Liyi Guo, Xuri Gong, Feng Lin, Junyu Wu, Yongsheng Li, Haidong Rong, Pierre-Louis Aublin, 2022. Ekko: A { Large-Scale} Deep Learning Recommender System with { Low-Latency} Model Update. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). 821\u2013839."},{"key":"e_1_3_2_1_25_1","volume-title":"EvoStore: Towards Scalable Storage of Evolving Learning Models. In HPDC\u201924: The 33nd International Symposium on High-Performance Parallel and Distributed Computing","author":"Underwood Robert","year":"2024","unstructured":"Robert Underwood, Meghana Madhyastha, Randal Burns, and Bogdan Nicolae. 2024. EvoStore: Towards Scalable Storage of Evolving Learning Models. In HPDC\u201924: The 33nd International Symposium on High-Performance Parallel and Distributed Computing. Pisa, Italy."},{"key":"e_1_3_2_1_26_1","volume-title":"The shape of learning curves: a review","author":"Viering Tom","year":"2022","unstructured":"Tom Viering and Marco Loog. 2022. The shape of learning curves: a review. IEEE Transactions on Pattern Analysis and Machine Intelligence (2022)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1038\/s43588-022-00264-7"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3546765"},{"key":"e_1_3_2_1_29_1","volume-title":"A workflow framework for machine learning applied to cancer research. BMC bioinformatics 19, 18","author":"Wozniak M","year":"2018","unstructured":"Justin\u00a0M Wozniak, Rajeev Jain, Prasanna Balaprakash, Jonathan Ozik, Nicholson\u00a0T Collier, John Bauer, Fangfang Xia, Thomas Brettin, Rick Stevens, Jamaludin Mohd-Yusof, 2018. CANDLE\/Supervisor: A workflow framework for machine learning applied to cancer research. BMC bioinformatics 19, 18 (2018), 59\u201369."}],"event":{"name":"ICPP '24: the 53rd International Conference on Parallel Processing","acronym":"ICPP '24","location":"Gotland Sweden"},"container-title":["Proceedings of the 53rd International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673070","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3673038.3673070","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3673038.3673070","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3673038.3673070","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T17:32:47Z","timestamp":1758648767000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673070"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,12]]},"references-count":29,"alternative-id":["10.1145\/3673038.3673070","10.1145\/3673038"],"URL":"https:\/\/doi.org\/10.1145\/3673038.3673070","relation":{},"subject":[],"published":{"date-parts":[[2024,8,12]]},"assertion":[{"value":"2024-08-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}